You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@age.apache.org by jg...@apache.org on 2021/02/26 00:06:06 UTC

[incubator-age] branch master updated: Add auto group by and DISTINCT for aggregate functions

This is an automated email from the ASF dual-hosted git repository.

jgemignani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-age.git


The following commit(s) were added to refs/heads/master by this push:
     new b5fb62b  Add auto group by and DISTINCT for aggregate functions
b5fb62b is described below

commit b5fb62bb168e1a18070022f08b17354a48c93094
Author: John Gemignani <jr...@gmail.com>
AuthorDate: Wed Feb 24 09:55:27 2021 -0800

    Add auto group by and DISTINCT for aggregate functions
    
    Added explicit auto group by for aggregate functions. This means, the
    key needs to be specified in the RETURN clause. For example -
    
        RETURN key, aggregate(column)
    
    Implicit auto grouping is not allowed at this time. This is due, in
    part, to how it would cause confusion as to what key was selected.
    So, to avoid confusion, we only allow explicit keys. For example -
    
        RETURN x.a + count(*)
    
    Would need to be written as -
    
        RETURN x.a, x.a + count(*)
    
    Added the DISTINCT syntax for aggregate functions.
    
    Added regression tests for both.
---
 Makefile                                           |   1 +
 regress/expected/expr.out                          | 135 ++++
 regress/sql/expr.sql                               |  31 +
 src/backend/parser/cypher_clause.c                 | 320 +++++++-
 src/backend/parser/cypher_expr.c                   |  60 +-
 src/backend/parser/cypher_gram.y                   |   7 +
 src/backend/parser/cypher_item.c                   |  26 +-
 src/backend/parser/cypher_parse_agg.c              | 848 +++++++++++++++++++++
 src/include/parser/cypher_item.h                   |   2 +-
 .../parser/{cypher_item.h => cypher_parse_agg.h}   |  19 +-
 src/include/parser/cypher_parse_node.h             |   8 +
 11 files changed, 1380 insertions(+), 77 deletions(-)

diff --git a/Makefile b/Makefile
index aad6958..9f166fb 100644
--- a/Makefile
+++ b/Makefile
@@ -39,6 +39,7 @@ OBJS = src/backend/age.o \
        src/backend/parser/cypher_gram.o \
        src/backend/parser/cypher_item.o \
        src/backend/parser/cypher_keywords.o \
+       src/backend/parser/cypher_parse_agg.o \
        src/backend/parser/cypher_parse_node.o \
        src/backend/parser/cypher_parser.o \
        src/backend/utils/adt/agtype.o \
diff --git a/regress/expected/expr.out b/regress/expected/expr.out
index 5f4ddba..d800be5 100644
--- a/regress/expected/expr.out
+++ b/regress/expected/expr.out
@@ -4527,9 +4527,144 @@ ERROR:  function ag_catalog.age_collect() does not exist
 LINE 1: SELECT * FROM cypher('UCSC', $$ RETURN collect() $$) AS (col...
                                                ^
 HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
+-- test DISTINCT inside aggregate functions
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Sven", gpa: 3.2, age: 27, zip: 94110})$$)
+AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN (u) $$) AS (vertex agtype);
+                                                           vertex                                                            
+-----------------------------------------------------------------------------------------------------------------------------
+ {"id": 844424930131969, "label": "students", "properties": {"age": 21, "gpa": 3.0, "zip": 94110, "name": "Jack"}}::vertex
+ {"id": 844424930131970, "label": "students", "properties": {"age": 27, "gpa": 3.5, "zip": 95060, "name": "Jill"}}::vertex
+ {"id": 844424930131971, "label": "students", "properties": {"age": 32, "gpa": 3.75, "zip": 96062, "name": "Jim"}}::vertex
+ {"id": 844424930131972, "label": "students", "properties": {"age": 24, "gpa": 2.5, "zip": "95060", "name": "Rick"}}::vertex
+ {"id": 844424930131973, "label": "students", "properties": {"age": 23, "gpa": 3.8::numeric, "name": "Ann"}}::vertex
+ {"id": 844424930131974, "label": "students", "properties": {"age": 19, "gpa": 4.0, "zip": 90210, "name": "Derek"}}::vertex
+ {"id": 844424930131975, "label": "students", "properties": {"age": 20, "gpa": 3.9::numeric, "name": "Jessica"}}::vertex
+ {"id": 844424930131976, "label": "students", "properties": {"age": 24, "name": "Dave"}}::vertex
+ {"id": 844424930131977, "label": "students", "properties": {"age": 18, "name": "Mike"}}::vertex
+ {"id": 844424930131978, "label": "students", "properties": {"age": 27, "gpa": 3.2, "zip": 94110, "name": "Sven"}}::vertex
+(10 rows)
+
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN count(u.zip), count(DISTINCT u.zip) $$)
+AS (zip agtype, distinct_zip agtype);
+ zip | distinct_zip 
+-----+--------------
+ 6   | 5
+(1 row)
+
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN count(u.age), count(DISTINCT u.age) $$)
+AS (age agtype, distinct_age agtype);
+ age | distinct_age 
+-----+--------------
+ 10  | 8
+(1 row)
+
+-- test AUTO GROUP BY for aggregate functions
+SELECT create_graph('group_by');
+NOTICE:  graph "group_by" has been created
+ create_graph 
+--------------
+ 
+(1 row)
+
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 1, j: 2, k:3})$$) AS (result agtype);
+ result 
+--------
+(0 rows)
+
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 1, j: 2, k:4})$$) AS (result agtype);
+ result 
+--------
+(0 rows)
+
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 1, j: 3, k:5})$$) AS (result agtype);
+ result 
+--------
+(0 rows)
+
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 2, j: 3, k:6})$$) AS (result agtype);
+ result 
+--------
+(0 rows)
+
+SELECT * FROM cypher('group_by', $$MATCH (u:row) RETURN u.i, u.j, u.k$$) AS (i agtype, j agtype, k agtype);
+ i | j | k 
+---+---+---
+ 1 | 2 | 3
+ 1 | 2 | 4
+ 1 | 3 | 5
+ 2 | 3 | 6
+(4 rows)
+
+SELECT * FROM cypher('group_by', $$MATCH (u:row) RETURN u.i, u.j, sum(u.k)$$) AS (i agtype, j agtype, sumk agtype);
+ i | j | sumk 
+---+---+------
+ 1 | 2 | 7.0
+ 2 | 3 | 6.0
+ 1 | 3 | 5.0
+(3 rows)
+
+SELECT * FROM cypher('group_by', $$CREATE (:L {a: 1, b: 2, c:3})$$) AS (result agtype);
+ result 
+--------
+(0 rows)
+
+SELECT * FROM cypher('group_by', $$CREATE (:L {a: 2, b: 3, c:1})$$) AS (result agtype);
+ result 
+--------
+(0 rows)
+
+SELECT * FROM cypher('group_by', $$CREATE (:L {a: 3, b: 1, c:2})$$) AS (result agtype);
+ result 
+--------
+(0 rows)
+
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a, x.b, x.c, x.a + count(*) + x.b + count(*) + x.c$$)
+AS (a agtype, b agtype, c agtype, result agtype);
+ a | b | c | result 
+---+---+---+--------
+ 3 | 1 | 2 | 8.0
+ 2 | 3 | 1 | 8.0
+ 1 | 2 | 3 | 8.0
+(3 rows)
+
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a + x.b + x.c, x.a + x.b + x.c + count(*) + count(*) $$)
+AS (a_b_c agtype,  result agtype);
+ a_b_c | result 
+-------+--------
+ 6     | 12.0
+(1 row)
+
+-- should fail
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a, x.a + count(*) + x.b + count(*) + x.c$$)
+AS (a agtype, result agtype);
+ERROR:  "x" must be either part of an explicitly listed key or used inside an aggregate function
+LINE 1: ...p_by', $$MATCH (x:L) RETURN x.a, x.a + count(*) + x.b + coun...
+                                                             ^
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a + count(*) + x.b + count(*) + x.c$$)
+AS (result agtype);
+ERROR:  "x" must be either part of an explicitly listed key or used inside an aggregate function
+LINE 1: ...CT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a + coun...
+                                                             ^
 --
 -- Cleanup
 --
+SELECT * FROM drop_graph('group_by', true);
+NOTICE:  drop cascades to 4 other objects
+DETAIL:  drop cascades to table group_by._ag_label_vertex
+drop cascades to table group_by._ag_label_edge
+drop cascades to table group_by."row"
+drop cascades to table group_by."L"
+NOTICE:  graph "group_by" has been dropped
+ drop_graph 
+------------
+ 
+(1 row)
+
 SELECT * FROM drop_graph('UCSC', true);
 NOTICE:  drop cascades to 3 other objects
 DETAIL:  drop cascades to table "UCSC"._ag_label_vertex
diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql
index 1e845db..fe5d342 100644
--- a/regress/sql/expr.sql
+++ b/regress/sql/expr.sql
@@ -1884,9 +1884,40 @@ SELECT * FROM cypher('UCSC', $$ RETURN collect(NULL) $$) AS (empty agtype);
 -- should fail
 SELECT * FROM cypher('UCSC', $$ RETURN collect() $$) AS (collect agtype);
 
+-- test DISTINCT inside aggregate functions
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Sven", gpa: 3.2, age: 27, zip: 94110})$$)
+AS (a agtype);
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN (u) $$) AS (vertex agtype);
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN count(u.zip), count(DISTINCT u.zip) $$)
+AS (zip agtype, distinct_zip agtype);
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN count(u.age), count(DISTINCT u.age) $$)
+AS (age agtype, distinct_age agtype);
+
+-- test AUTO GROUP BY for aggregate functions
+SELECT create_graph('group_by');
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 1, j: 2, k:3})$$) AS (result agtype);
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 1, j: 2, k:4})$$) AS (result agtype);
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 1, j: 3, k:5})$$) AS (result agtype);
+SELECT * FROM cypher('group_by', $$CREATE (:row {i: 2, j: 3, k:6})$$) AS (result agtype);
+SELECT * FROM cypher('group_by', $$MATCH (u:row) RETURN u.i, u.j, u.k$$) AS (i agtype, j agtype, k agtype);
+SELECT * FROM cypher('group_by', $$MATCH (u:row) RETURN u.i, u.j, sum(u.k)$$) AS (i agtype, j agtype, sumk agtype);
+SELECT * FROM cypher('group_by', $$CREATE (:L {a: 1, b: 2, c:3})$$) AS (result agtype);
+SELECT * FROM cypher('group_by', $$CREATE (:L {a: 2, b: 3, c:1})$$) AS (result agtype);
+SELECT * FROM cypher('group_by', $$CREATE (:L {a: 3, b: 1, c:2})$$) AS (result agtype);
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a, x.b, x.c, x.a + count(*) + x.b + count(*) + x.c$$)
+AS (a agtype, b agtype, c agtype, result agtype);
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a + x.b + x.c, x.a + x.b + x.c + count(*) + count(*) $$)
+AS (a_b_c agtype,  result agtype);
+-- should fail
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a, x.a + count(*) + x.b + count(*) + x.c$$)
+AS (a agtype, result agtype);
+SELECT * FROM cypher('group_by', $$MATCH (x:L) RETURN x.a + count(*) + x.b + count(*) + x.c$$)
+AS (result agtype);
+
 --
 -- Cleanup
 --
+SELECT * FROM drop_graph('group_by', true);
 SELECT * FROM drop_graph('UCSC', true);
 SELECT * FROM drop_graph('expr', true);
 
diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c
index 4444d2e..f0bd7dd 100644
--- a/src/backend/parser/cypher_clause.c
+++ b/src/backend/parser/cypher_clause.c
@@ -1,23 +1,27 @@
 /*
- * Copyright 2020 Bitnine Co., Ltd.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #include "postgres.h"
 
 #include "access/sysattr.h"
 #include "catalog/pg_type_d.h"
+#include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "nodes/nodes.h"
@@ -25,13 +29,13 @@
 #include "nodes/pg_list.h"
 #include "nodes/primnodes.h"
 #include "optimizer/var.h"
-#include "parser/parse_agg.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_coerce.h"
 #include "parser/parse_collate.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_func.h"
 #include "parser/parse_node.h"
+#include "parser/parse_oper.h"
 #include "parser/parse_relation.h"
 #include "parser/parse_target.h"
 #include "parser/parsetree.h"
@@ -48,6 +52,7 @@
 #include "parser/cypher_clause.h"
 #include "parser/cypher_expr.h"
 #include "parser/cypher_item.h"
+#include "parser/cypher_parse_agg.h"
 #include "parser/cypher_parse_node.h"
 #include "utils/ag_cache.h"
 #include "utils/ag_func.h"
@@ -205,20 +210,21 @@ static Query *transform_cypher_sub_pattern(cypher_parsestate *cpstate,
 // set and remove clause
 static Query *transform_cypher_set(cypher_parsestate *cpstate,
                                    cypher_clause *clause);
-cypher_update_information *transform_cypher_set_item_list(cypher_parsestate *cpstate, List *set_item_list, Query *query);
-cypher_update_information *transform_cypher_remove_item_list(
-    cypher_parsestate *cpstate, List *remove_item_list, Query *query);
-
+static cypher_update_information *transform_cypher_set_item_list(cypher_parsestate *cpstate,
+                                                                 List *set_item_list,
+                                                                 Query *query);
+static cypher_update_information *transform_cypher_remove_item_list(cypher_parsestate *cpstate,
+                                                                    List *remove_item_list,
+                                                                    Query *query);
 // transform
 #define PREV_CYPHER_CLAUSE_ALIAS "_"
 #define transform_prev_cypher_clause(cpstate, prev_clause) \
     transform_cypher_clause_as_subquery(cpstate, transform_cypher_clause, \
                                         prev_clause)
 
-static RangeTblEntry *
-transform_cypher_clause_as_subquery(cypher_parsestate *cpstate,
-                                    transform_method transform,
-                                    cypher_clause *clause);
+static RangeTblEntry *transform_cypher_clause_as_subquery(cypher_parsestate *cpstate,
+                                                          transform_method transform,
+                                                          cypher_clause *clause);
 static Query *analyze_cypher_clause(transform_method transform,
                                     cypher_clause *clause,
                                     cypher_parsestate *parent_cpstate);
@@ -227,7 +233,22 @@ static ParseNamespaceItem *makeNamespaceItem(RangeTblEntry *rte,
                                              bool cols_visible,
                                              bool lateral_only,
                                              bool lateral_ok);
-
+static List *transform_group_clause(cypher_parsestate *cpstate,
+                                    List *grouplist, List **groupingSets,
+                                    List **targetlist, List *sortClause,
+                                    ParseExprKind exprKind);
+static Node *flatten_grouping_sets(Node *expr, bool toplevel,
+                                   bool *hasGroupingSets);
+static Index transform_group_clause_expr(List **flatresult,
+                                         Bitmapset *seen_local,
+                                         cypher_parsestate *cpstate,
+                                         Node *gexpr, List **targetlist,
+                                         List *sortClause,
+                                         ParseExprKind exprKind,
+                                         bool toplevel);
+static List *add_target_to_group_list(cypher_parsestate *cpstate,
+                                      TargetEntry *tle, List *grouplist,
+                                      List *targetlist, int location);
 /*
  * transform a cypher_clause
  */
@@ -504,12 +525,249 @@ cypher_update_information *transform_cypher_set_item_list(
     return info;
 }
 
+/* from PG's static helper function */
+static Node *flatten_grouping_sets(Node *expr, bool toplevel,
+                                   bool *hasGroupingSets)
+{
+    /* just in case of pathological input */
+    check_stack_depth();
+
+    if (expr == (Node *) NIL)
+        return (Node *) NIL;
+
+    switch (expr->type)
+    {
+        case T_RowExpr:
+        {
+            RowExpr *r = (RowExpr *) expr;
+
+            if (r->row_format == COERCE_IMPLICIT_CAST)
+                return flatten_grouping_sets((Node *) r->args, false, NULL);
+            break;
+        }
+        case T_GroupingSet:
+            ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                    errmsg("flattening of GroupingSet is not implemented")));
+            break;
+        case T_List:
+        {
+            List *result = NIL;
+            ListCell *l;
+
+            foreach(l, (List *) expr)
+            {
+                Node *n = flatten_grouping_sets(lfirst(l), toplevel, hasGroupingSets);
+
+                if (n != (Node *) NIL)
+                {
+                    if (IsA(n, List))
+                        result = list_concat(result, (List *) n);
+                    else
+                        result = lappend(result, n);
+                }
+            }
+            return (Node *) result;
+        }
+        default:
+            break;
+    }
+    return expr;
+}
+
+/* from PG's addTargetToGroupList */
+static List *add_target_to_group_list(cypher_parsestate *cpstate,
+                                      TargetEntry *tle, List *grouplist,
+                                      List *targetlist, int location)
+{
+    ParseState *pstate = &cpstate->pstate;
+    Oid restype = exprType((Node *) tle->expr);
+
+    /* if tlist item is an UNKNOWN literal, change it to TEXT */
+    if (restype == UNKNOWNOID)
+    {
+        tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr, restype,
+                                         TEXTOID, -1, COERCION_IMPLICIT,
+                                         COERCE_IMPLICIT_CAST, -1);
+        restype = TEXTOID;
+    }
+
+    /* avoid making duplicate grouplist entries */
+    if (!targetIsInSortList(tle, InvalidOid, grouplist))
+    {
+        SortGroupClause *grpcl = makeNode(SortGroupClause);
+        Oid sortop;
+        Oid eqop;
+        bool hashable;
+        ParseCallbackState pcbstate;
+
+        setup_parser_errposition_callback(&pcbstate, pstate, location);
+
+        /* determine the eqop and optional sortop */
+        get_sort_group_operators(restype, false, true, false, &sortop, &eqop,
+                                 NULL, &hashable);
+
+        cancel_parser_errposition_callback(&pcbstate);
+
+        grpcl->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
+        grpcl->eqop = eqop;
+        grpcl->sortop = sortop;
+        grpcl->nulls_first = false; /* OK with or without sortop */
+        grpcl->hashable = hashable;
+
+        grouplist = lappend(grouplist, grpcl);
+    }
+
+    return grouplist;
+}
+
+/* from PG's transformGroupClauseExpr */
+static Index transform_group_clause_expr(List **flatresult,
+                                         Bitmapset *seen_local,
+                                         cypher_parsestate *cpstate,
+                                         Node *gexpr, List **targetlist,
+                                         List *sortClause,
+                                         ParseExprKind exprKind, bool toplevel)
+{
+    TargetEntry *tle = NULL;
+    bool found = false;
+
+    tle = find_target_list_entry(cpstate, gexpr, targetlist, exprKind);
+
+    if (tle->ressortgroupref > 0)
+    {
+        ListCell *sl;
+
+        /*
+         * Eliminate duplicates (GROUP BY x, x) but only at local level.
+         * (Duplicates in grouping sets can affect the number of returned
+         * rows, so can't be dropped indiscriminately.)
+         *
+         * Since we don't care about anything except the sortgroupref, we can
+         * use a bitmapset rather than scanning lists.
+         */
+        if (bms_is_member(tle->ressortgroupref, seen_local))
+            return 0;
+
+        /*
+         * If we're already in the flat clause list, we don't need to consider
+         * adding ourselves again.
+         */
+        found = targetIsInSortList(tle, InvalidOid, *flatresult);
+        if (found)
+            return tle->ressortgroupref;
+
+        /*
+         * If the GROUP BY tlist entry also appears in ORDER BY, copy operator
+         * info from the (first) matching ORDER BY item.  This means that if
+         * you write something like "GROUP BY foo ORDER BY foo USING <<<", the
+         * GROUP BY operation silently takes on the equality semantics implied
+         * by the ORDER BY.  There are two reasons to do this: it improves the
+         * odds that we can implement both GROUP BY and ORDER BY with a single
+         * sort step, and it allows the user to choose the equality semantics
+         * used by GROUP BY, should she be working with a datatype that has
+         * more than one equality operator.
+         *
+         * If we're in a grouping set, though, we force our requested ordering
+         * to be NULLS LAST, because if we have any hope of using a sorted agg
+         * for the job, we're going to be tacking on generated NULL values
+         * after the corresponding groups. If the user demands nulls first,
+         * another sort step is going to be inevitable, but that's the
+         * planner's problem.
+         */
+
+
+         foreach(sl, sortClause)
+         {
+             SortGroupClause *sc = (SortGroupClause *) lfirst(sl);
+
+             if (sc->tleSortGroupRef == tle->ressortgroupref)
+             {
+                 SortGroupClause *grpc = copyObject(sc);
+
+                 if (!toplevel)
+                     grpc->nulls_first = false;
+                 *flatresult = lappend(*flatresult, grpc);
+                 found = true;
+                 break;
+             }
+         }
+    }
+
+    /*
+     * If no match in ORDER BY, just add it to the result using default
+     * sort/group semantics.
+     */
+    if (!found)
+        *flatresult = add_target_to_group_list(cpstate, tle, *flatresult,
+                                               *targetlist, exprLocation(gexpr));
+
+    /* _something_ must have assigned us a sortgroupref by now... */
+
+    return tle->ressortgroupref;
+}
+
+/* from PG's transformGroupClause */
+static List * transform_group_clause(cypher_parsestate *cpstate,
+                                     List *grouplist, List **groupingSets,
+                                     List **targetlist, List *sortClause,
+                                     ParseExprKind exprKind)
+{
+    List *result = NIL;
+    List *flat_grouplist;
+    List *gsets = NIL;
+    ListCell *gl;
+    bool hasGroupingSets = false;
+    Bitmapset *seen_local = NULL;
+
+    /*
+     * Recursively flatten implicit RowExprs. (Technically this is only needed
+     * for GROUP BY, per the syntax rules for grouping sets, but we do it
+     * anyway.)
+     */
+    flat_grouplist = (List *) flatten_grouping_sets((Node *) grouplist, true,
+                                                    &hasGroupingSets);
+
+    foreach(gl, flat_grouplist)
+    {
+        Node       *gexpr = (Node *) lfirst(gl);
+
+        if (IsA(gexpr, GroupingSet))
+        {
+            ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                            errmsg("GroupingSet is not implemented")));
+            break;
+        }
+        else
+        {
+            Index ref = transform_group_clause_expr(&result, seen_local,
+                                                    cpstate, gexpr, targetlist,
+                                                    sortClause, exprKind, true);
+            if (ref > 0)
+            {
+                seen_local = bms_add_member(seen_local, ref);
+                if (hasGroupingSets)
+                    ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                            errmsg("GroupingSet is not implemented")));
+            }
+        }
+    }
+
+    /* parser should prevent this */
+    Assert(gsets == NIL || groupingSets != NULL);
+
+    if (groupingSets)
+        *groupingSets = gsets;
+
+    return result;
+}
+
 static Query *transform_cypher_return(cypher_parsestate *cpstate,
                                       cypher_clause *clause)
 {
     ParseState *pstate = (ParseState *)cpstate;
     cypher_return *self = (cypher_return *)clause->self;
     Query *query;
+    List *groupClause = NIL;
 
     query = makeNode(Query);
     query->commandType = CMD_SELECT;
@@ -518,15 +776,22 @@ static Query *transform_cypher_return(cypher_parsestate *cpstate,
         transform_prev_cypher_clause(cpstate, clause->prev);
 
     query->targetList = transform_cypher_item_list(cpstate, self->items,
+                                                   &groupClause,
                                                    EXPR_KIND_SELECT_TARGET);
 
     markTargetListOrigins(pstate, query->targetList);
 
     // ORDER BY
-    query->sortClause = transform_cypher_order_by(
-        cpstate, self->order_by, &query->targetList, EXPR_KIND_ORDER_BY);
+    query->sortClause = transform_cypher_order_by(cpstate, self->order_by,
+                                                  &query->targetList,
+                                                  EXPR_KIND_ORDER_BY);
 
-    // TODO: auto GROUP BY for aggregation
+    /* 'auto' GROUP BY (from PG's transformGroupClause) */
+    query->groupClause = transform_group_clause(cpstate, groupClause,
+                                                &query->groupingSets,
+                                                &query->targetList,
+                                                query->sortClause,
+                                                EXPR_KIND_GROUP_BY);
 
     // DISTINCT
     if (self->distinct)
@@ -553,6 +818,11 @@ static Query *transform_cypher_return(cypher_parsestate *cpstate,
 
     assign_query_collations(pstate, query);
 
+    /* this must be done after collations, for reliable comparison of exprs */
+    if (pstate->p_hasAggs ||
+        query->groupClause || query->groupingSets || query->havingQual)
+        parse_check_aggregates(pstate, query);
+
     return query;
 }
 
@@ -847,7 +1117,7 @@ static Query *transform_cypher_sub_pattern(cypher_parsestate *cpstate,
     qry->hasAggs = pstate->p_hasAggs;
 
     if (qry->hasAggs)
-        parseCheckAggregates(pstate, qry);
+        parse_check_aggregates(pstate, qry);
 
     assign_query_collations(pstate, qry);
 
diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c
index f0c191c..1d61d73 100644
--- a/src/backend/parser/cypher_expr.c
+++ b/src/backend/parser/cypher_expr.c
@@ -1,17 +1,20 @@
 /*
- * Copyright 2020 Bitnine Co., Ltd.
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
  *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 #include "postgres.h"
@@ -700,6 +703,7 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn)
     List *targs = NIL;
     List *fname = NIL;
     ListCell *args;
+    Node *retval = NULL;
 
     /* Transform the list of arguments ... */
     foreach(args, fn->args)
@@ -707,28 +711,8 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn)
                         transform_cypher_expr_recurse(cpstate,
                                                       (Node *)lfirst(args)));
 
-    /*
-     * When WITHIN GROUP is used, we treat its ORDER BY expressions as
-     * additional arguments to the function, for purposes of function lookup
-     * and argument type coercion.  So, transform each such expression and add
-     * them to the targs list.  We don't explicitly mark where each argument
-     * came from, but ParseFuncOrColumn can tell what's what by reference to
-     * list_length(fn->agg_order).
-     */
-
-    /* This part needs to be worked on. So, for now, we exit if it is set. */
-    Assert(fn->agg_within_group == false);
-    if (fn->agg_within_group)
-    {
-        Assert(fn->agg_order != NIL);
-        foreach(args, fn->agg_order)
-        {
-            SortBy *arg = (SortBy *) lfirst(args);
-
-            targs = lappend(targs, transformExpr(pstate, arg->node,
-                                                 EXPR_KIND_ORDER_BY));
-        }
-    }
+    /* within group should not happen */
+    Assert(!fn->agg_within_group);
 
     /*
      * If the function name is not qualified, then it is one of ours. We need to
@@ -781,8 +765,14 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn)
         fname = fn->funcname;
 
     /* ... and hand off to ParseFuncOrColumn */
-    return ParseFuncOrColumn(pstate, fname, targs, last_srf, fn, false,
-                             fn->location);
+    retval = ParseFuncOrColumn(pstate, fname, targs, last_srf, fn, false,
+                               fn->location);
+
+    /* flag that an aggregate was found during a transform */
+    if (retval != NULL && retval->type == T_Aggref)
+        cpstate->exprHasAgg = true;
+
+    return retval;
 }
 
 /*
diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y
index d9a7f89..3c7121f 100644
--- a/src/backend/parser/cypher_gram.y
+++ b/src/backend/parser/cypher_gram.y
@@ -1127,6 +1127,13 @@ expr_func_norm:
              n->agg_star = true;
              $$ = (Node *)n;
          }
+    | func_name '(' DISTINCT  expr_list ')'
+        {
+            FuncCall *n = (FuncCall *)make_function_expr($1, $4, @1);
+            n->agg_order = NIL;
+            n->agg_distinct = true;
+            $$ = (Node *)n;
+        }
     ;
 
 expr_func_subexpr:
diff --git a/src/backend/parser/cypher_item.c b/src/backend/parser/cypher_item.c
index 3581bd0..9cfc1d7 100644
--- a/src/backend/parser/cypher_item.c
+++ b/src/backend/parser/cypher_item.c
@@ -51,21 +51,45 @@ TargetEntry *transform_cypher_item(cypher_parsestate *cpstate, Node *node,
 
 // see transformTargetList()
 List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list,
-                                 ParseExprKind expr_kind)
+                                 List **groupClause, ParseExprKind expr_kind)
 {
     List *target_list = NIL;
     ListCell *li;
+    List *group_clause = NIL;
+    bool hasAgg = false;
 
     foreach (li, item_list)
     {
         ResTarget *item = lfirst(li);
         TargetEntry *te;
 
+        /* clear the exprHasAgg flag to check transform for an aggregate */
+        cpstate->exprHasAgg = false;
+
+        /* transform the item */
         te = transform_cypher_item(cpstate, item->val, NULL, expr_kind,
                                    item->name, false);
 
         target_list = lappend(target_list, te);
+
+        /*
+         * Did the tranformed item contain an aggregate function? If it didn't,
+         * add it to the potential group_clause. If it did, flag that we found
+         * an aggregate in an expression
+         */
+        if (!cpstate->exprHasAgg)
+            group_clause = lappend(group_clause, item->val);
+        else
+            hasAgg = true;
     }
 
+    /*
+     * If we found an aggregate function, we need to return the group_clause,
+     * even if NIL. parseCheckAggregates at the end of transform_cypher_return
+     * will verify if it is valid.
+     */
+    if (hasAgg)
+        *groupClause = group_clause;
+
     return target_list;
 }
diff --git a/src/backend/parser/cypher_parse_agg.c b/src/backend/parser/cypher_parse_agg.c
new file mode 100644
index 0000000..8a865cd
--- /dev/null
+++ b/src/backend/parser/cypher_parse_agg.c
@@ -0,0 +1,848 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_constraint.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/tlist.h"
+#include "optimizer/var.h"
+#include "parser/cypher_parse_agg.h"
+#include "parser/parsetree.h"
+#include "rewrite/rewriteManip.h"
+
+typedef struct
+{
+    ParseState *pstate;
+    int min_varlevel;
+    int min_agglevel;
+    int sublevels_up;
+} check_agg_arguments_context;
+
+typedef struct
+{
+    ParseState *pstate;
+    Query *qry;
+    PlannerInfo *root;
+    List *groupClauses;
+    List *groupClauseCommonVars;
+    bool have_non_var_grouping;
+    List **func_grouped_rels;
+    int sublevels_up;
+    bool in_agg_direct_args;
+} check_ungrouped_columns_context;
+
+static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
+                                    List *groupClauses, List *groupClauseVars,
+                                    bool have_non_var_grouping,
+                                    List **func_grouped_rels);
+static bool check_ungrouped_columns_walker(Node *node,
+                                           check_ungrouped_columns_context *context);
+static void finalize_grouping_exprs(Node *node, ParseState *pstate, Query *qry,
+                                    List *groupClauses, PlannerInfo *root,
+                                    bool have_non_var_grouping);
+static bool finalize_grouping_exprs_walker(Node *node,
+                                           check_ungrouped_columns_context *context);
+static List *expand_groupingset_node(GroupingSet *gs);
+static List * expand_grouping_sets(List *groupingSets, int limit);
+
+/*
+ * From PG's parseCheckAggregates
+ *
+ * Check for aggregates where they shouldn't be and improper grouping.
+ * This function should be called after the target list and qualifications
+ * are finalized.
+ *
+ * Misplaced aggregates are now mostly detected in transformAggregateCall,
+ * but it seems more robust to check for aggregates in recursive queries
+ * only after everything is finalized.  In any case it's hard to detect
+ * improper grouping on-the-fly, so we have to make another pass over the
+ * query for that.
+ */
+void parse_check_aggregates(ParseState *pstate, Query *qry)
+{
+    List *gset_common = NIL;
+    List *groupClauses = NIL;
+    List *groupClauseCommonVars = NIL;
+    bool have_non_var_grouping;
+    List *func_grouped_rels = NIL;
+    ListCell *l;
+    bool hasJoinRTEs;
+    bool hasSelfRefRTEs;
+    PlannerInfo *root = NULL;
+    Node *clause;
+
+    /* This should only be called if we found aggregates or grouping */
+    Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual || qry->groupingSets);
+
+    /*
+     * If we have grouping sets, expand them and find the intersection of all
+     * sets.
+     */
+    if (qry->groupingSets)
+    {
+        /*
+         * The limit of 4096 is arbitrary and exists simply to avoid resource
+         * issues from pathological constructs.
+         */
+        List *gsets = expand_grouping_sets(qry->groupingSets, 4096);
+
+        if (!gsets)
+            ereport(ERROR,
+                    (errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
+                     errmsg("too many grouping sets present (maximum 4096)"),
+                     parser_errposition(pstate,
+                                        qry->groupClause ?
+                                        exprLocation((Node *) qry->groupClause) :
+                                        exprLocation((Node *) qry->groupingSets))));
+
+        /*
+         * The intersection will often be empty, so help things along by
+         * seeding the intersect with the smallest set.
+         */
+        gset_common = linitial(gsets);
+
+        if (gset_common)
+        {
+            for_each_cell(l, lnext(list_head(gsets)))
+            {
+                gset_common = list_intersection_int(gset_common, lfirst(l));
+                if (!gset_common)
+                    break;
+            }
+        }
+
+        /*
+         * If there was only one grouping set in the expansion, AND if the
+         * groupClause is non-empty (meaning that the grouping set is not
+         * empty either), then we can ditch the grouping set and pretend we
+         * just had a normal GROUP BY.
+         */
+        if (list_length(gsets) == 1 && qry->groupClause)
+            qry->groupingSets = NIL;
+    }
+
+    /*
+     * Scan the range table to see if there are JOIN or self-reference CTE
+     * entries.  We'll need this info below.
+     */
+    hasJoinRTEs = hasSelfRefRTEs = false;
+    foreach(l, pstate->p_rtable)
+    {
+        RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+
+        if (rte->rtekind == RTE_JOIN)
+            hasJoinRTEs = true;
+        else if (rte->rtekind == RTE_CTE && rte->self_reference)
+            hasSelfRefRTEs = true;
+    }
+
+    /*
+     * Build a list of the acceptable GROUP BY expressions for use by
+     * check_ungrouped_columns().
+     *
+     * We get the TLE, not just the expr, because GROUPING wants to know the
+     * sortgroupref.
+     */
+    foreach(l, qry->groupClause)
+    {
+        SortGroupClause *grpcl = (SortGroupClause *) lfirst(l);
+        TargetEntry *expr;
+
+        expr = get_sortgroupclause_tle(grpcl, qry->targetList);
+        if (expr == NULL)
+            continue; /* probably cannot happen */
+
+        groupClauses = lcons(expr, groupClauses);
+    }
+
+    /*
+     * If there are join alias vars involved, we have to flatten them to the
+     * underlying vars, so that aliased and unaliased vars will be correctly
+     * taken as equal.  We can skip the expense of doing this if no rangetable
+     * entries are RTE_JOIN kind. We use the planner's flatten_join_alias_vars
+     * routine to do the flattening; it wants a PlannerInfo root node, which
+     * fortunately can be mostly dummy.
+     */
+    if (hasJoinRTEs)
+    {
+        root = makeNode(PlannerInfo);
+        root->parse = qry;
+        root->planner_cxt = CurrentMemoryContext;
+        root->hasJoinRTEs = true;
+
+        groupClauses = (List *) flatten_join_alias_vars(root,
+                                                        (Node *) groupClauses);
+    }
+
+    /*
+     * Detect whether any of the grouping expressions aren't simple Vars; if
+     * they're all Vars then we don't have to work so hard in the recursive
+     * scans.  (Note we have to flatten aliases before this.)
+     *
+     * Track Vars that are included in all grouping sets separately in
+     * groupClauseCommonVars, since these are the only ones we can use to
+     * check for functional dependencies.
+     */
+    have_non_var_grouping = false;
+    foreach(l, groupClauses)
+    {
+        TargetEntry *tle = lfirst(l);
+
+        if (!IsA(tle->expr, Var))
+        {
+            have_non_var_grouping = true;
+        }
+        else if (!qry->groupingSets ||
+                 list_member_int(gset_common, tle->ressortgroupref))
+        {
+            groupClauseCommonVars = lappend(groupClauseCommonVars, tle->expr);
+        }
+    }
+
+    /*
+     * Check the targetlist and HAVING clause for ungrouped variables.
+     *
+     * Note: because we check resjunk tlist elements as well as regular ones,
+     * this will also find ungrouped variables that came from ORDER BY and
+     * WINDOW clauses.  For that matter, it's also going to examine the
+     * grouping expressions themselves --- but they'll all pass the test ...
+     *
+     * We also finalize GROUPING expressions, but for that we need to traverse
+     * the original (unflattened) clause in order to modify nodes.
+     */
+    clause = (Node *) qry->targetList;
+    finalize_grouping_exprs(clause, pstate, qry, groupClauses, root,
+                            have_non_var_grouping);
+    if (hasJoinRTEs)
+        clause = flatten_join_alias_vars(root, clause);
+    check_ungrouped_columns(clause, pstate, qry, groupClauses,
+                            groupClauseCommonVars, have_non_var_grouping,
+                            &func_grouped_rels);
+
+    clause = (Node *) qry->havingQual;
+    finalize_grouping_exprs(clause, pstate, qry, groupClauses, root,
+                            have_non_var_grouping);
+    if (hasJoinRTEs)
+        clause = flatten_join_alias_vars(root, clause);
+    check_ungrouped_columns(clause, pstate, qry, groupClauses,
+                            groupClauseCommonVars, have_non_var_grouping,
+                            &func_grouped_rels);
+
+    /*
+     * Per spec, aggregates can't appear in a recursive term.
+     */
+    if (pstate->p_hasAggs && hasSelfRefRTEs)
+        ereport(ERROR,
+                (errcode(ERRCODE_INVALID_RECURSION),
+                 errmsg("aggregate functions are not allowed in a recursive query's recursive term"),
+                 parser_errposition(pstate, locate_agg_of_level((Node *) qry, 0))));
+}
+
+/*
+ * check_ungrouped_columns -
+ *
+ * Scan the given expression tree for ungrouped variables (variables
+ * that are not listed in the groupClauses list and are not within
+ * the arguments of aggregate functions).  Emit a suitable error message
+ * if any are found.
+ *
+ * NOTE: we assume that the given clause has been transformed suitably for
+ * parser output.  This means we can use expression_tree_walker.
+ *
+ * NOTE: we recognize grouping expressions in the main query, but only
+ * grouping Vars in subqueries.  For example, this will be rejected,
+ * although it could be allowed:
+ *    SELECT (SELECT x FROM bar where y = (foo.a + foo.b))
+ *    FROM foo
+ *    GROUP BY a + b;
+ * The difficulty is the need to account for different sublevels_up.
+ * This appears to require a whole custom version of equal(), which is
+ * way more pain than the feature seems worth.
+ */
+static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
+                                    List *groupClauses,
+                                    List *groupClauseCommonVars,
+                                    bool have_non_var_grouping,
+                                    List **func_grouped_rels)
+{
+    check_ungrouped_columns_context context;
+
+    context.pstate = pstate;
+    context.qry = qry;
+    context.root = NULL;
+    context.groupClauses = groupClauses;
+    context.groupClauseCommonVars = groupClauseCommonVars;
+    context.have_non_var_grouping = have_non_var_grouping;
+    context.func_grouped_rels = func_grouped_rels;
+    context.sublevels_up = 0;
+    context.in_agg_direct_args = false;
+    check_ungrouped_columns_walker(node, &context);
+}
+
+static bool check_ungrouped_columns_walker(Node *node, check_ungrouped_columns_context *context)
+{
+    ListCell *gl;
+
+    if (node == NULL)
+        return false;
+
+    if (IsA(node, Const) || IsA(node, Param))
+        return false; /* constants are always acceptable */
+
+    if (IsA(node, Aggref))
+    {
+        Aggref *agg = (Aggref *) node;
+
+        if ((int) agg->agglevelsup == context->sublevels_up)
+        {
+            /*
+             * If we find an aggregate call of the original level, do not
+             * recurse into its normal arguments, ORDER BY arguments, or
+             * filter; ungrouped vars there are not an error.  But we should
+             * check direct arguments as though they weren't in an aggregate.
+             * We set a special flag in the context to help produce a useful
+             * error message for ungrouped vars in direct arguments.
+             */
+            bool result;
+
+            Assert(!context->in_agg_direct_args);
+            context->in_agg_direct_args = true;
+            result = check_ungrouped_columns_walker((Node *) agg->aggdirectargs,
+                                                    context);
+            context->in_agg_direct_args = false;
+            return result;
+        }
+
+        /*
+         * We can skip recursing into aggregates of higher levels altogether,
+         * since they could not possibly contain Vars of concern to us (see
+         * transformAggregateCall).  We do need to look at aggregates of lower
+         * levels, however.
+         */
+        if ((int) agg->agglevelsup > context->sublevels_up)
+            return false;
+    }
+
+    if (IsA(node, GroupingFunc))
+    {
+        GroupingFunc *grp = (GroupingFunc *) node;
+
+        /* handled GroupingFunc separately, no need to recheck at this level */
+
+        if ((int) grp->agglevelsup >= context->sublevels_up)
+            return false;
+    }
+
+    /*
+     * If we have any GROUP BY items that are not simple Vars, check to see if
+     * subexpression as a whole matches any GROUP BY item. We need to do this
+     * at every recursion level so that we recognize GROUPed-BY expressions
+     * before reaching variables within them. But this only works at the outer
+     * query level, as noted above.
+     */
+    if (context->have_non_var_grouping && context->sublevels_up == 0)
+    {
+        foreach(gl, context->groupClauses)
+        {
+            TargetEntry *tle = lfirst(gl);
+
+            if (equal(node, tle->expr))
+                return false; /* acceptable, do not descend more */
+        }
+    }
+
+    /*
+     * If we have an ungrouped Var of the original query level, we have a
+     * failure.  Vars below the original query level are not a problem, and
+     * neither are Vars from above it.  (If such Vars are ungrouped as far as
+     * their own query level is concerned, that's someone else's problem...)
+     */
+    if (IsA(node, Var))
+    {
+        Var *var = (Var *) node;
+        RangeTblEntry *rte;
+        char *attname;
+
+        if (var->varlevelsup != context->sublevels_up)
+            return false; /* it's not local to my query, ignore */
+
+        /*
+         * Check for a match, if we didn't do it above.
+         */
+        if (!context->have_non_var_grouping || context->sublevels_up != 0)
+        {
+            foreach(gl, context->groupClauses)
+            {
+                Var *gvar = (Var *) ((TargetEntry *) lfirst(gl))->expr;
+
+                if (IsA(gvar, Var) &&
+                    gvar->varno == var->varno &&
+                    gvar->varattno == var->varattno &&
+                    gvar->varlevelsup == 0)
+                    return false; /* acceptable, we're okay */
+            }
+        }
+
+        /*
+         * Check whether the Var is known functionally dependent on the GROUP
+         * BY columns.  If so, we can allow the Var to be used, because the
+         * grouping is really a no-op for this table.  However, this deduction
+         * depends on one or more constraints of the table, so we have to add
+         * those constraints to the query's constraintDeps list, because it's
+         * not semantically valid anymore if the constraint(s) get dropped.
+         * (Therefore, this check must be the last-ditch effort before raising
+         * error: we don't want to add dependencies unnecessarily.)
+         *
+         * Because this is a pretty expensive check, and will have the same
+         * outcome for all columns of a table, we remember which RTEs we've
+         * already proven functional dependency for in the func_grouped_rels
+         * list.  This test also prevents us from adding duplicate entries to
+         * the constraintDeps list.
+         */
+        if (list_member_int(*context->func_grouped_rels, var->varno))
+            return false; /* previously proven acceptable */
+
+        Assert(var->varno > 0 &&
+               (int) var->varno <= list_length(context->pstate->p_rtable));
+        rte = rt_fetch(var->varno, context->pstate->p_rtable);
+        if (rte->rtekind == RTE_RELATION)
+        {
+            if (check_functional_grouping(rte->relid, var->varno, 0,
+                                          context->groupClauseCommonVars,
+                                          &context->qry->constraintDeps))
+            {
+                *context->func_grouped_rels = lappend_int(*context->func_grouped_rels,
+                                                          var->varno);
+                return false; /* acceptable */
+            }
+        }
+
+        /* Found an ungrouped local variable; generate error message */
+        attname = get_rte_attribute_name(rte, var->varattno);
+        if (context->sublevels_up == 0)
+            ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR),
+                            errmsg("\"%s\" must be either part of an explicitly listed key or used inside an aggregate function",
+                                   attname), context->in_agg_direct_args ?
+                                       errdetail("Direct arguments of an ordered-set aggregate must use only grouped columns.") :
+                                       0, parser_errposition(context->pstate, var->location)));
+        else
+            ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR),
+                            errmsg("subquery uses ungrouped column \"%s.%s\" from outer query",
+                                   rte->eref->aliasname, attname),
+                                   parser_errposition(context->pstate, var->location)));
+    }
+
+    if (IsA(node, Query))
+    {
+        /* Recurse into subselects */
+        bool result;
+
+        context->sublevels_up++;
+        result = query_tree_walker((Query *) node,
+                                   check_ungrouped_columns_walker,
+                                   (void *) context, 0);
+        context->sublevels_up--;
+        return result;
+    }
+
+    return expression_tree_walker(node, check_ungrouped_columns_walker,
+                                  (void *) context);
+}
+
+/*
+ * finalize_grouping_exprs -
+ *	  Scan the given expression tree for GROUPING() and related calls,
+ *	  and validate and process their arguments.
+ *
+ * This is split out from check_ungrouped_columns above because it needs
+ * to modify the nodes (which it does in-place, not via a mutator) while
+ * check_ungrouped_columns may see only a copy of the original thanks to
+ * flattening of join alias vars. So here, we flatten each individual
+ * GROUPING argument as we see it before comparing it.
+ */
+static void finalize_grouping_exprs(Node *node, ParseState *pstate, Query *qry,
+                                    List *groupClauses, PlannerInfo *root,
+                                    bool have_non_var_grouping)
+{
+    check_ungrouped_columns_context context;
+
+    context.pstate = pstate;
+    context.qry = qry;
+    context.root = root;
+    context.groupClauses = groupClauses;
+    context.groupClauseCommonVars = NIL;
+    context.have_non_var_grouping = have_non_var_grouping;
+    context.func_grouped_rels = NULL;
+    context.sublevels_up = 0;
+    context.in_agg_direct_args = false;
+    finalize_grouping_exprs_walker(node, &context);
+}
+
+static bool finalize_grouping_exprs_walker(Node *node,
+                                           check_ungrouped_columns_context *context)
+{
+    ListCell *gl;
+
+    if (node == NULL)
+        return false;
+    if (IsA(node, Const) || IsA(node, Param))
+        return false; /* constants are always acceptable */
+
+    if (IsA(node, Aggref))
+    {
+        Aggref *agg = (Aggref *) node;
+
+        if ((int) agg->agglevelsup == context->sublevels_up)
+        {
+            /*
+             * If we find an aggregate call of the original level, do not
+             * recurse into its normal arguments, ORDER BY arguments, or
+             * filter; GROUPING exprs of this level are not allowed there. But
+             * check direct arguments as though they weren't in an aggregate.
+             */
+            bool result;
+
+            Assert(!context->in_agg_direct_args);
+            context->in_agg_direct_args = true;
+            result = finalize_grouping_exprs_walker((Node *) agg->aggdirectargs,
+                                                context);
+            context->in_agg_direct_args = false;
+            return result;
+        }
+
+        /*
+         * We can skip recursing into aggregates of higher levels altogether,
+         * since they could not possibly contain exprs of concern to us (see
+         * transformAggregateCall).  We do need to look at aggregates of lower
+         * levels, however.
+         */
+        if ((int) agg->agglevelsup > context->sublevels_up)
+            return false;
+    }
+
+    if (IsA(node, GroupingFunc))
+    {
+        GroupingFunc *grp = (GroupingFunc *) node;
+
+        /*
+         * We only need to check GroupingFunc nodes at the exact level to
+         * which they belong, since they cannot mix levels in arguments.
+         */
+
+        if ((int) grp->agglevelsup == context->sublevels_up)
+        {
+            ListCell *lc;
+            List *ref_list = NIL;
+
+            foreach(lc, grp->args)
+            {
+                Node *expr = lfirst(lc);
+                Index ref = 0;
+
+                if (context->root)
+                    expr = flatten_join_alias_vars(context->root, expr);
+
+                /*
+                 * Each expression must match a grouping entry at the current
+                 * query level. Unlike the general expression case, we don't
+                 * allow functional dependencies or outer references.
+                 */
+
+                if (IsA(expr, Var))
+                {
+                    Var *var = (Var *) expr;
+
+                    if (var->varlevelsup == context->sublevels_up)
+                    {
+                        foreach(gl, context->groupClauses)
+                        {
+                            TargetEntry *tle = lfirst(gl);
+                            Var *gvar = (Var *) tle->expr;
+
+                            if (IsA(gvar, Var) &&
+                                gvar->varno == var->varno &&
+                                gvar->varattno == var->varattno &&
+                                gvar->varlevelsup == 0)
+                            {
+                                ref = tle->ressortgroupref;
+                                break;
+                            }
+                        }
+                    }
+                }
+                else if (context->have_non_var_grouping &&
+                         context->sublevels_up == 0)
+                {
+                    foreach(gl, context->groupClauses)
+                    {
+                        TargetEntry *tle = lfirst(gl);
+
+                        if (equal(expr, tle->expr))
+                        {
+                           ref = tle->ressortgroupref;
+                           break;
+                        }
+                    }
+                }
+
+                if (ref == 0)
+                    ereport(ERROR,
+                            (errcode(ERRCODE_GROUPING_ERROR),
+                             errmsg("arguments to GROUPING must be grouping expressions of the associated query level"),
+                             parser_errposition(context->pstate,
+                             exprLocation(expr))));
+
+                ref_list = lappend_int(ref_list, ref);
+            }
+
+            grp->refs = ref_list;
+        }
+
+        if ((int) grp->agglevelsup > context->sublevels_up)
+            return false;
+    }
+
+    if (IsA(node, Query))
+    {
+        /* Recurse into subselects */
+        bool result;
+
+        context->sublevels_up++;
+        result = query_tree_walker((Query *) node,
+                                   finalize_grouping_exprs_walker,
+                                   (void *) context, 0);
+        context->sublevels_up--;
+        return result;
+    }
+    return expression_tree_walker(node, finalize_grouping_exprs_walker,
+                                  (void *) context);
+}
+
+
+/*
+ * Given a GroupingSet node, expand it and return a list of lists.
+ *
+ * For EMPTY nodes, return a list of one empty list.
+ *
+ * For SIMPLE nodes, return a list of one list, which is the node content.
+ *
+ * For CUBE and ROLLUP nodes, return a list of the expansions.
+ *
+ * For SET nodes, recursively expand contained CUBE and ROLLUP.
+ */
+static List * expand_groupingset_node(GroupingSet *gs)
+{
+    List *result = NIL;
+
+    switch (gs->kind)
+    {
+        case GROUPING_SET_EMPTY:
+            result = list_make1(NIL);
+            break;
+
+        case GROUPING_SET_SIMPLE:
+            result = list_make1(gs->content);
+            break;
+
+        case GROUPING_SET_ROLLUP:
+        {
+            List *rollup_val = gs->content;
+            ListCell *lc;
+            int curgroup_size = list_length(gs->content);
+
+            while (curgroup_size > 0)
+            {
+                List *current_result = NIL;
+                int i = curgroup_size;
+
+                foreach(lc, rollup_val)
+                {
+                    GroupingSet *gs_current = (GroupingSet *) lfirst(lc);
+
+                    Assert(gs_current->kind == GROUPING_SET_SIMPLE);
+
+                    current_result = list_concat(current_result,
+                                                 list_copy(gs_current->content));
+
+                    /* If we are done with making the current group, break */
+                    if (--i == 0)
+                        break;
+                }
+
+                result = lappend(result, current_result);
+                --curgroup_size;
+            }
+
+            result = lappend(result, NIL);
+        }
+            break;
+
+        case GROUPING_SET_CUBE:
+        {
+            List *cube_list = gs->content;
+            int number_bits = list_length(cube_list);
+            uint32 num_sets;
+            uint32 i;
+
+            /* parser should cap this much lower */
+            Assert(number_bits < 31);
+
+            num_sets = (1U << number_bits);
+
+            for (i = 0; i < num_sets; i++)
+            {
+                List *current_result = NIL;
+                ListCell *lc;
+                uint32 mask = 1U;
+
+                foreach(lc, cube_list)
+                {
+                    GroupingSet *gs_current = (GroupingSet *) lfirst(lc);
+
+                    Assert(gs_current->kind == GROUPING_SET_SIMPLE);
+
+                    if (mask & i)
+                    {
+                        current_result = list_concat(current_result,
+                                                     list_copy(gs_current->content));
+                    }
+
+                    mask <<= 1;
+                }
+
+                result = lappend(result, current_result);
+            }
+        }
+            break;
+
+        case GROUPING_SET_SETS:
+        {
+            ListCell *lc;
+
+            foreach(lc, gs->content)
+            {
+                List *current_result = expand_groupingset_node(lfirst(lc));
+
+                result = list_concat(result, current_result);
+            }
+        }
+            break;
+    }
+
+    return result;
+}
+
+static int cmp_list_len_asc(const void *a, const void *b)
+{
+    int la = list_length(*(List *const *) a);
+    int lb = list_length(*(List *const *) b);
+
+    return (la > lb) ? 1 : (la == lb) ? 0 : -1;
+}
+
+/*
+ * Expand a groupingSets clause to a flat list of grouping sets.
+ * The returned list is sorted by length, shortest sets first.
+ *
+ * This is mainly for the planner, but we use it here too to do
+ * some consistency checks.
+ */
+static List * expand_grouping_sets(List *groupingSets, int limit)
+{
+    List *expanded_groups = NIL;
+    List *result = NIL;
+    double numsets = 1;
+    ListCell *lc;
+
+    if (groupingSets == NIL)
+        return NIL;
+
+    foreach(lc, groupingSets)
+    {
+        List *current_result = NIL;
+        GroupingSet *gs = lfirst(lc);
+
+        current_result = expand_groupingset_node(gs);
+
+        Assert(current_result != NIL);
+
+        numsets *= list_length(current_result);
+
+        if (limit >= 0 && numsets > limit)
+            return NIL;
+
+        expanded_groups = lappend(expanded_groups, current_result);
+    }
+
+    /*
+     * Do cartesian product between sublists of expanded_groups. While at it,
+     * remove any duplicate elements from individual grouping sets (we must
+     * NOT change the number of sets though)
+     */
+
+    foreach(lc, (List *) linitial(expanded_groups))
+    {
+        result = lappend(result, list_union_int(NIL, (List *) lfirst(lc)));
+    }
+
+    for_each_cell(lc, lnext(list_head(expanded_groups)))
+    {
+        List *p = lfirst(lc);
+        List *new_result = NIL;
+        ListCell *lc2;
+
+        foreach(lc2, result)
+        {
+            List *q = lfirst(lc2);
+            ListCell *lc3;
+
+            foreach(lc3, p)
+            {
+                new_result = lappend(new_result,
+                                     list_union_int(q,(List *) lfirst(lc3)));
+            }
+        }
+        result = new_result;
+    }
+
+    if (list_length(result) > 1)
+    {
+        int			result_len = list_length(result);
+        List	  **buf = palloc(sizeof(List *) * result_len);
+        List	  **ptr = buf;
+
+        foreach(lc, result)
+        {
+            *ptr++ = lfirst(lc);
+        }
+
+        qsort(buf, result_len, sizeof(List *), cmp_list_len_asc);
+
+        result = NIL;
+        ptr = buf;
+
+        while (result_len-- > 0)
+            result = lappend(result, *ptr++);
+
+        pfree(buf);
+    }
+
+    return result;
+}
diff --git a/src/include/parser/cypher_item.h b/src/include/parser/cypher_item.h
index fdfa858..3b782b8 100644
--- a/src/include/parser/cypher_item.h
+++ b/src/include/parser/cypher_item.h
@@ -33,6 +33,6 @@ TargetEntry *transform_cypher_item(cypher_parsestate *cpstate, Node *node,
                                    Node *expr, ParseExprKind expr_kind,
                                    char *colname, bool resjunk);
 List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list,
-                                 ParseExprKind expr_kind);
+                                 List **groupClause, ParseExprKind expr_kind);
 
 #endif
diff --git a/src/include/parser/cypher_item.h b/src/include/parser/cypher_parse_agg.h
similarity index 60%
copy from src/include/parser/cypher_item.h
copy to src/include/parser/cypher_parse_agg.h
index fdfa858..f1848a6 100644
--- a/src/include/parser/cypher_item.h
+++ b/src/include/parser/cypher_parse_agg.h
@@ -17,22 +17,11 @@
  * under the License.
  */
 
-#ifndef AG_CYPHER_ITEM_H
-#define AG_CYPHER_ITEM_H
+#ifndef CYPHER_PARSE_AGG_H
+#define CYPHER_PARSE_AGG_H
 
-#include "postgres.h"
-
-#include "nodes/nodes.h"
-#include "nodes/pg_list.h"
-#include "nodes/primnodes.h"
 #include "parser/parse_node.h"
 
-#include "parser/cypher_parse_node.h"
-
-TargetEntry *transform_cypher_item(cypher_parsestate *cpstate, Node *node,
-                                   Node *expr, ParseExprKind expr_kind,
-                                   char *colname, bool resjunk);
-List *transform_cypher_item_list(cypher_parsestate *cpstate, List *item_list,
-                                 ParseExprKind expr_kind);
+extern void parse_check_aggregates(ParseState *pstate, Query *qry);
 
-#endif
+#endif /* CYPHER_PARSE_AGG_H */
diff --git a/src/include/parser/cypher_parse_node.h b/src/include/parser/cypher_parse_node.h
index 0c2114f..5066c6c 100644
--- a/src/include/parser/cypher_parse_node.h
+++ b/src/include/parser/cypher_parse_node.h
@@ -34,6 +34,14 @@ typedef struct cypher_parsestate
     int default_alias_num;
     List *entities;
     List *property_constraint_quals;
+    /*
+     * To flag when an aggregate has been found in an expression during an
+     * expression transform. This is used during the return_item list transform
+     * to know which expressions are group by keys (not an aggregate or a
+     * composite expression with an aggregate), and which aren't (everything
+     * else). It is only used by transform_cypher_item_list.
+     */
+    bool exprHasAgg;
 } cypher_parsestate;
 
 typedef struct errpos_ecb_state