You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@age.apache.org by jg...@apache.org on 2020/12/03 22:59:13 UTC

[incubator-age] branch master updated: Add support for aggregate functions avg, sum, & count

This is an automated email from the ASF dual-hosted git repository.

jgemignani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-age.git


The following commit(s) were added to refs/heads/master by this push:
     new 78e9e03  Add support for aggregate functions avg, sum, & count
78e9e03 is described below

commit 78e9e0354d0b34778e6959cc40fc135356fad910
Author: John Gemignani <jr...@gmail.com>
AuthorDate: Thu Dec 3 10:45:49 2020 -0800

    Add support for aggregate functions avg, sum, & count
    
    Added support for aggregate functions, in general. The pstate value
    p_hasAggs needed to be passed upward from the RETURN clause in order
    to allow the planner to properly set them up.
    
    Right now, only the RETURN clause will pass this value upward. I am not
    sure if it is needed to be done elsewhere atm.
    
    Added the aggregate functions avg(), sum(), count(), and count(*). These
    functions are directly mapped to their corresponding PG functions as those
    PG functions meet the openCypher specifications.
    
    Added regression tests.
---
 age--0.2.0.sql                     |  16 +++--
 regress/expected/expr.out          | 142 +++++++++++++++++++++++++++++++++++++
 regress/sql/expr.sql               |  31 ++++++++
 src/backend/parser/cypher_clause.c |   1 +
 src/backend/parser/cypher_gram.y   |  26 ++++++-
 src/backend/utils/adt/agtype.c     |  10 +++
 6 files changed, 221 insertions(+), 5 deletions(-)

diff --git a/age--0.2.0.sql b/age--0.2.0.sql
index 0d3f6e5..58f6eb5 100644
--- a/age--0.2.0.sql
+++ b/age--0.2.0.sql
@@ -700,7 +700,6 @@ AS 'MODULE_PATHNAME', 'agtype_build_list_noargs';
 --
 
 -- agtype -> boolean (implicit)
-
 CREATE FUNCTION ag_catalog.agtype_to_bool(agtype)
 RETURNS boolean
 LANGUAGE c
@@ -714,7 +713,6 @@ WITH FUNCTION ag_catalog.agtype_to_bool(agtype)
 AS IMPLICIT;
 
 -- boolean -> agtype (explicit)
-
 CREATE FUNCTION ag_catalog.bool_to_agtype(boolean)
 RETURNS agtype
 LANGUAGE c
@@ -727,7 +725,6 @@ CREATE CAST (boolean AS agtype)
 WITH FUNCTION ag_catalog.bool_to_agtype(boolean);
 
 -- float8 -> agtype (explicit)
-
 CREATE FUNCTION ag_catalog.float8_to_agtype(float8)
 RETURNS agtype
 LANGUAGE c
@@ -740,7 +737,6 @@ CREATE CAST (float8 AS agtype)
 WITH FUNCTION ag_catalog.float8_to_agtype(float8);
 
 -- agtype -> float8 (implicit)
-
 CREATE FUNCTION ag_catalog.agtype_to_float8(agtype)
 RETURNS float8
 LANGUAGE c
@@ -753,6 +749,18 @@ CREATE CAST (agtype AS float8)
 WITH FUNCTION ag_catalog.agtype_to_float8(agtype)
 AS IMPLICIT;
 
+-- int8 -> agtype (explicit)
+CREATE FUNCTION ag_catalog.int8_to_agtype(int8)
+RETURNS agtype
+LANGUAGE c
+STABLE
+RETURNS NULL ON NULL INPUT
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
+CREATE CAST (int8 AS agtype)
+WITH FUNCTION ag_catalog.int8_to_agtype(int8);
+
 --
 -- agtype - access operators
 --
diff --git a/regress/expected/expr.out b/regress/expected/expr.out
index c8e586c..6988700 100644
--- a/regress/expected/expr.out
+++ b/regress/expected/expr.out
@@ -4152,8 +4152,150 @@ ERROR:  schema "contains" does not exist
 LINE 2:     RETURN contains.age_sqrt(25)
                                     ^
 --
+-- aggregate functions avg(), sum(), count(), & count(*)
+--
+SELECT create_graph('UCSC');
+NOTICE:  graph "UCSC" has been created
+ create_graph 
+--------------
+ 
+(1 row)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jack", gpa: 3.0, age: 21})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jill", gpa: 3.5, age: 27})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jim", gpa: 3.75, age: 32})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Rick", gpa: 2.5, age: 24})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Ann", gpa: 3.8, age: 23})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Derek", gpa: 4.0, age: 19})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jessica", gpa: 3.9, age: 20})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN (u) $$) AS (vertex agtype);
+                                                     vertex                                                     
+----------------------------------------------------------------------------------------------------------------
+ {"id": 844424930131969, "label": "students", "properties": {"age": 21, "gpa": 3.0, "name": "Jack"}}::vertex
+ {"id": 844424930131970, "label": "students", "properties": {"age": 27, "gpa": 3.5, "name": "Jill"}}::vertex
+ {"id": 844424930131971, "label": "students", "properties": {"age": 32, "gpa": 3.75, "name": "Jim"}}::vertex
+ {"id": 844424930131972, "label": "students", "properties": {"age": 24, "gpa": 2.5, "name": "Rick"}}::vertex
+ {"id": 844424930131973, "label": "students", "properties": {"age": 23, "gpa": 3.8, "name": "Ann"}}::vertex
+ {"id": 844424930131974, "label": "students", "properties": {"age": 19, "gpa": 4.0, "name": "Derek"}}::vertex
+ {"id": 844424930131975, "label": "students", "properties": {"age": 20, "gpa": 3.9, "name": "Jessica"}}::vertex
+(7 rows)
+
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN avg(u.gpa), sum(u.gpa), sum(u.gpa)/count(u.gpa), count(u.gpa), count(*) $$) 
+AS (avg agtype, sum agtype, sum_divided_by_count agtype, count agtype, count_star agtype);
+       avg        |  sum  | sum_divided_by_count | count | count_star 
+------------------+-------+----------------------+-------+------------
+ 3.49285714285714 | 24.45 | 3.49285714285714     | 7     | 7
+(1 row)
+
+-- add in 2 null gpa records
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Dave", age: 24})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Mike", age: 18})$$) AS (a agtype);
+ a 
+---
+(0 rows)
+
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN (u) $$) AS (vertex agtype);
+                                                     vertex                                                     
+----------------------------------------------------------------------------------------------------------------
+ {"id": 844424930131969, "label": "students", "properties": {"age": 21, "gpa": 3.0, "name": "Jack"}}::vertex
+ {"id": 844424930131970, "label": "students", "properties": {"age": 27, "gpa": 3.5, "name": "Jill"}}::vertex
+ {"id": 844424930131971, "label": "students", "properties": {"age": 32, "gpa": 3.75, "name": "Jim"}}::vertex
+ {"id": 844424930131972, "label": "students", "properties": {"age": 24, "gpa": 2.5, "name": "Rick"}}::vertex
+ {"id": 844424930131973, "label": "students", "properties": {"age": 23, "gpa": 3.8, "name": "Ann"}}::vertex
+ {"id": 844424930131974, "label": "students", "properties": {"age": 19, "gpa": 4.0, "name": "Derek"}}::vertex
+ {"id": 844424930131975, "label": "students", "properties": {"age": 20, "gpa": 3.9, "name": "Jessica"}}::vertex
+ {"id": 844424930131976, "label": "students", "properties": {"age": 24, "name": "Dave"}}::vertex
+ {"id": 844424930131977, "label": "students", "properties": {"age": 18, "name": "Mike"}}::vertex
+(9 rows)
+
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN avg(u.gpa), sum(u.gpa), sum(u.gpa)/count(u.gpa), count(u.gpa), count(*) $$) 
+AS (avg agtype, sum agtype, sum_divided_by_count agtype, count agtype, count_star agtype);
+       avg        |  sum  | sum_divided_by_count | count | count_star 
+------------------+-------+----------------------+-------+------------
+ 3.49285714285714 | 24.45 | 3.49285714285714     | 7     | 9
+(1 row)
+
+-- should return null
+SELECT * FROM cypher('UCSC', $$ RETURN avg(NULL) $$) AS (avg agtype);
+ avg 
+-----
+ 
+(1 row)
+
+SELECT * FROM cypher('UCSC', $$ RETURN sum(NULL) $$) AS (sum agtype);
+ sum 
+-----
+ 
+(1 row)
+
+-- should return 0
+SELECT * FROM cypher('UCSC', $$ RETURN count(NULL) $$) AS (count agtype);
+ count 
+-------
+ 0
+(1 row)
+
+-- should fail
+SELECT * FROM cypher('UCSC', $$ RETURN avg() $$) AS (avg agtype);
+ERROR:  function pg_catalog.avg() does not exist
+LINE 1: SELECT * FROM cypher('UCSC', $$ RETURN avg() $$) AS (avg agt...
+                                               ^
+HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
+SELECT * FROM cypher('UCSC', $$ RETURN sum() $$) AS (sum agtype);
+ERROR:  function pg_catalog.sum() does not exist
+LINE 1: SELECT * FROM cypher('UCSC', $$ RETURN sum() $$) AS (sum agt...
+                                               ^
+HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
+SELECT * FROM cypher('UCSC', $$ RETURN count() $$) AS (count agtype);
+ERROR:  pg_catalog.count(*) must be used to call a parameterless aggregate function
+LINE 1: SELECT * FROM cypher('UCSC', $$ RETURN count() $$) AS (count...
+                                               ^
+--
 -- Cleanup
 --
+SELECT * FROM drop_graph('UCSC', true);
+NOTICE:  drop cascades to 3 other objects
+DETAIL:  drop cascades to table "UCSC"._ag_label_vertex
+drop cascades to table "UCSC"._ag_label_edge
+drop cascades to table "UCSC".students
+NOTICE:  graph "UCSC" has been dropped
+ drop_graph 
+------------
+ 
+(1 row)
+
 SELECT * FROM drop_graph('expr', true);
 NOTICE:  drop cascades to 5 other objects
 DETAIL:  drop cascades to table expr._ag_label_vertex
diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql
index 56f003c..8d7e3bb 100644
--- a/regress/sql/expr.sql
+++ b/regress/sql/expr.sql
@@ -1773,10 +1773,41 @@ SELECT * from cypher('expr', $$
     RETURN contains.age_sqrt(25)
 $$) as (result agtype);
 
+--
+-- aggregate functions avg(), sum(), count(), & count(*)
+--
+SELECT create_graph('UCSC');
+
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jack", gpa: 3.0, age: 21})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jill", gpa: 3.5, age: 27})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jim", gpa: 3.75, age: 32})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Rick", gpa: 2.5, age: 24})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Ann", gpa: 3.8, age: 23})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Derek", gpa: 4.0, age: 19})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Jessica", gpa: 3.9, age: 20})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN (u) $$) AS (vertex agtype);
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN avg(u.gpa), sum(u.gpa), sum(u.gpa)/count(u.gpa), count(u.gpa), count(*) $$) 
+AS (avg agtype, sum agtype, sum_divided_by_count agtype, count agtype, count_star agtype);
+-- add in 2 null gpa records
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Dave", age: 24})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$CREATE (:students {name: "Mike", age: 18})$$) AS (a agtype);
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN (u) $$) AS (vertex agtype);
+SELECT * FROM cypher('UCSC', $$ MATCH (u) RETURN avg(u.gpa), sum(u.gpa), sum(u.gpa)/count(u.gpa), count(u.gpa), count(*) $$) 
+AS (avg agtype, sum agtype, sum_divided_by_count agtype, count agtype, count_star agtype);
+-- should return null
+SELECT * FROM cypher('UCSC', $$ RETURN avg(NULL) $$) AS (avg agtype);
+SELECT * FROM cypher('UCSC', $$ RETURN sum(NULL) $$) AS (sum agtype);
+-- should return 0
+SELECT * FROM cypher('UCSC', $$ RETURN count(NULL) $$) AS (count agtype);
+-- should fail
+SELECT * FROM cypher('UCSC', $$ RETURN avg() $$) AS (avg agtype);
+SELECT * FROM cypher('UCSC', $$ RETURN sum() $$) AS (sum agtype);
+SELECT * FROM cypher('UCSC', $$ RETURN count() $$) AS (count agtype);
 
 --
 -- Cleanup
 --
+SELECT * FROM drop_graph('UCSC', true);
 SELECT * FROM drop_graph('expr', true);
 
 --
diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c
index 67a6d03..6d68755 100644
--- a/src/backend/parser/cypher_clause.c
+++ b/src/backend/parser/cypher_clause.c
@@ -299,6 +299,7 @@ static Query *transform_cypher_return(cypher_parsestate *cpstate,
 
     query->rtable = pstate->p_rtable;
     query->jointree = makeFromExpr(pstate->p_joinlist, NULL);
+    query->hasAggs = pstate->p_hasAggs;
 
     assign_query_collations(pstate, query);
 
diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y
index 2458988..b3332be 100644
--- a/src/backend/parser/cypher_gram.y
+++ b/src/backend/parser/cypher_gram.y
@@ -1111,6 +1111,23 @@ expr_func_norm:
         {
             $$ = make_function_expr($1, $3, @2);
         }
+    /* borrowed from PG's grammar */
+    | func_name '(' '*' ')'
+        {
+            /*
+             * We consider AGGREGATE(*) to invoke a parameterless
+             * aggregate.  This does the right thing for COUNT(*),
+             * and there are no other aggregates in SQL that accept
+             * '*' as parameter.
+             *
+             * The FuncCall node is also marked agg_star = true,
+             * so that later processing can detect what the argument
+             * really was.
+             */
+             FuncCall *n = (FuncCall *)make_function_expr($1, NIL, @1);
+             n->agg_star = true;
+             $$ = (Node *)n;
+         }
     ;
 
 expr_func_subexpr:
@@ -1566,12 +1583,19 @@ static Node *make_function_expr(List *func_name, List *exprs, int location)
 
         /*
          * Check for openCypher functions that are directly mapped to PG
-         * functions. Currently, we only map rand() and pi().
+         * functions. We may want to find a better way to do this, as there
+         * could be many.
          */
         if (pg_strcasecmp(name, "rand") == 0)
             funcname = SystemFuncName("random");
         else if (pg_strcasecmp(name, "pi") == 0)
             funcname = SystemFuncName("pi");
+        else if (pg_strcasecmp(name, "avg") == 0)
+            funcname = SystemFuncName("avg");
+        else if (pg_strcasecmp(name, "sum") == 0)
+            funcname = SystemFuncName("sum");
+        else if (pg_strcasecmp(name, "count") == 0)
+            funcname = SystemFuncName("count");
         else
             /*
              * We don't qualify AGE functions here. This is done in the
diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c
index c3f39d7..2fd5caf 100644
--- a/src/backend/utils/adt/agtype.c
+++ b/src/backend/utils/adt/agtype.c
@@ -2223,6 +2223,16 @@ Datum float8_to_agtype(PG_FUNCTION_ARGS)
     return float_to_agtype(PG_GETARG_FLOAT8(0));
 }
 
+PG_FUNCTION_INFO_V1(int8_to_agtype);
+
+/*
+ * Cast float8 to agtype.
+ */
+Datum int8_to_agtype(PG_FUNCTION_ARGS)
+{
+    return integer_to_agtype(PG_GETARG_INT64(0));
+}
+
 /*
  * Helper function for agtype_access_operator map access.
  * Note: This function expects that a map and a scalar key are being passed.