You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Takeshi Yamamuro (Jira)" <ji...@apache.org> on 2019/11/01 05:08:00 UTC

[jira] [Updated] (SPARK-29704) Support the combinations of grouping operations

     [ https://issues.apache.org/jira/browse/SPARK-29704?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Takeshi Yamamuro updated SPARK-29704:
-------------------------------------
    Description: 
PostgreSQL accepts the query below, which uses a combination of grouping operations, but Spark cannot parse it;
{code:java}
postgres=# create table gstest2 (a integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer);
postgres=# insert into gstest2 values
postgres-#   (1, 1, 1, 1, 1, 1, 1, 1),
postgres-#   (1, 1, 1, 1, 1, 1, 1, 2),
postgres-#   (1, 1, 1, 1, 1, 1, 2, 2),
postgres-#   (1, 1, 1, 1, 1, 2, 2, 2),
postgres-#   (1, 1, 1, 1, 2, 2, 2, 2),
postgres-#   (1, 1, 1, 2, 2, 2, 2, 2),
postgres-#   (1, 1, 2, 2, 2, 2, 2, 2),
postgres-#   (1, 2, 2, 2, 2, 2, 2, 2),
postgres-#   (2, 2, 2, 2, 2, 2, 2, 2);
INSERT 0 9

postgres=# select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d);
 a | b | c | d 
---+---+---+---
 1 | 2 | 2 |  
 1 | 1 | 2 |  
 1 | 1 | 1 |  
 2 | 2 | 2 |  
 1 |   | 1 |  
 2 |   | 2 |  
 1 |   | 2 |  
   |   | 2 |  
   |   | 1 |  
 1 | 2 |   | 2
 1 | 1 |   | 2
 1 | 1 |   | 1
 2 | 2 |   | 2
 1 |   |   | 1
 2 |   |   | 2
 1 |   |   | 2
   |   |   | 2
   |   |   | 1
(18 rows)
{code}
scala> sql("""select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d)""").show
 org.apache.spark.sql.catalyst.parser.ParseException:
 mismatched input 'sets' expecting

{<EOF>, ',', '.', '[', 'AND', 'BETWEEN', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'GROUPING', 'HAVING', 'IN', 'INTERSECT', 'IS', 'LIKE', 'LIMIT', NOT, 'OR', 'ORDER', RLIKE, 'MINUS', 'SORT', 'UNION', 'WINDOW', 'WITH', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+', '-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}

(line 1, pos 61)

== SQL ==
 select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d)
 -------------------------------------------------------------^^^

at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:268)
 at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:135)
 at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
 at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:85)
 at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:605)
 at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
 at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:605)
 ... 47 elided
{code:java}
 {code}

  was:
PostgreSQL accepts the query below, which uses a combination of grouping operations, but Spark cannot parse it;
{code}
postgres=# create table gstest2 (a integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer);
ERROR:  relation "gstest2" already exists
postgres=# insert into gstest2 values
postgres-#   (1, 1, 1, 1, 1, 1, 1, 1),
postgres-#   (1, 1, 1, 1, 1, 1, 1, 2),
postgres-#   (1, 1, 1, 1, 1, 1, 2, 2),
postgres-#   (1, 1, 1, 1, 1, 2, 2, 2),
postgres-#   (1, 1, 1, 1, 2, 2, 2, 2),
postgres-#   (1, 1, 1, 2, 2, 2, 2, 2),
postgres-#   (1, 1, 2, 2, 2, 2, 2, 2),
postgres-#   (1, 2, 2, 2, 2, 2, 2, 2),
postgres-#   (2, 2, 2, 2, 2, 2, 2, 2);
INSERT 0 9

postgres=# select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d);
 a | b | c | d 
---+---+---+---
 1 | 2 | 2 |  
 1 | 1 | 2 |  
 1 | 1 | 1 |  
 2 | 2 | 2 |  
 1 |   | 1 |  
 2 |   | 2 |  
 1 |   | 2 |  
   |   | 2 |  
   |   | 1 |  
 1 | 2 |   | 2
 1 | 1 |   | 2
 1 | 1 |   | 1
 2 | 2 |   | 2
 1 |   |   | 1
 2 |   |   | 2
 1 |   |   | 2
   |   |   | 2
   |   |   | 1
(18 rows)
{code}
scala> sql("""select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d)""").show
org.apache.spark.sql.catalyst.parser.ParseException:
mismatched input 'sets' expecting {<EOF>, ',', '.', '[', 'AND', 'BETWEEN', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'GROUPING', 'HAVING', 'IN', 'INTERSECT', 'IS', 'LIKE', 'LIMIT', NOT, 'OR', 'ORDER', RLIKE, 'MINUS', 'SORT', 'UNION', 'WINDOW', 'WITH', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+', '-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}(line 1, pos 61)

== SQL ==
select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d)
-------------------------------------------------------------^^^

  at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:268)
  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:135)
  at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:85)
  at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:605)
  at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:605)
  ... 47 elided
{code}

{code}


> Support the combinations of grouping operations
> -----------------------------------------------
>
>                 Key: SPARK-29704
>                 URL: https://issues.apache.org/jira/browse/SPARK-29704
>             Project: Spark
>          Issue Type: Sub-task
>          Components: SQL
>    Affects Versions: 3.0.0
>            Reporter: Takeshi Yamamuro
>            Priority: Major
>
> PostgreSQL accepts the query below, which uses a combination of grouping operations, but Spark cannot parse it;
> {code:java}
> postgres=# create table gstest2 (a integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer);
> postgres=# insert into gstest2 values
> postgres-#   (1, 1, 1, 1, 1, 1, 1, 1),
> postgres-#   (1, 1, 1, 1, 1, 1, 1, 2),
> postgres-#   (1, 1, 1, 1, 1, 1, 2, 2),
> postgres-#   (1, 1, 1, 1, 1, 2, 2, 2),
> postgres-#   (1, 1, 1, 1, 2, 2, 2, 2),
> postgres-#   (1, 1, 1, 2, 2, 2, 2, 2),
> postgres-#   (1, 1, 2, 2, 2, 2, 2, 2),
> postgres-#   (1, 2, 2, 2, 2, 2, 2, 2),
> postgres-#   (2, 2, 2, 2, 2, 2, 2, 2);
> INSERT 0 9
> postgres=# select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d);
>  a | b | c | d 
> ---+---+---+---
>  1 | 2 | 2 |  
>  1 | 1 | 2 |  
>  1 | 1 | 1 |  
>  2 | 2 | 2 |  
>  1 |   | 1 |  
>  2 |   | 2 |  
>  1 |   | 2 |  
>    |   | 2 |  
>    |   | 1 |  
>  1 | 2 |   | 2
>  1 | 1 |   | 2
>  1 | 1 |   | 1
>  2 | 2 |   | 2
>  1 |   |   | 1
>  2 |   |   | 2
>  1 |   |   | 2
>    |   |   | 2
>    |   |   | 1
> (18 rows)
> {code}
> scala> sql("""select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d)""").show
>  org.apache.spark.sql.catalyst.parser.ParseException:
>  mismatched input 'sets' expecting
> {<EOF>, ',', '.', '[', 'AND', 'BETWEEN', 'CLUSTER', 'DISTRIBUTE', 'EXCEPT', 'GROUPING', 'HAVING', 'IN', 'INTERSECT', 'IS', 'LIKE', 'LIMIT', NOT, 'OR', 'ORDER', RLIKE, 'MINUS', 'SORT', 'UNION', 'WINDOW', 'WITH', EQ, '<=>', '<>', '!=', '<', LTE, '>', GTE, '+', '-', '*', '/', '%', 'DIV', '&', '|', '||', '^'}
> (line 1, pos 61)
> == SQL ==
>  select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d)
>  -------------------------------------------------------------^^^
> at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:268)
>  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:135)
>  at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
>  at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:85)
>  at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:605)
>  at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
>  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:605)
>  ... 47 elided
> {code:java}
>  {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org