You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Takeshi Yamamuro (Jira)" <ji...@apache.org> on 2019/11/01 05:07:00 UTC
[jira] [Updated] (SPARK-29705) Support more expressive forms in GroupingSets/Cube/Rollup

     [ https://issues.apache.org/jira/browse/SPARK-29705?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Takeshi Yamamuro updated SPARK-29705:
-------------------------------------
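    Description: 
PostgreSQL accepts a multi-argument grouping() call together with composite column groups in ROLLUP:
{code:java}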
postgres=# create table gstest2 (a integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer);
postgres=# insert into gstest2 values
postgres-#   (1, 1, 1, 1, 1, 1, 1, 1),
postgres-#   (1, 1, 1, 1, 1, 1, 1, 2),
postgres-#   (1, 1, 1, 1, 1, 1, 2, 2),
postgres-#   (1, 1, 1, 1, 1, 2, 2, 2),
postgres-#   (1, 1, 1, 1, 2, 2, 2, 2),
postgres-#   (1, 1, 1, 2, 2, 2, 2, 2),
postgres-#   (1, 1, 2, 2, 2, 2, 2, 2),
postgres-#   (1, 2, 2, 2, 2, 2, 2, 2),
postgres-#   (2, 2, 2, 2, 2, 2, 2, 2);
INSERT 0 9

postgres=# select a, b, grouping(a,b), sum(c), count(*), max(c) from gstest2 group by rollup ((a,b,c),(c,d));
 a | b | grouping | sum | count | max 
---+---+----------+-----+-------+-----
   |   |        3 |  24 |    18 |   2
 1 | 1 |        0 |   4 |     2 |   2
 1 | 2 |        0 |   4 |     2 |   2
 1 | 1 |        0 |   2 |     2 |   1
 2 | 2 |        0 |   4 |     2 |   2
 1 | 1 |        0 |  10 |    10 |   1
 1 | 2 |        0 |   4 |     2 |   2
 1 | 1 |        0 |  12 |    12 |   1
 1 | 1 |        0 |   4 |     2 |   2
 2 | 2 |        0 |   4 |     2 |   2
(10 rows)
{code}
Spark fails to analyze the same query because its grouping() function accepts only one argument:
{code:java}
scala> sql("""select a, b, grouping(a,b), sum(c), count(*), max(c) from gstest2 group by rollup ((a,b,c),(c,d))""").show
org.apache.spark.sql.AnalysisException: Invalid number of arguments for function grouping. Expected: 1; Found: 2; line 1 pos 13
  at org.apache.spark.sql.catalyst.analysis.FunctionRegistry$.$anonfun$expression$8(FunctionRegistry.scala:614)
  at scala.Option.getOrElse(Option.scala:189)
  at org.apache.spark.sql.catalyst.analysis.FunctionRegistry$.$anonfun$expression$4(FunctionRegistry.scala:598)
  at org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry.lookupFunction(FunctionRegistry.scala:121)
  at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction(SessionCatalog.scala:1375)
  at org.apache.spark.sql.hive.HiveSessionCatalog.super$lookupFunction(HiveSessionCatalog.scala:132)
  at org.apache.spark.sql.hive.HiveSessionCatalog.$anonfun$lookupFunction0$2(HiveSessionCatalog.scala:132)
  at scala.util.Try$.apply(Try.scala:213)
{code}

  was:
{code:java}
postgres=# create table gstest2 (a integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer);
ERROR:  relation "gstest2" already exists
postgres=# insert into gstest2 values
postgres-#   (1, 1, 1, 1, 1, 1, 1, 1),
postgres-#   (1, 1, 1, 1, 1, 1, 1, 2),
postgres-#   (1, 1, 1, 1, 1, 1, 2, 2),
postgres-#   (1, 1, 1, 1, 1, 2, 2, 2),
postgres-#   (1, 1, 1, 1, 2, 2, 2, 2),
postgres-#   (1, 1, 1, 2, 2, 2, 2, 2),
postgres-#   (1, 1, 2, 2, 2, 2, 2, 2),
postgres-#   (1, 2, 2, 2, 2, 2, 2, 2),
postgres-#   (2, 2, 2, 2, 2, 2, 2, 2);
INSERT 0 9

postgres=# select a, b, grouping(a,b), sum(c), count(*), max(c) from gstest2 group by rollup ((a,b,c),(c,d));
 a | b | grouping | sum | count | max 
---+---+----------+-----+-------+-----
   |   |        3 |  24 |    18 |   2
 1 | 1 |        0 |   4 |     2 |   2
 1 | 2 |        0 |   4 |     2 |   2
 1 | 1 |        0 |   2 |     2 |   1
 2 | 2 |        0 |   4 |     2 |   2
 1 | 1 |        0 |  10 |    10 |   1
 1 | 2 |        0 |   4 |     2 |   2
 1 | 1 |        0 |  12 |    12 |   1
 1 | 1 |        0 |   4 |     2 |   2
 2 | 2 |        0 |   4 |     2 |   2
(10 rows)
{code}
{code:java}
scala> sql("""select a, b, grouping(a,b), sum(c), count(*), max(c) from gstest2 group by rollup ((a,b,c),(c,d))""").show
org.apache.spark.sql.AnalysisException: Invalid number of arguments for function grouping. Expected: 1; Found: 2; line 1 pos 13
  at org.apache.spark.sql.catalyst.analysis.FunctionRegistry$.$anonfun$expression$8(FunctionRegistry.scala:614)
  at scala.Option.getOrElse(Option.scala:189)
  at org.apache.spark.sql.catalyst.analysis.FunctionRegistry$.$anonfun$expression$4(FunctionRegistry.scala:598)
  at org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry.lookupFunction(FunctionRegistry.scala:121)
  at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction(SessionCatalog.scala:1375)
  at org.apache.spark.sql.hive.HiveSessionCatalog.super$lookupFunction(HiveSessionCatalog.scala:132)
  at org.apache.spark.sql.hive.HiveSessionCatalog.$anonfun$lookupFunction0$2(HiveSessionCatalog.scala:132)
  at scala.util.Try$.apply(Try.scala:213)
{code}


> Support more expressive forms in GroupingSets/Cube/Rollup
> ---------------------------------------------------------
>
>                 Key: SPARK-29705
>                 URL: https://issues.apache.org/jira/browse/SPARK-29705
>             Project: Spark
>          Issue Type: Sub-task
>          Components: SQL
>    Affects Versions: 3.0.0
>            Reporter: Takeshi Yamamuro
>            Priority: Major
>
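> PostgreSQL accepts a multi-argument grouping() call together with composite column groups in ROLLUP:
> {code:java}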
> postgres=# create table gstest2 (a integer, b integer, c integer, d integer, e integer, f integer, g integer, h integer);
> postgres=# insert into gstest2 values
> postgres-#   (1, 1, 1, 1, 1, 1, 1, 1),
> postgres-#   (1, 1, 1, 1, 1, 1, 1, 2),
> postgres-#   (1, 1, 1, 1, 1, 1, 2, 2),
> postgres-#   (1, 1, 1, 1, 1, 2, 2, 2),
> postgres-#   (1, 1, 1, 1, 2, 2, 2, 2),
> postgres-#   (1, 1, 1, 2, 2, 2, 2, 2),
> postgres-#   (1, 1, 2, 2, 2, 2, 2, 2),
> postgres-#   (1, 2, 2, 2, 2, 2, 2, 2),
> postgres-#   (2, 2, 2, 2, 2, 2, 2, 2);
> INSERT 0 9
> postgres=# select a, b, grouping(a,b), sum(c), count(*), max(c) from gstest2 group by rollup ((a,b,c),(c,d));
>  a | b | grouping | sum | count | max 
> ---+---+----------+-----+-------+-----
>    |   |        3 |  24 |    18 |   2
>  1 | 1 |        0 |   4 |     2 |   2
>  1 | 2 |        0 |   4 |     2 |   2
>  1 | 1 |        0 |   2 |     2 |   1
>  2 | 2 |        0 |   4 |     2 |   2
>  1 | 1 |        0 |  10 |    10 |   1
>  1 | 2 |        0 |   4 |     2 |   2
>  1 | 1 |        0 |  12 |    12 |   1
>  1 | 1 |        0 |   4 |     2 |   2
>  2 | 2 |        0 |   4 |     2 |   2
> (10 rows)
> {code}
> Spark fails to analyze the same query because its grouping() function accepts only one argument:
> {code:java}
> scala> sql("""select a, b, grouping(a,b), sum(c), count(*), max(c) from gstest2 group by rollup ((a,b,c),(c,d))""").show
> org.apache.spark.sql.AnalysisException: Invalid number of arguments for function grouping. Expected: 1; Found: 2; line 1 pos 13
>   at org.apache.spark.sql.catalyst.analysis.FunctionRegistry$.$anonfun$expression$8(FunctionRegistry.scala:614)
>   at scala.Option.getOrElse(Option.scala:189)
>   at org.apache.spark.sql.catalyst.analysis.FunctionRegistry$.$anonfun$expression$4(FunctionRegistry.scala:598)
>   at org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry.lookupFunction(FunctionRegistry.scala:121)
>   at org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction(SessionCatalog.scala:1375)
>   at org.apache.spark.sql.hive.HiveSessionCatalog.super$lookupFunction(HiveSessionCatalog.scala:132)
>   at org.apache.spark.sql.hive.HiveSessionCatalog.$anonfun$lookupFunction0$2(HiveSessionCatalog.scala:132)
>   at scala.util.Try$.apply(Try.scala:213)
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org