You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by do...@apache.org on 2019/05/20 02:28:01 UTC

[spark] branch branch-2.3 updated: [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.3 by this push:
     new 89095f6  [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)
89095f6 is described below

commit 89095f67e12f3a33ef57fd75a41351559cf01863
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Sun May 19 19:26:20 2019 -0700

    [SPARK-27771][SQL] Add SQL description for grouping functions (cube, rollup, grouping and grouping_id)
    
    ## What changes were proposed in this pull request?
    
    Both look added as of 2.0 (see SPARK-12541 and SPARK-12706). I referred existing docs and examples in other API docs.
    
    ## How was this patch tested?
    
    Manually built the documentation and, by running examples, by running `DESCRIBE FUNCTION EXTENDED`.
    
    Closes #24642 from HyukjinKwon/SPARK-27771.
    
    Authored-by: HyukjinKwon <gu...@apache.org>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
    (cherry picked from commit 2431ab0999dbb322dcefeb9b1671d935945dc29a)
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 .../spark/sql/catalyst/expressions/grouping.scala  | 74 ++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
index 3be761c..b8ff455 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala
@@ -38,14 +38,65 @@ trait GroupingSet extends Expression with CodegenFallback {
   override def eval(input: InternalRow): Any = throw new UnsupportedOperationException
 }
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional cube using the specified columns
+      so that we can run aggregation on them.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
+        NULL    2       1
+        NULL    NULL    2
+        Alice   2       1
+        Bob     5       1
+        NULL    5       1
+        Bob     NULL    1
+        Alice   NULL    1
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {}
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - create a multi-dimensional rollup using the specified columns
+      so that we can run aggregation on them.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, age, count(*) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY _FUNC_(name, age);
+        NULL    NULL    2
+        Alice   2       1
+        Bob     5       1
+        Bob     NULL    1
+        Alice   NULL    1
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {}
 
 /**
  * Indicates whether a specified column expression in a GROUP BY list is aggregated or not.
  * GROUPING returns 1 for aggregated or 0 for not aggregated in the result set.
  */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_(col) - indicates whether a specified column in a GROUP BY is aggregated or
+      not, returns 1 for aggregated or 0 for not aggregated in the result set.
+  """,
+  examples = """
+    Examples:
+      > SELECT name, _FUNC_(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);
+        Alice   0       2
+        NULL    1       7
+        Bob     0       5
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class Grouping(child: Expression) extends Expression with Unevaluable {
   override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
   override def children: Seq[Expression] = child :: Nil
@@ -58,6 +109,29 @@ case class Grouping(child: Expression) extends Expression with Unevaluable {
  *
  * If groupByExprs is empty, it means all grouping expressions in GroupingSets.
  */
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = """
+    _FUNC_([col1[, col2 ..]]) - returns the level of grouping, equals to
+      `(grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn)`
+  """,
+  examples = """
+    Examples:
+      > SELECT name, _FUNC_(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);
+        NULL    2       2       165.0
+        Alice   0       2       165.0
+        NULL    2       5       180.0
+        NULL    3       7       172.5
+        Bob     0       5       180.0
+        Bob     1       5       180.0
+        Alice   1       2       165.0
+  """,
+  note = """
+    Input columns should match with grouping columns exactly, or empty (means all the grouping
+    columns).
+  """,
+  since = "2.0.0")
+// scalastyle:on line.size.limit
 case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable {
   override def references: AttributeSet = AttributeSet(VirtualColumn.groupingIdAttribute :: Nil)
   override def children: Seq[Expression] = groupByExprs


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org