Posted to reviews@spark.apache.org by gatorsmile <gi...@git.apache.org> on 2018/08/07 17:53:57 UTC

[GitHub] spark pull request #22030: [SPARK-25048][SQL] Pivoting by multiple columns i...

Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22030#discussion_r208326382
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala ---
    @@ -403,20 +415,29 @@ class RelationalGroupedDataset protected[sql](
        *
        * {{{
        *   // Compute the sum of earnings for each year by course with each course as a separate column
    -   *   df.groupBy($"year").pivot($"course", Seq("dotNET", "Java")).sum($"earnings")
    +   *   df.groupBy($"year").pivot($"course", Seq(lit("dotNET"), lit("Java"))).sum($"earnings")
    +   * }}}
    +   *
    +   * For pivoting by multiple columns, use the `struct` function to combine the columns and values:
    +   *
    +   * {{{
    +   *   df
    +   *     .groupBy($"year")
    +   *     .pivot(struct($"course", $"training"), Seq(struct(lit("java"), lit("Experts"))))
    +   *     .agg(sum($"earnings"))
        * }}}
        *
        * @param pivotColumn the column to pivot.
        * @param values List of values that will be translated to columns in the output DataFrame.
        * @since 2.4.0
        */
    -  def pivot(pivotColumn: Column, values: Seq[Any]): RelationalGroupedDataset = {
    +  def pivot(pivotColumn: Column, values: Seq[Column]): RelationalGroupedDataset = {
    --- End diff --
    
    @HyukjinKwon I think this change is better than what https://github.com/apache/spark/pull/21699 did. 
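    Below is a minimal, self-contained sketch of how the overload proposed in the diff above, pivot(pivotColumn: Column, values: Seq[Column]), would be called. The column names (year, course, training, earnings) follow the Scaladoc example; the sample rows, the object name PivotByStructSketch, and the local-mode SparkSession setup are assumptions added here for illustration and are not part of the pull request. It targets the signature as proposed in this diff, which may differ from the overload that was ultimately merged.
    
        import org.apache.spark.sql.SparkSession
        import org.apache.spark.sql.functions.{lit, struct, sum}
    
        // Hypothetical driver object; names and data are illustrative only.
        object PivotByStructSketch {
          def main(args: Array[String]): Unit = {
            val spark = SparkSession.builder()
              .appName("PivotByStructSketch")
              .master("local[*]")
              .getOrCreate()
            import spark.implicits._
    
            // Made-up sample rows using the column names from the Scaladoc example.
            val df = Seq(
              (2012, "dotNET", "Experts", 10000),
              (2012, "Java", "Experts", 20000),
              (2013, "Java", "Dummies", 5000)
            ).toDF("year", "course", "training", "earnings")
    
            // Single pivot column, with the pivot values passed as Column literals.
            df.groupBy($"year")
              .pivot($"course", Seq(lit("dotNET"), lit("Java")))
              .agg(sum($"earnings"))
              .show()
    
            // Multiple pivot columns: struct combines both the pivot columns and
            // the value tuples, as described in the updated Scaladoc.
            df.groupBy($"year")
              .pivot(struct($"course", $"training"), Seq(struct(lit("Java"), lit("Experts"))))
              .agg(sum($"earnings"))
              .show()
    
            spark.stop()
          }
        }
    
    The same calls should also type-check against a Seq[Any]-based overload, since a Seq of Columns is a Seq of Any; the point of the Seq[Column] signature under discussion is to make the Column-typed usage explicit.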
    



---
