Posted to reviews@spark.apache.org by codeatri <gi...@git.apache.org> on 2018/08/06 19:04:46 UTC

[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

GitHub user codeatri opened a pull request:

    https://github.com/apache/spark/pull/22013

    [SPARK-23939][SQL] Add transform_keys function

    ## What changes were proposed in this pull request?
    This PR adds the transform_keys function, which applies a lambda function to each entry of a map and transforms the keys, leaving the values unchanged.
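
    For illustration, a minimal sketch of the intended semantics (the `spark`
    session, the column name, and the output shown are assumptions for this
    example, not taken from the patch):

        import spark.implicits._

        // The lambda receives both key and value; only the keys are rewritten,
        // the values pass through unchanged.
        val df = Seq(Map(1 -> "a", 2 -> "b")).toDF("m")
        df.selectExpr("transform_keys(m, (k, v) -> k + 1)").show()
        // expected result: Map(2 -> "a", 3 -> "b")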
    
    ## How was this patch tested?
    Added tests.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/codeatri/spark SPARK-23939

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/spark/pull/22013.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #22013
    
----
commit 0a19cc44bf694f76f8f1be8faeaa16dc47f9bb86
Author: codeatri <ne...@...>
Date:   2018-08-06T18:32:47Z

    Added Support for transform_keys function

----


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test FAILed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94758/
    Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by hvanhovell <gi...@git.apache.org>.
Github user hvanhovell commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    ok to test


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210373675
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[Int, Boolean](25 -> true, 26 -> false)
    +    ).toDF("x")
    +
    +    val dfExample4 = Seq(
    +      Map[Array[Int], Boolean](Array(1, 2) -> false)
    +    ).toDF("y")
    +
    +
    +    def testMapOfPrimitiveTypesCombination(): Unit = {
    +      checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
    +        Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
    +        "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
    +        Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
    +        Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
    +        Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) ->  k % 2 = 0 OR v)"),
    +        Seq(Row(Map(true -> true, true -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
    +        Seq(Row(Map(false -> false))))
    +    }
    +    // Test with local relation, the Project will be evaluated without codegen
    +    testMapOfPrimitiveTypesCombination()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    dfExample3.cache()
    +    dfExample4.cache()
    +    // Test with cached relation, the Project will be evaluated with codegen
    +    testMapOfPrimitiveTypesCombination()
    +  }
    +
    +  test("transform keys function - Invalid lambda functions and exceptions") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[String, String]("a" -> "b")
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[String, String]("a" -> null)
    +    ).toDF("x")
    +
    +    def testInvalidLambdaFunctions(): Unit = {
    +      val ex1 = intercept[AnalysisException] {
    +        dfExample1.selectExpr("transform_keys(i, k -> k )")
    +      }
    +      assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
    +
    +      val ex2 = intercept[AnalysisException] {
    +        dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
    +      }
    +      assert(ex2.getMessage.contains(
    +      "The number of lambda function arguments '3' does not match"))
    +
    +      val ex3 = intercept[RuntimeException] {
    +        dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
    +      }
    +      assert(ex3.getMessage.contains("Cannot use null as map key!"))
    +    }
    +
    +    testInvalidLambdaFunctions()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    testInvalidLambdaFunctions()
    --- End diff --
    
    @ueshin I would like to ask you a generic question regarding higher-order functions. Is it necessary to perform checks on the codegen paths if all of the newly added functions extend ```CodegenFallback```? Also, is there a plan to add codegen for these functions in the future?
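
    For context, a standalone sketch of the fallback idea (hypothetical types,
    not Spark's real classes): an expression mixing in ```CodegenFallback``` has
    no dedicated generated code; the generated path just calls back into the
    interpreted eval, so both paths end up executing the same logic.

        // Hypothetical model of the CodegenFallback pattern (illustration only).
        trait Expr { def eval(row: Seq[Any]): Any }

        trait Fallback extends Expr {
          // The "generated" path delegates to the interpreted evaluator,
          // so a codegen test exercises the same logic as an interpreted test.
          def genCode(): Seq[Any] => Any = row => eval(row)
        }

        object FallbackDemo extends App {
          val plusOne = new Fallback { def eval(row: Seq[Any]): Any = row.head.asInstanceOf[Int] + 1 }
          assert(plusOne.eval(Seq(41)) == plusOne.genCode()(Seq(41)))
        }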


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94405 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94405/testReport)** for PR 22013 at commit [`150a6a5`](https://github.com/apache/spark/commit/150a6a5c405c78e7a5f7dd9b3f3c72f95290ec71).
     * This patch **fails due to an unknown error code, -9**.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94817 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94817/testReport)** for PR 22013 at commit [`58b60b2`](https://github.com/apache/spark/commit/58b60b2f851fb1464743257fe1cca075a1e77ba9).
     * This patch **fails Spark unit tests**.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210368929
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,62 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  @transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    +
    +  override def dataType: DataType = {
    +    MapType(function.dataType, valueType, valueContainsNull)
    +  }
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
    +    copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
    +  }
    +
    +  @transient lazy val LambdaFunction(
    +  _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +
    +
    +  override def nullSafeEval(inputRow: InternalRow, argumentValue: Any): Any = {
    +    val map = argumentValue.asInstanceOf[MapData]
    +    val f = functionForEval
    --- End diff --
    
    Can't we use ```functionForEval``` directly?
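
    (If so, presumably the loop would just inline the call, along these lines;
    a sketch against the quoted diff, not the committed code:)

        var i = 0
        while (i < map.numElements) {
          keyVar.value.set(map.keyArray().get(i, keyVar.dataType))
          valueVar.value.set(map.valueArray().get(i, valueVar.dataType))
          // inlined: no intermediate `val f = functionForEval`
          val result = functionForEval.eval(inputRow)
          if (result == null) {
            throw new RuntimeException("Cannot use null as map key!")
          }
          resultKeys.update(i, result)
          i += 1
        }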


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test FAILed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208811976
  
    --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
    @@ -230,4 +236,56 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
             (acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
           15)
       }
    +
    +  test("TransformKeys") {
    +    val ai0 = Literal.create(
    +      Map(1 -> 1, 2 -> 2, 3 -> 3),
    +      MapType(IntegerType, IntegerType))
    --- End diff --
    
    Can you add `valueContainsNull` explicitly?
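
    i.e., spelling out the flag rather than relying on the ```MapType``` default,
    e.g.:

        val ai0 = Literal.create(
          Map(1 -> 1, 2 -> 2, 3 -> 3),
          MapType(IntegerType, IntegerType, valueContainsNull = false))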


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94455 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94455/testReport)** for PR 22013 at commit [`6526630`](https://github.com/apache/spark/commit/652663077e383f8b188743c4494d697e34d5d02c).
     * This patch passes all tests.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210160909
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,65 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  override def dataType: DataType = {
    +    val map = argument.dataType.asInstanceOf[MapType]
    +    MapType(function.dataType, map.valueType, map.valueContainsNull)
    +  }
    +
    +  @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
    +    copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val LambdaFunction(
    +    _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +    (keyVar, valueVar)
    +  }
    +
    +  override def nullSafeEval(inputRow: InternalRow, argumentValue: Any): Any = {
    +    val map = argumentValue.asInstanceOf[MapData]
    +    val f = functionForEval
    +    val resultKeys = new GenericArrayData(new Array[Any](map.numElements))
    +    var i = 0
    +    while (i < map.numElements) {
    +      keyVar.value.set(map.keyArray().get(i, keyVar.dataType))
    +      valueVar.value.set(map.valueArray().get(i, valueVar.dataType))
    +      val result = f.eval(inputRow)
    +      if (result ==  null) {
    +        throw new RuntimeException("Cannot use null as map key!")
    +      }
    +      resultKeys.update(i, result)
    +      i += 1
    +    }
    +    new ArrayBasedMapData(resultKeys, map.valueArray())
    +  }
    +
    +  override def prettyName: String = "transform_keys"
    +  }
    --- End diff --
    
    nit: indent


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208160707
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -365,3 +365,69 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys in a map using the transform_keys function.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
    --- End diff --
    
    nit: missing space -> ```k, v```


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208164141
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -365,3 +365,69 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys in a map using the transform_keys function.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    input: Expression,
    +    function: Expression)
    +  extends ArrayBasedHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = input.nullable
    +
    +  override def dataType: DataType = {
    +    val valueType = input.dataType.asInstanceOf[MapType].valueType
    +    MapType(function.dataType, valueType, input.nullable)
    --- End diff --
    
    Is there any reason for changing ```valueContainsNull``` flag if the function transforms just keys? WDYT about:
    ```
    val MapType(_, valueType, valueContainsNull) = input.dataType.asInstanceOf[MapType]
    MapType(function.dataType, valueType, valueContainsNull)
    ```


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test PASSed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94315/
    Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test FAILed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94788/
    Test FAILed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210159871
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,65 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  override def dataType: DataType = {
    +    val map = argument.dataType.asInstanceOf[MapType]
    +    MapType(function.dataType, map.valueType, map.valueContainsNull)
    +  }
    +
    +  @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
    +    copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val LambdaFunction(
    +    _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +    (keyVar, valueVar)
    +  }
    +
    +  override def nullSafeEval(inputRow: InternalRow, argumentValue: Any): Any = {
    +    val map = argumentValue.asInstanceOf[MapData]
    +    val f = functionForEval
    +    val resultKeys = new GenericArrayData(new Array[Any](map.numElements))
    +    var i = 0
    +    while (i < map.numElements) {
    +      keyVar.value.set(map.keyArray().get(i, keyVar.dataType))
    +      valueVar.value.set(map.valueArray().get(i, valueVar.dataType))
    +      val result = f.eval(inputRow)
    +      if (result ==  null) {
    --- End diff --
    
    nit: extra space between `==` and `null`.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Can one of the admins verify this patch?


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94758 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94758/testReport)** for PR 22013 at commit [`bb52630`](https://github.com/apache/spark/commit/bb52630dd720ecaf5f7ffe0c498d422ce60f3bb7).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test PASSed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94404/
    Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94765 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94765/testReport)** for PR 22013 at commit [`621213d`](https://github.com/apache/spark/commit/621213dd1658fbc8cb19e15dd77c9c389653d4db).
     * This patch **fails to build**.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208169140
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2071,6 +2071,158 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex4.getMessage.contains("data type mismatch: argument 3 requires int type"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[Int, String](1 -> "a", 2 -> "b", 3 -> "c")
    +    ).toDF("x")
    +
    +    val dfExample3 = Seq(
    +      Map[String, Int]("a" -> 1, "b" -> 2, "c" -> 3)
    +    ).toDF("y")
    +
    +    val dfExample4 = Seq(
    +      Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
    +    ).toDF("z")
    +
    +    val dfExample5 = Seq(
    +      Map[Int, Boolean](25 -> true, 26 -> false)
    +    ).toDF("a")
    +
    +    val dfExample6 = Seq(
    +      Map[Int, String](25 -> "ab", 26 -> "cd")
    +    ).toDF("b")
    +
    +    val dfExample7 = Seq(
    +      Map[Array[Int], Boolean](Array(1, 2) -> false)
    +    ).toDF("c")
    +
    +
    +    def testMapOfPrimitiveTypesCombination(): Unit = {
    +      checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
    +        Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(x, (k, v) -> k + 1)"),
    +        Seq(Row(Map(2 -> "a", 3 -> "b", 4 -> "c"))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(y, (k, v) -> v * v)"),
    +        Seq(Row(Map(1 -> 1, 4 -> 2, 9 -> 3))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(y, (k, v) -> length(k) + v)"),
    +        Seq(Row(Map(2 -> 1, 3 -> 2, 4 -> 3))))
    +
    +      checkAnswer(
    +        dfExample3.selectExpr("transform_keys(y, (k, v) -> concat(k, cast(v as String)))"),
    +        Seq(Row(Map("a1" -> 1, "b2" -> 2, "c3" -> 3))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(z, " +
    +        "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
    +        Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(z, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
    +        Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(z, (k, v) -> k + v)"),
    +        Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
    +
    +      checkAnswer(dfExample5.selectExpr("transform_keys(a, (k, v) ->  k % 2 = 0 OR v)"),
    +        Seq(Row(Map(true -> true, true -> false))))
    +
    +      checkAnswer(dfExample5.selectExpr("transform_keys(a, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample5.selectExpr("transform_keys(a, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample6.selectExpr(
    +        "transform_keys(b, (k, v) ->  concat(conv(k, 10, 16) , substr(v, 1, 1)))"),
    +        Seq(Row(Map("19a" -> "ab", "1Ac" -> "cd"))))
    +
    +      checkAnswer(dfExample7.selectExpr("transform_keys(c, (k, v) -> array_contains(k, 3) AND v)"),
    +        Seq(Row(Map(false -> false))))
    +    }
    +    // Test with local relation, the Project will be evaluated without codegen
    +    testMapOfPrimitiveTypesCombination()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    dfExample3.cache()
    +    dfExample4.cache()
    +    dfExample5.cache()
    +    dfExample6.cache()
    +    // Test with cached relation, the Project will be evaluated with codegen
    +    testMapOfPrimitiveTypesCombination()
    --- End diff --
    
    Do we have to do that if the expression implements ```CodegenFallback```?


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    I'd merge this now.
    @mn-mikke @mgaido91 If you have any other comments, let's have a follow-up pr.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94451 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94451/testReport)** for PR 22013 at commit [`9f6a8ab`](https://github.com/apache/spark/commit/9f6a8abae75b70c5be89c6bbccf3a574bd7fb17d).
     * This patch **fails Spark unit tests**.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210163358
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[Int, Boolean](25 -> true, 26 -> false)
    +    ).toDF("x")
    +
    +    val dfExample4 = Seq(
    +      Map[Array[Int], Boolean](Array(1, 2) -> false)
    +    ).toDF("y")
    +
    +
    +    def testMapOfPrimitiveTypesCombination(): Unit = {
    +      checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
    +        Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
    +        "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
    +        Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
    +        Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
    +        Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) ->  k % 2 = 0 OR v)"),
    +        Seq(Row(Map(true -> true, true -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
    +        Seq(Row(Map(false -> false))))
    +    }
    +    // Test with local relation, the Project will be evaluated without codegen
    +    testMapOfPrimitiveTypesCombination()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    dfExample3.cache()
    +    dfExample4.cache()
    +    // Test with cached relation, the Project will be evaluated with codegen
    +    testMapOfPrimitiveTypesCombination()
    +  }
    +
    +  test("transform keys function - Invalid lambda functions and exceptions") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[String, String]("a" -> "b")
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[String, String]("a" -> null)
    +    ).toDF("x")
    +
    +    def testInvalidLambdaFunctions(): Unit = {
    +      val ex1 = intercept[AnalysisException] {
    +        dfExample1.selectExpr("transform_keys(i, k -> k )")
    +      }
    +      assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
    +
    +      val ex2 = intercept[AnalysisException] {
    +        dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
    +      }
    +      assert(ex2.getMessage.contains(
    +      "The number of lambda function arguments '3' does not match"))
    +
    +      val ex3 = intercept[RuntimeException] {
    +        dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
    +      }
    +      assert(ex3.getMessage.contains("Cannot use null as map key!"))
    +    }
    +
    +    testInvalidLambdaFunctions()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    testInvalidLambdaFunctions()
    --- End diff --
    
    We need `dfExample3.cache()` as well?
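
    i.e., presumably the tail of the test should cache all three frames before
    the second pass:

        testInvalidLambdaFunctions()
        dfExample1.cache()
        dfExample2.cache()
        dfExample3.cache()
        // second pass against the cached relations
        testInvalidLambdaFunctions()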


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210162791
  
    --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
    @@ -283,6 +289,75 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
           15)
       }
     
    +  test("TransformKeys") {
    +    val ai0 = Literal.create(
    +      Map(1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4),
    +      MapType(IntegerType, IntegerType, valueContainsNull = false))
    +    val ai1 = Literal.create(
    +      Map.empty[Int, Int],
    +      MapType(IntegerType, IntegerType, valueContainsNull = true))
    +    val ai2 = Literal.create(
    +      Map(1 -> 1, 2 -> null, 3 -> 3),
    +      MapType(IntegerType, IntegerType, valueContainsNull = true))
    +    val ai3 = Literal.create(null, MapType(IntegerType, IntegerType, valueContainsNull = false))
    +
    +    val plusOne: (Expression, Expression) => Expression = (k, v) => k + 1
    +    val plusValue: (Expression, Expression) => Expression = (k, v) => k + v
    +    val modKey: (Expression, Expression) => Expression = (k, v) => k % 3
    +
    +    checkEvaluation(transformKeys(ai0, plusOne), Map(2 -> 1, 3 -> 2, 4 -> 3, 5 -> 4))
    +    checkEvaluation(transformKeys(ai0, plusValue), Map(2 -> 1, 4 -> 2, 6 -> 3, 8 -> 4))
    +    checkEvaluation(
    +      transformKeys(transformKeys(ai0, plusOne), plusValue), Map(3 -> 1, 5 -> 2, 7 -> 3, 9 -> 4))
    +    checkEvaluation(transformKeys(ai0, modKey),
    +      ArrayBasedMapData(Array(1, 2, 0, 1), Array(1, 2, 3, 4)))
    +    checkEvaluation(transformKeys(ai1, plusOne), Map.empty[Int, Int])
    +    checkEvaluation(transformKeys(ai1, plusOne), Map.empty[Int, Int])
    +    checkEvaluation(
    +      transformKeys(transformKeys(ai1, plusOne), plusValue), Map.empty[Int, Int])
    +    checkEvaluation(transformKeys(ai2, plusOne), Map(2 -> 1, 3 -> null, 4 -> 3))
    +    checkEvaluation(
    +      transformKeys(transformKeys(ai2, plusOne), plusOne), Map(3 -> 1, 4 -> null, 5 -> 3))
    +    checkEvaluation(transformKeys(ai3, plusOne), null)
    +
    +    val as0 = Literal.create(
    +      Map("a" -> "xy", "bb" -> "yz", "ccc" -> "zx"),
    +      MapType(StringType, StringType, valueContainsNull = false))
    +    val as1 = Literal.create(
    +      Map("a" -> "xy", "bb" -> "yz", "ccc" -> null),
    +      MapType(StringType, StringType, valueContainsNull = true))
    +    val as2 = Literal.create(null,
    +      MapType(StringType, StringType, valueContainsNull = false))
    +    val asn = Literal.create(Map.empty[StringType, StringType],
    --- End diff --
    
    `as3`?


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210160419
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,65 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  override def dataType: DataType = {
    +    val map = argument.dataType.asInstanceOf[MapType]
    +    MapType(function.dataType, map.valueType, map.valueContainsNull)
    +  }
    +
    +  @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    --- End diff --
    
    `lazy val`?
    Could you add a test when `argument` is not a map in invalid cases of `DataFrameFunctionsSuite`?
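
    For the first point, the revision quoted elsewhere in this thread makes the
    extractor lazy:

        @transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType

    For the second, a hedged sketch of such a test (the exact error message is
    an assumption):

        val ex = intercept[AnalysisException] {
          Seq(1).toDF("i").selectExpr("transform_keys(i, (k, v) -> k + 1)")
        }
        assert(ex.getMessage.contains("data type mismatch"))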


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test PASSed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94775/
    Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94788 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94788/testReport)** for PR 22013 at commit [`e5d9b05`](https://github.com/apache/spark/commit/e5d9b051b027cf86fbcd82701f54e50f1aeac7f6).
     * This patch **fails due to an unknown error code, -9**.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94775 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94775/testReport)** for PR 22013 at commit [`5db526b`](https://github.com/apache/spark/commit/5db526be7bad0fa38dc9743c919014b475cf8aeb).
     * This patch passes all tests.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94315 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94315/testReport)** for PR 22013 at commit [`0a19cc4`](https://github.com/apache/spark/commit/0a19cc44bf694f76f8f1be8faeaa16dc47f9bb86).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94404 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94404/testReport)** for PR 22013 at commit [`5806ac4`](https://github.com/apache/spark/commit/5806ac46707772fd1e4befa445157ed0f9c75084).
     * This patch passes all tests.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94518 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94518/testReport)** for PR 22013 at commit [`1cbaf0c`](https://github.com/apache/spark/commit/1cbaf0c6adc508299d42a82628f4f0954bed7a95).
     * This patch passes all tests.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210161616
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[Int, Boolean](25 -> true, 26 -> false)
    +    ).toDF("x")
    +
    +    val dfExample4 = Seq(
    +      Map[Array[Int], Boolean](Array(1, 2) -> false)
    +    ).toDF("y")
    +
    +
    +    def testMapOfPrimitiveTypesCombination(): Unit = {
    +      checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
    +        Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
    +        "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
    +        Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
    +        Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
    +        Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) ->  k % 2 = 0 OR v)"),
    +        Seq(Row(Map(true -> true, true -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
    +        Seq(Row(Map(false -> false))))
    +    }
    +    // Test with local relation, the Project will be evaluated without codegen
    +    testMapOfPrimitiveTypesCombination()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    dfExample3.cache()
    +    dfExample4.cache()
    +    // Test with cached relation, the Project will be evaluated with codegen
    +    testMapOfPrimitiveTypesCombination()
    +  }
    +
    +  test("transform keys function - Invalid lambda functions and exceptions") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[String, String]("a" -> "b")
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[String, String]("a" -> null)
    +    ).toDF("x")
    +
    +    def testInvalidLambdaFunctions(): Unit = {
    +      val ex1 = intercept[AnalysisException] {
    +        dfExample1.selectExpr("transform_keys(i, k -> k )")
    +      }
    +      assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
    +
    +      val ex2 = intercept[AnalysisException] {
    +        dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
    +      }
    +      assert(ex2.getMessage.contains(
    +      "The number of lambda function arguments '3' does not match"))
    +
    +      val ex3 = intercept[RuntimeException] {
    +        dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
    +      }
    +      assert(ex3.getMessage.contains("Cannot use null as map key!"))
    --- End diff --
    
    Seems like we can do those tests only with `dfExample3`?
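
    A sketch of the consolidated version (same assertions as above, all run
    against ```dfExample3```, which already carries the null value):

        def testInvalidLambdaFunctions(): Unit = {
          val ex1 = intercept[AnalysisException] {
            dfExample3.selectExpr("transform_keys(x, k -> k)")
          }
          assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))

          val ex2 = intercept[AnalysisException] {
            dfExample3.selectExpr("transform_keys(x, (k, v, w) -> k)")
          }
          assert(ex2.getMessage.contains("The number of lambda function arguments '3' does not match"))

          val ex3 = intercept[RuntimeException] {
            dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
          }
          assert(ex3.getMessage.contains("Cannot use null as map key!"))
        }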


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test FAILed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94811/
    Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94405 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94405/testReport)** for PR 22013 at commit [`150a6a5`](https://github.com/apache/spark/commit/150a6a5c405c78e7a5f7dd9b3f3c72f95290ec71).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Can one of the admins verify this patch?


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210193591
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,65 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  override def dataType: DataType = {
    +    val map = argument.dataType.asInstanceOf[MapType]
    +    MapType(function.dataType, map.valueType, map.valueContainsNull)
    --- End diff --
    
    What about this?
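
    For reference, the revision quoted earlier in the thread resolves this by
    reusing the destructured ```MapType``` instead of casting again:

        @transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType

        override def dataType: DataType = {
          MapType(function.dataType, valueType, valueContainsNull)
        }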


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94819 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94819/testReport)** for PR 22013 at commit [`2f4943f`](https://github.com/apache/spark/commit/2f4943f3cec0705c296b2988c415ac3372b7ea86).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94451 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94451/testReport)** for PR 22013 at commit [`9f6a8ab`](https://github.com/apache/spark/commit/9f6a8abae75b70c5be89c6bbccf3a574bd7fb17d).


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210161746
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[Int, Boolean](25 -> true, 26 -> false)
    +    ).toDF("x")
    +
    +    val dfExample4 = Seq(
    +      Map[Array[Int], Boolean](Array(1, 2) -> false)
    +    ).toDF("y")
    +
    +
    +    def testMapOfPrimitiveTypesCombination(): Unit = {
    +      checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
    +        Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
    +        "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
    +        Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
    +        Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
    +        Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) ->  k % 2 = 0 OR v)"),
    +        Seq(Row(Map(true -> true, true -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
    +        Seq(Row(Map(false -> false))))
    +    }
    +    // Test with local relation, the Project will be evaluated without codegen
    +    testMapOfPrimitiveTypesCombination()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    dfExample3.cache()
    +    dfExample4.cache()
    +    // Test with cached relation, the Project will be evaluated with codegen
    +    testMapOfPrimitiveTypesCombination()
    +  }
    +
    +  test("transform keys function - Invalid lambda functions and exceptions") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[String, String]("a" -> "b")
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[String, String]("a" -> null)
    +    ).toDF("x")
    +
    +    def testInvalidLambdaFunctions(): Unit = {
    +      val ex1 = intercept[AnalysisException] {
    +        dfExample1.selectExpr("transform_keys(i, k -> k )")
    +      }
    +      assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
    +
    +      val ex2 = intercept[AnalysisException] {
    +        dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
    +      }
    +      assert(ex2.getMessage.contains(
    +      "The number of lambda function arguments '3' does not match"))
    --- End diff --
    
    nit: indent


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Btw, we need one more right parenthesis after the second `array(1, 2, 3)` and a space after the comma in `(k,v)` in the description, don't we?
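    
    Concretely, the corrected description block (matching the updated diff elsewhere in this thread) would read:
    
    ```scala
    examples = """
        Examples:
          > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
           map(array(2, 3, 4), array(1, 2, 3))
          > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
           map(array(2, 4, 6), array(1, 2, 3))
      """,
    ```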


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by codeatri <gi...@git.apache.org>.
Github user codeatri commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    @hvanhovell  @mn-mikke  @mgaido91 Thanks for the review! I have addressed all your comments and added appropriate test cases.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94817 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94817/testReport)** for PR 22013 at commit [`58b60b2`](https://github.com/apache/spark/commit/58b60b2f851fb1464743257fe1cca075a1e77ba9).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94811 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94811/testReport)** for PR 22013 at commit [`fb885f4`](https://github.com/apache/spark/commit/fb885f4797e72d0c2cbfa23980199c71e0c5aaee).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94788 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94788/testReport)** for PR 22013 at commit [`e5d9b05`](https://github.com/apache/spark/commit/e5d9b051b027cf86fbcd82701f54e50f1aeac7f6).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94315 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94315/testReport)** for PR 22013 at commit [`0a19cc4`](https://github.com/apache/spark/commit/0a19cc44bf694f76f8f1be8faeaa16dc47f9bb86).
     * This patch passes all tests.
     * This patch merges cleanly.
     * This patch adds the following public classes _(experimental)_:
      * `case class TransformKeys(`


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210161509
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[Int, Boolean](25 -> true, 26 -> false)
    +    ).toDF("x")
    +
    +    val dfExample4 = Seq(
    +      Map[Array[Int], Boolean](Array(1, 2) -> false)
    +    ).toDF("y")
    +
    +
    +    def testMapOfPrimitiveTypesCombination(): Unit = {
    +      checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
    +        Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
    +        "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
    +        Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
    +        Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
    +
    +      checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
    +        Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) ->  k % 2 = 0 OR v)"),
    +        Seq(Row(Map(true -> true, true -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
    +        Seq(Row(Map(50 -> true, 78 -> false))))
    +
    +      checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
    +        Seq(Row(Map(false -> false))))
    +    }
    +    // Test with local relation, the Project will be evaluated without codegen
    +    testMapOfPrimitiveTypesCombination()
    +    dfExample1.cache()
    +    dfExample2.cache()
    +    dfExample3.cache()
    +    dfExample4.cache()
    +    // Test with cached relation, the Project will be evaluated with codegen
    +    testMapOfPrimitiveTypesCombination()
    +  }
    +
    +  test("transform keys function - Invalid lambda functions and exceptions") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[String, String]("a" -> "b")
    +    ).toDF("j")
    +
    +    val dfExample3 = Seq(
    +      Map[String, String]("a" -> null)
    +    ).toDF("x")
    +
    +    def testInvalidLambdaFunctions(): Unit = {
    +      val ex1 = intercept[AnalysisException] {
    +        dfExample1.selectExpr("transform_keys(i, k -> k )")
    --- End diff --
    
    nit: extra space after `k -> k`.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test PASSed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mgaido91 <gi...@git.apache.org>.
Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210524076
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,59 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    --- End diff --
    
    I think this can be moved to `SimpleHigherOrderFunction`
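    
    A sketch of that move (illustrative only, assuming the shared `SimpleHigherOrderFunction` trait in `higherOrderFunctions.scala` exposes the `argument` expression):
    
    ```scala
    trait SimpleHigherOrderFunction extends HigherOrderFunction {
      def argument: Expression
    
      // shared default: the result is null iff the input map/array is null,
      // so subclasses such as TransformKeys need not repeat this override
      override def nullable: Boolean = argument.nullable
    }
    ```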


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208811179
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -442,3 +442,65 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
    --- End diff --
    
    ditto.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test FAILed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94451/
    Test FAILed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208811169
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -442,3 +442,65 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + 1);
    --- End diff --
    
    nit: we need one more right parenthesis after the second `array(1, 2, 3)`?


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test PASSed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94455/
    Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Can one of the admins verify this patch?


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208815130
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2117,6 +2117,198 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex4.getMessage.contains("data type mismatch: argument 3 requires int type"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    --- End diff --
    
    We don't need so many cases here. We only need to verify that the API works end to end.
    Evaluation checks of the function should be in `HigherOrderFunctionsSuite`.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by hvanhovell <gi...@git.apache.org>.
Github user hvanhovell commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208136330
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -365,3 +365,69 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys in a map using the transform_keys function.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    input: Expression,
    +    function: Expression)
    +  extends ArrayBasedHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = input.nullable
    +
    +  override def dataType: DataType = {
    +    val valueType = input.dataType.asInstanceOf[MapType].valueType
    +    MapType(function.dataType, valueType, input.nullable)
    +  }
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(MapType, expectingFunctionType)
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction):
    +  TransformKeys = {
    +    val (keyElementType, valueElementType, containsNull) = input.dataType match {
    +      case MapType(keyType, valueType, containsNullValue) =>
    +        (keyType, valueType, containsNullValue)
    +      case _ =>
    +        val MapType(keyType, valueType, containsNullValue) = MapType.defaultConcreteType
    +        (keyType, valueType, containsNullValue)
    +    }
    +    copy(function = f(function, (keyElementType, false) :: (valueElementType, containsNull) :: Nil))
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val LambdaFunction(
    +    _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +    (keyVar, valueVar)
    +  }
    +
    +  override def eval(input: InternalRow): Any = {
    +    val arr = this.input.eval(input).asInstanceOf[MapData]
    +    if (arr == null) {
    +      null
    +    } else {
    +      val f = functionForEval
    +      val resultKeys = new GenericArrayData(new Array[Any](arr.numElements))
    +      var i = 0
    +      while (i < arr.numElements) {
    +        keyVar.value.set(arr.keyArray().get(i, keyVar.dataType))
    +        valueVar.value.set(arr.valueArray().get(i, valueVar.dataType))
    +        resultKeys.update(i, f.eval(input))
    --- End diff --
    
    This assumes that the transformation will return a unique key, right? If it doesn't, you'll break the map semantics. For example: `map_key(some_map, (k, v) -> 0)`
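    
    A quick spark-shell sketch of that collision (illustrative data; the lambda sends every key to the same constant):
    
    ```scala
    // every entry lands on key 0, so the resulting map carries duplicate keys
    spark.range(1)
      .selectExpr("transform_keys(map(1, 'a', 2, 'b'), (k, v) -> 0)")
      .show(truncate = false)
    // expected to show something like [0 -> a, 0 -> b]
    ```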


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Thanks! merging to master.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test PASSed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94819/
    Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test FAILed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210165079
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,65 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  override def dataType: DataType = {
    +    val map = argument.dataType.asInstanceOf[MapType]
    +    MapType(function.dataType, map.valueType, map.valueContainsNull)
    +  }
    +
    +  @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
    +    copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val LambdaFunction(
    +    _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +    (keyVar, valueVar)
    +  }
    --- End diff --
    
    nit: how about:
    
    ```scala
    @transient lazy val LambdaFunction(_,
      (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    ```


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mgaido91 <gi...@git.apache.org>.
Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208159784
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -365,3 +365,69 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys in a map using the transform_keys function.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    input: Expression,
    +    function: Expression)
    +  extends ArrayBasedHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = input.nullable
    +
    +  override def dataType: DataType = {
    +    val valueType = input.dataType.asInstanceOf[MapType].valueType
    +    MapType(function.dataType, valueType, input.nullable)
    --- End diff --
    
    I think `input.nullable` is wrong here. This flag should indicate whether the map's values can contain null, not whether the returned map itself can be null.
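    
    To make the distinction explicit (a sketch of the intended shape, not the final code):
    
    ```scala
    override def dataType: DataType = {
      val mapType = input.dataType.asInstanceOf[MapType]
      // valueContainsNull describes the map's values...
      MapType(function.dataType, mapType.valueType, mapType.valueContainsNull)
    }
    
    // ...while nullable describes the map column itself
    override def nullable: Boolean = input.nullable
    ```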


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94765 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94765/testReport)** for PR 22013 at commit [`621213d`](https://github.com/apache/spark/commit/621213dd1658fbc8cb19e15dd77c9c389653d4db).


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208161643
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -365,3 +365,69 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys in a map using the transform_keys function.
    --- End diff --
    
    maybe a better comment?


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test FAILed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210193484
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,65 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  override def dataType: DataType = {
    +    val map = argument.dataType.asInstanceOf[MapType]
    +    MapType(function.dataType, map.valueType, map.valueContainsNull)
    +  }
    +
    +  @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
    +    copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val LambdaFunction(
    +    _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +    (keyVar, valueVar)
    +  }
    --- End diff --
    
    Sorry, I meant we don't need to surround it with:
    
    ```scala
    @transient lazy val (keyVar, valueVar) = {
      ...
      (keyVar, valueVar)
    }
    ```
    
    just
    
    ```scala
    @transient lazy val LambdaFunction(_,
      (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    ```
    
    should work.



---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210160577
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,65 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  override def dataType: DataType = {
    +    val map = argument.dataType.asInstanceOf[MapType]
    +    MapType(function.dataType, map.valueType, map.valueContainsNull)
    --- End diff --
    
    We can use `valueType` and `valueContainsNull` from the following val?
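    
    I.e., something like:
    
    ```scala
    @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    
    // reuse the extracted components instead of re-casting argument.dataType
    override def dataType: DataType =
      MapType(function.dataType, valueType, valueContainsNull)
    ```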


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94775 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94775/testReport)** for PR 22013 at commit [`5db526b`](https://github.com/apache/spark/commit/5db526be7bad0fa38dc9743c919014b475cf8aeb).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94455 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94455/testReport)** for PR 22013 at commit [`6526630`](https://github.com/apache/spark/commit/652663077e383f8b188743c4494d697e34d5d02c).


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by codeatri <gi...@git.apache.org>.
Github user codeatri commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208446666
  
    --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
    @@ -181,4 +187,46 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
             (acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
           15)
       }
    +
    +  test("TransformKeys") {
    +    val ai0 = Literal.create(
    +      Map(1 -> 1, 2 -> 2, 3 -> 3),
    --- End diff --
    
    Thanks for catching this!
    I've included test cases, both here and in DataFrameFunctionsSuite.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by codeatri <gi...@git.apache.org>.
Github user codeatri commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Thanks for the review @ueshin! I have addressed all your comments.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210527183
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,59 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transform Keys for every entry of the map by applying the transform_keys function.
    + * Returns map with transformed key entries
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    --- End diff --
    
    Makes sense.
    Let's have wrap-up PRs for higher-order functions after the remaining 2 PRs are merged.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208811790
  
    --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
    @@ -59,6 +59,12 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
         ArrayFilter(expr, createLambda(at.elementType, at.containsNull, f))
       }
     
    +  def transformKeys(expr: Expression, f: (Expression, Expression) => Expression): Expression = {
    +    val valueType = expr.dataType.asInstanceOf[MapType].valueType
    +    val keyType = expr.dataType.asInstanceOf[MapType].keyType
    +    TransformKeys(expr, createLambda(keyType, false, valueType, true, f))
    --- End diff --
    
    We should use `valueContainsNull` instead of `true`?
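    
    So the helper would become something like (a sketch):
    
    ```scala
    def transformKeys(expr: Expression, f: (Expression, Expression) => Expression): Expression = {
      val MapType(keyType, valueType, valueContainsNull) = expr.dataType
      // propagate the map's own value nullability instead of hard-coding `true`
      TransformKeys(expr, createLambda(keyType, false, valueType, valueContainsNull, f))
    }
    ```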


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test PASSed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94518/
    Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94758 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94758/testReport)** for PR 22013 at commit [`bb52630`](https://github.com/apache/spark/commit/bb52630dd720ecaf5f7ffe0c498d422ce60f3bb7).
     * This patch **fails Scala style tests**.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test FAILed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94817/
    Test FAILed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208812207
  
    --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
    @@ -230,4 +236,56 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
             (acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
           15)
       }
    +
    +  test("TransformKeys") {
    +    val ai0 = Literal.create(
    +      Map(1 -> 1, 2 -> 2, 3 -> 3),
    +      MapType(IntegerType, IntegerType))
    +    val ai1 = Literal.create(
    +      Map.empty[Int, Int],
    +      MapType(IntegerType, IntegerType))
    +    val ai2 = Literal.create(
    +      Map(1 -> 1, 2 -> null, 3 -> 3),
    +      MapType(IntegerType, IntegerType))
    --- End diff --
    
    Can you add tests for `Literal.create(null, MapType(IntegerType, IntegerType))`?
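    
    For example (a minimal sketch; the val name is illustrative):
    
    ```scala
    val ain = Literal.create(null, MapType(IntegerType, IntegerType))
    
    // a null map input should simply propagate to a null result
    checkEvaluation(transformKeys(ain, (k, v) => k + 1), null)
    ```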


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by asfgit <gi...@git.apache.org>.
Github user asfgit closed the pull request at:

    https://github.com/apache/spark/pull/22013


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94811 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94811/testReport)** for PR 22013 at commit [`fb885f4`](https://github.com/apache/spark/commit/fb885f4797e72d0c2cbfa23980199c71e0c5aaee).
     * This patch **fails Spark unit tests**.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208190999
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -365,3 +365,69 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys in a map using the transform_keys function.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    input: Expression,
    +    function: Expression)
    +  extends ArrayBasedHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = input.nullable
    +
    +  override def dataType: DataType = {
    +    val valueType = input.dataType.asInstanceOf[MapType].valueType
    +    MapType(function.dataType, valueType, input.nullable)
    +  }
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(MapType, expectingFunctionType)
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction):
    +  TransformKeys = {
    +    val (keyElementType, valueElementType, containsNull) = input.dataType match {
    +      case MapType(keyType, valueType, containsNullValue) =>
    +        (keyType, valueType, containsNullValue)
    +      case _ =>
    +        val MapType(keyType, valueType, containsNullValue) = MapType.defaultConcreteType
    +        (keyType, valueType, containsNullValue)
    +    }
    +    copy(function = f(function, (keyElementType, false) :: (valueElementType, containsNull) :: Nil))
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val LambdaFunction(
    +    _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +    (keyVar, valueVar)
    +  }
    +
    +  override def eval(input: InternalRow): Any = {
    +    val arr = this.input.eval(input).asInstanceOf[MapData]
    +    if (arr == null) {
    +      null
    +    } else {
    +      val f = functionForEval
    +      val resultKeys = new GenericArrayData(new Array[Any](arr.numElements))
    +      var i = 0
    +      while (i < arr.numElements) {
    +        keyVar.value.set(arr.keyArray().get(i, keyVar.dataType))
    +        valueVar.value.set(arr.valueArray().get(i, valueVar.dataType))
    +        resultKeys.update(i, f.eval(input))
    --- End diff --
    
    I'm not a fan of duplicated keys either, but other functions transforming maps have the same problem. See the discussions [here](https://github.com/apache/spark/pull/21282#discussion_r187234431) and [here](https://github.com/apache/spark/pull/21258#discussion_r186410527).
    
    Example:
    ```
    scala> spark.range(1).selectExpr("map(0,1,0,2)").show()
    +----------------+
    | map(0, 1, 0, 2)|
    +----------------+
    |[0 -> 1, 0 -> 2]|
    +----------------+
    ```


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208167785
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -365,3 +365,69 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +/**
    + * Transform Keys in a map using the transform_keys function.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    input: Expression,
    +    function: Expression)
    +  extends ArrayBasedHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = input.nullable
    +
    +  override def dataType: DataType = {
    +    val valueType = input.dataType.asInstanceOf[MapType].valueType
    +    MapType(function.dataType, valueType, input.nullable)
    +  }
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(MapType, expectingFunctionType)
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction):
    +  TransformKeys = {
    +    val (keyElementType, valueElementType, containsNull) = input.dataType match {
    +      case MapType(keyType, valueType, containsNullValue) =>
    +        (keyType, valueType, containsNullValue)
    +      case _ =>
    +        val MapType(keyType, valueType, containsNullValue) = MapType.defaultConcreteType
    +        (keyType, valueType, containsNullValue)
    +    }
    +    copy(function = f(function, (keyElementType, false) :: (valueElementType, containsNull) :: Nil))
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val LambdaFunction(
    +    _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
    +    (keyVar, valueVar)
    +  }
    +
    +  override def eval(input: InternalRow): Any = {
    +    val arr = this.input.eval(input).asInstanceOf[MapData]
    +    if (arr == null) {
    +      null
    +    } else {
    +      val f = functionForEval
    +      val resultKeys = new GenericArrayData(new Array[Any](arr.numElements))
    +      var i = 0
    +      while (i < arr.numElements) {
    +        keyVar.value.set(arr.keyArray().get(i, keyVar.dataType))
    +        valueVar.value.set(arr.valueArray().get(i, valueVar.dataType))
    +        resultKeys.update(i, f.eval(input))
    --- End diff --
    
    Maybe I'm missing something, but couldn't ```f.eval(input)``` evaluate to ```null```? Keys are not allowed to be ```null```. Other functions usually have a ```null``` check and throw a ```RuntimeException``` in such cases.
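    
    A sketch of that guard inside the eval loop (the exact error message is an assumption, mirroring what map construction raises elsewhere):
    
    ```scala
    val result = f.eval(input)
    if (result == null) {
      // map keys must not be null, so fail fast rather than build a corrupt map
      throw new RuntimeException("Cannot use null as map key!")
    }
    resultKeys.update(i, result)
    ```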


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r208169969
  
    --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
    @@ -181,4 +187,46 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
             (acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
           15)
       }
    +
    +  test("TransformKeys") {
    +    val ai0 = Literal.create(
    +      Map(1 -> 1, 2 -> 2, 3 -> 3),
    --- End diff --
    
    Maybe it's irrelevant, but WDYT about adding test cases with ```null``` values?


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test FAILed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94765/
    Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test FAILed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94405/
    Test FAILed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94518 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94518/testReport)** for PR 22013 at commit [`1cbaf0c`](https://github.com/apache/spark/commit/1cbaf0c6adc508299d42a82628f4f0954bed7a95).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    LGTM.
    @mn-mikke @mgaido91 Do you have any other comments on this?


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test PASSed.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94457 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94457/testReport)** for PR 22013 at commit [`f7fd231`](https://github.com/apache/spark/commit/f7fd2313dddfea3555bda61fc96339c24afb71b0).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94819 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94819/testReport)** for PR 22013 at commit [`2f4943f`](https://github.com/apache/spark/commit/2f4943f3cec0705c296b2988c415ac3372b7ea86).
     * This patch passes all tests.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Test PASSed.
    Refer to this link for build results (access rights to CI server needed): 
    https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94457/
    Test PASSed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210161936
  
    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
    @@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
         assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
       }
     
    +  test("transform keys function - test various primitive data types combinations") {
    +    val dfExample1 = Seq(
    +      Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
    +    ).toDF("i")
    +
    +    val dfExample2 = Seq(
    +      Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
    --- End diff --
    
    Do we need `E0`?
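    
    For reference, plain double literals denote the same values, so the suffix could presumably be dropped:
    
    ```
    // 1.0E0 == 1.0 as Scala Doubles; the exponent suffix adds nothing here.
    val dfExample2 = Seq(
      Map[Int, Double](1 -> 1.0, 2 -> 1.4, 3 -> 1.7)
    ).toDF("j")
    ```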


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210162501
  
    --- Diff: sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql ---
    @@ -51,3 +51,17 @@ select exists(ys, y -> y > 30) as v from nested;
     
     -- Check for element existence in a null array
     select exists(cast(null as array<int>), y -> y > 30) as v;
    +                                                                         
    +create or replace temporary view nested as values
    +  (1, map(1,1,2,2,3,3)),
    +  (2, map(4,4,5,5,6,6))
    --- End diff --
    
    nit:
    
    ```
      (1, map(1, 1, 2, 2, 3, 3)),
      (2, map(4, 4, 5, 5, 6, 6))
    ```
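    
    For context, a sketch of how the corrected view might be exercised by the new function; the trailing alias clause is an assumption, since the quoted diff is truncated:
    
    ```
    // Hypothetical spark-shell snippet; assumes a SparkSession named `spark`
    // and that the view aliases its columns as t(x, ys).
    spark.sql("""
      create or replace temporary view nested as values
        (1, map(1, 1, 2, 2, 3, 3)),
        (2, map(4, 4, 5, 5, 6, 6))
        as t(x, ys)
    """)
    spark.sql("select transform_keys(ys, (k, v) -> k + 1) as v from nested").show(truncate = false)
    ```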


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94457 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94457/testReport)** for PR 22013 at commit [`f7fd231`](https://github.com/apache/spark/commit/f7fd2313dddfea3555bda61fc96339c24afb71b0).
     * This patch passes all tests.
     * This patch merges cleanly.
     * This patch adds no public classes.


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    **[Test build #94404 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94404/testReport)** for PR 22013 at commit [`5806ac4`](https://github.com/apache/spark/commit/5806ac46707772fd1e4befa445157ed0f9c75084).


---



[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:

    https://github.com/apache/spark/pull/22013
  
    Merged build finished. Test PASSed.


---



[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function

Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22013#discussion_r210366383
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
    @@ -497,6 +497,62 @@ case class ArrayAggregate(
       override def prettyName: String = "aggregate"
     }
     
    +/**
    + * Transforms the keys of a map by applying the given function to each entry.
    + * Returns a map with the transformed keys and the original values.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(expr, func) - Transforms the keys in a map using the function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
    +       map(array(2, 3, 4), array(1, 2, 3))
    +      > SELECT _FUNC_(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
    +       map(array(2, 4, 6), array(1, 2, 3))
    +  """,
    +  since = "2.4.0")
    +case class TransformKeys(
    +    argument: Expression,
    +    function: Expression)
    +  extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
    +
    +  override def nullable: Boolean = argument.nullable
    +
    +  @transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType
    +
    +  override def dataType: DataType = {
    +    MapType(function.dataType, valueType, valueContainsNull)
    --- End diff --
    
    nit: could this be just one line?
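    
    Presumably something like the following, which still fits the line-length limit:
    
    ```
    override def dataType: DataType = MapType(function.dataType, valueType, valueContainsNull)
    ```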


---
