You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by codeatri <gi...@git.apache.org> on 2018/08/06 19:04:46 UTC
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
GitHub user codeatri opened a pull request:
https://github.com/apache/spark/pull/22013
[SPARK-23939][SQL] Add transform_keys function
## What changes were proposed in this pull request?
This PR adds the transform_keys function, which applies the given function to each entry of the map and transforms the keys.
## How was this patch tested?
Added tests.
You can merge this pull request into a Git repository by running:
$ git pull https://github.com/codeatri/spark SPARK-23939
Alternatively you can review and apply these changes as the patch at:
https://github.com/apache/spark/pull/22013.patch
To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:
This closes #22013
----
commit 0a19cc44bf694f76f8f1be8faeaa16dc47f9bb86
Author: codeatri <ne...@...>
Date: 2018-08-06T18:32:47Z
Added Support for transform_keys function
----
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test FAILed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94758/
Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by hvanhovell <gi...@git.apache.org>.
Github user hvanhovell commented on the issue:
https://github.com/apache/spark/pull/22013
ok to test
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210373675
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
}
+ test("transform keys function - test various primitive data types combinations") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[Int, Boolean](25 -> true, 26 -> false)
+ ).toDF("x")
+
+ val dfExample4 = Seq(
+ Map[Array[Int], Boolean](Array(1, 2) -> false)
+ ).toDF("y")
+
+
+ def testMapOfPrimitiveTypesCombination(): Unit = {
+ checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
+ Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
+ "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
+ Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
+ Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
+ Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> k % 2 = 0 OR v)"),
+ Seq(Row(Map(true -> true, true -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
+ Seq(Row(Map(false -> false))))
+ }
+ // Test with local relation, the Project will be evaluated without codegen
+ testMapOfPrimitiveTypesCombination()
+ dfExample1.cache()
+ dfExample2.cache()
+ dfExample3.cache()
+ dfExample4.cache()
+ // Test with cached relation, the Project will be evaluated with codegen
+ testMapOfPrimitiveTypesCombination()
+ }
+
+ test("transform keys function - Invalid lambda functions and exceptions") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[String, String]("a" -> "b")
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[String, String]("a" -> null)
+ ).toDF("x")
+
+ def testInvalidLambdaFunctions(): Unit = {
+ val ex1 = intercept[AnalysisException] {
+ dfExample1.selectExpr("transform_keys(i, k -> k )")
+ }
+ assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
+
+ val ex2 = intercept[AnalysisException] {
+ dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
+ }
+ assert(ex2.getMessage.contains(
+ "The number of lambda function arguments '3' does not match"))
+
+ val ex3 = intercept[RuntimeException] {
+ dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
+ }
+ assert(ex3.getMessage.contains("Cannot use null as map key!"))
+ }
+
+ testInvalidLambdaFunctions()
+ dfExample1.cache()
+ dfExample2.cache()
+ testInvalidLambdaFunctions()
--- End diff --
@ueshin I would like to ask you a generic question regarding higher-order functions. Is it necessary to perform checks with codegen paths if all the newly added functions extend ```CodegenFallback```? Eventually, is there a plan to add codegen for these functions in the future?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94405 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94405/testReport)** for PR 22013 at commit [`150a6a5`](https://github.com/apache/spark/commit/150a6a5c405c78e7a5f7dd9b3f3c72f95290ec71).
* This patch **fails due to an unknown error code, -9**.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94817 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94817/testReport)** for PR 22013 at commit [`58b60b2`](https://github.com/apache/spark/commit/58b60b2f851fb1464743257fe1cca075a1e77ba9).
* This patch **fails Spark unit tests**.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210368929
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,62 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ @transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType
+
+ override def dataType: DataType = {
+ MapType(function.dataType, valueType, valueContainsNull)
+ }
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
+ copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
+ }
+
+ @transient lazy val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+
+
+ override def nullSafeEval(inputRow: InternalRow, argumentValue: Any): Any = {
+ val map = argumentValue.asInstanceOf[MapData]
+ val f = functionForEval
--- End diff --
Can't we use ```functionForEval``` directly?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208811976
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
@@ -230,4 +236,56 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
(acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
15)
}
+
+ test("TransformKeys") {
+ val ai0 = Literal.create(
+ Map(1 -> 1, 2 -> 2, 3 -> 3),
+ MapType(IntegerType, IntegerType))
--- End diff --
Can you add `valueContainsNull` explicitly?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94455 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94455/testReport)** for PR 22013 at commit [`6526630`](https://github.com/apache/spark/commit/652663077e383f8b188743c4494d697e34d5d02c).
* This patch passes all tests.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210160909
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ override def dataType: DataType = {
+ val map = argument.dataType.asInstanceOf[MapType]
+ MapType(function.dataType, map.valueType, map.valueContainsNull)
+ }
+
+ @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
+ copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+ (keyVar, valueVar)
+ }
+
+ override def nullSafeEval(inputRow: InternalRow, argumentValue: Any): Any = {
+ val map = argumentValue.asInstanceOf[MapData]
+ val f = functionForEval
+ val resultKeys = new GenericArrayData(new Array[Any](map.numElements))
+ var i = 0
+ while (i < map.numElements) {
+ keyVar.value.set(map.keyArray().get(i, keyVar.dataType))
+ valueVar.value.set(map.valueArray().get(i, valueVar.dataType))
+ val result = f.eval(inputRow)
+ if (result == null) {
+ throw new RuntimeException("Cannot use null as map key!")
+ }
+ resultKeys.update(i, result)
+ i += 1
+ }
+ new ArrayBasedMapData(resultKeys, map.valueArray())
+ }
+
+ override def prettyName: String = "transform_keys"
+ }
--- End diff --
nit: indent
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208160707
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
--- End diff --
nit: missing space -> ```k, v```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208164141
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ input: Expression,
+ function: Expression)
+ extends ArrayBasedHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = input.nullable
+
+ override def dataType: DataType = {
+ val valueType = input.dataType.asInstanceOf[MapType].valueType
+ MapType(function.dataType, valueType, input.nullable)
--- End diff --
Is there any reason for changing ```valueContainsNull``` flag if the function transforms just keys? WDYT about:
```
val MapType(_, valueType, valueContainsNull) = input.dataType.asInstanceOf[MapType]
MapType(function.dataType, valueType, valueContainsNull)
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test PASSed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94315/
Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test FAILed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94788/
Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210159871
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ override def dataType: DataType = {
+ val map = argument.dataType.asInstanceOf[MapType]
+ MapType(function.dataType, map.valueType, map.valueContainsNull)
+ }
+
+ @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
+ copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+ (keyVar, valueVar)
+ }
+
+ override def nullSafeEval(inputRow: InternalRow, argumentValue: Any): Any = {
+ val map = argumentValue.asInstanceOf[MapData]
+ val f = functionForEval
+ val resultKeys = new GenericArrayData(new Array[Any](map.numElements))
+ var i = 0
+ while (i < map.numElements) {
+ keyVar.value.set(map.keyArray().get(i, keyVar.dataType))
+ valueVar.value.set(map.valueArray().get(i, valueVar.dataType))
+ val result = f.eval(inputRow)
+ if (result == null) {
--- End diff --
nit: extra space between `==` and `null`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Can one of the admins verify this patch?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94758 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94758/testReport)** for PR 22013 at commit [`bb52630`](https://github.com/apache/spark/commit/bb52630dd720ecaf5f7ffe0c498d422ce60f3bb7).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test PASSed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94404/
Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94765 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94765/testReport)** for PR 22013 at commit [`621213d`](https://github.com/apache/spark/commit/621213dd1658fbc8cb19e15dd77c9c389653d4db).
* This patch **fails to build**.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208169140
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2071,6 +2071,158 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex4.getMessage.contains("data type mismatch: argument 3 requires int type"))
}
+ test("transform keys function - test various primitive data types combinations") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[Int, String](1 -> "a", 2 -> "b", 3 -> "c")
+ ).toDF("x")
+
+ val dfExample3 = Seq(
+ Map[String, Int]("a" -> 1, "b" -> 2, "c" -> 3)
+ ).toDF("y")
+
+ val dfExample4 = Seq(
+ Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
+ ).toDF("z")
+
+ val dfExample5 = Seq(
+ Map[Int, Boolean](25 -> true, 26 -> false)
+ ).toDF("a")
+
+ val dfExample6 = Seq(
+ Map[Int, String](25 -> "ab", 26 -> "cd")
+ ).toDF("b")
+
+ val dfExample7 = Seq(
+ Map[Array[Int], Boolean](Array(1, 2) -> false)
+ ).toDF("c")
+
+
+ def testMapOfPrimitiveTypesCombination(): Unit = {
+ checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
+ Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(x, (k, v) -> k + 1)"),
+ Seq(Row(Map(2 -> "a", 3 -> "b", 4 -> "c"))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(y, (k, v) -> v * v)"),
+ Seq(Row(Map(1 -> 1, 4 -> 2, 9 -> 3))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(y, (k, v) -> length(k) + v)"),
+ Seq(Row(Map(2 -> 1, 3 -> 2, 4 -> 3))))
+
+ checkAnswer(
+ dfExample3.selectExpr("transform_keys(y, (k, v) -> concat(k, cast(v as String)))"),
+ Seq(Row(Map("a1" -> 1, "b2" -> 2, "c3" -> 3))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(z, " +
+ "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
+ Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(z, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
+ Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(z, (k, v) -> k + v)"),
+ Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
+
+ checkAnswer(dfExample5.selectExpr("transform_keys(a, (k, v) -> k % 2 = 0 OR v)"),
+ Seq(Row(Map(true -> true, true -> false))))
+
+ checkAnswer(dfExample5.selectExpr("transform_keys(a, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample5.selectExpr("transform_keys(a, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample6.selectExpr(
+ "transform_keys(b, (k, v) -> concat(conv(k, 10, 16) , substr(v, 1, 1)))"),
+ Seq(Row(Map("19a" -> "ab", "1Ac" -> "cd"))))
+
+ checkAnswer(dfExample7.selectExpr("transform_keys(c, (k, v) -> array_contains(k, 3) AND v)"),
+ Seq(Row(Map(false -> false))))
+ }
+ // Test with local relation, the Project will be evaluated without codegen
+ testMapOfPrimitiveTypesCombination()
+ dfExample1.cache()
+ dfExample2.cache()
+ dfExample3.cache()
+ dfExample4.cache()
+ dfExample5.cache()
+ dfExample6.cache()
+ // Test with cached relation, the Project will be evaluated with codegen
+ testMapOfPrimitiveTypesCombination()
--- End diff --
Do we have to do that if the expression implements ```CodegenFallback```?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:
https://github.com/apache/spark/pull/22013
I'd merge this now.
@mn-mikke @mgaido91 If you have any other comments, let's have a follow-up pr.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94451 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94451/testReport)** for PR 22013 at commit [`9f6a8ab`](https://github.com/apache/spark/commit/9f6a8abae75b70c5be89c6bbccf3a574bd7fb17d).
* This patch **fails Spark unit tests**.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210163358
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
}
+ test("transform keys function - test various primitive data types combinations") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[Int, Boolean](25 -> true, 26 -> false)
+ ).toDF("x")
+
+ val dfExample4 = Seq(
+ Map[Array[Int], Boolean](Array(1, 2) -> false)
+ ).toDF("y")
+
+
+ def testMapOfPrimitiveTypesCombination(): Unit = {
+ checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
+ Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
+ "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
+ Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
+ Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
+ Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> k % 2 = 0 OR v)"),
+ Seq(Row(Map(true -> true, true -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
+ Seq(Row(Map(false -> false))))
+ }
+ // Test with local relation, the Project will be evaluated without codegen
+ testMapOfPrimitiveTypesCombination()
+ dfExample1.cache()
+ dfExample2.cache()
+ dfExample3.cache()
+ dfExample4.cache()
+ // Test with cached relation, the Project will be evaluated with codegen
+ testMapOfPrimitiveTypesCombination()
+ }
+
+ test("transform keys function - Invalid lambda functions and exceptions") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[String, String]("a" -> "b")
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[String, String]("a" -> null)
+ ).toDF("x")
+
+ def testInvalidLambdaFunctions(): Unit = {
+ val ex1 = intercept[AnalysisException] {
+ dfExample1.selectExpr("transform_keys(i, k -> k )")
+ }
+ assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
+
+ val ex2 = intercept[AnalysisException] {
+ dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
+ }
+ assert(ex2.getMessage.contains(
+ "The number of lambda function arguments '3' does not match"))
+
+ val ex3 = intercept[RuntimeException] {
+ dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
+ }
+ assert(ex3.getMessage.contains("Cannot use null as map key!"))
+ }
+
+ testInvalidLambdaFunctions()
+ dfExample1.cache()
+ dfExample2.cache()
+ testInvalidLambdaFunctions()
--- End diff --
We need `dfExample3.cache()` as well?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210162791
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
@@ -283,6 +289,75 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
15)
}
+ test("TransformKeys") {
+ val ai0 = Literal.create(
+ Map(1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4),
+ MapType(IntegerType, IntegerType, valueContainsNull = false))
+ val ai1 = Literal.create(
+ Map.empty[Int, Int],
+ MapType(IntegerType, IntegerType, valueContainsNull = true))
+ val ai2 = Literal.create(
+ Map(1 -> 1, 2 -> null, 3 -> 3),
+ MapType(IntegerType, IntegerType, valueContainsNull = true))
+ val ai3 = Literal.create(null, MapType(IntegerType, IntegerType, valueContainsNull = false))
+
+ val plusOne: (Expression, Expression) => Expression = (k, v) => k + 1
+ val plusValue: (Expression, Expression) => Expression = (k, v) => k + v
+ val modKey: (Expression, Expression) => Expression = (k, v) => k % 3
+
+ checkEvaluation(transformKeys(ai0, plusOne), Map(2 -> 1, 3 -> 2, 4 -> 3, 5 -> 4))
+ checkEvaluation(transformKeys(ai0, plusValue), Map(2 -> 1, 4 -> 2, 6 -> 3, 8 -> 4))
+ checkEvaluation(
+ transformKeys(transformKeys(ai0, plusOne), plusValue), Map(3 -> 1, 5 -> 2, 7 -> 3, 9 -> 4))
+ checkEvaluation(transformKeys(ai0, modKey),
+ ArrayBasedMapData(Array(1, 2, 0, 1), Array(1, 2, 3, 4)))
+ checkEvaluation(transformKeys(ai1, plusOne), Map.empty[Int, Int])
+ checkEvaluation(transformKeys(ai1, plusOne), Map.empty[Int, Int])
+ checkEvaluation(
+ transformKeys(transformKeys(ai1, plusOne), plusValue), Map.empty[Int, Int])
+ checkEvaluation(transformKeys(ai2, plusOne), Map(2 -> 1, 3 -> null, 4 -> 3))
+ checkEvaluation(
+ transformKeys(transformKeys(ai2, plusOne), plusOne), Map(3 -> 1, 4 -> null, 5 -> 3))
+ checkEvaluation(transformKeys(ai3, plusOne), null)
+
+ val as0 = Literal.create(
+ Map("a" -> "xy", "bb" -> "yz", "ccc" -> "zx"),
+ MapType(StringType, StringType, valueContainsNull = false))
+ val as1 = Literal.create(
+ Map("a" -> "xy", "bb" -> "yz", "ccc" -> null),
+ MapType(StringType, StringType, valueContainsNull = true))
+ val as2 = Literal.create(null,
+ MapType(StringType, StringType, valueContainsNull = false))
+ val asn = Literal.create(Map.empty[StringType, StringType],
--- End diff --
`as3`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210160419
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ override def dataType: DataType = {
+ val map = argument.dataType.asInstanceOf[MapType]
+ MapType(function.dataType, map.valueType, map.valueContainsNull)
+ }
+
+ @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
--- End diff --
Should this be a `lazy val`?
Could you also add a test for the case where `argument` is not a map to the invalid cases in `DataFrameFunctionsSuite`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test PASSed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94775/
Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94788 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94788/testReport)** for PR 22013 at commit [`e5d9b05`](https://github.com/apache/spark/commit/e5d9b051b027cf86fbcd82701f54e50f1aeac7f6).
* This patch **fails due to an unknown error code, -9**.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94775 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94775/testReport)** for PR 22013 at commit [`5db526b`](https://github.com/apache/spark/commit/5db526be7bad0fa38dc9743c919014b475cf8aeb).
* This patch passes all tests.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94315 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94315/testReport)** for PR 22013 at commit [`0a19cc4`](https://github.com/apache/spark/commit/0a19cc44bf694f76f8f1be8faeaa16dc47f9bb86).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94404 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94404/testReport)** for PR 22013 at commit [`5806ac4`](https://github.com/apache/spark/commit/5806ac46707772fd1e4befa445157ed0f9c75084).
* This patch passes all tests.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94518 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94518/testReport)** for PR 22013 at commit [`1cbaf0c`](https://github.com/apache/spark/commit/1cbaf0c6adc508299d42a82628f4f0954bed7a95).
* This patch passes all tests.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210161616
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
}
+ test("transform keys function - test various primitive data types combinations") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[Int, Boolean](25 -> true, 26 -> false)
+ ).toDF("x")
+
+ val dfExample4 = Seq(
+ Map[Array[Int], Boolean](Array(1, 2) -> false)
+ ).toDF("y")
+
+
+ def testMapOfPrimitiveTypesCombination(): Unit = {
+ checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
+ Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
+ "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
+ Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
+ Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
+ Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> k % 2 = 0 OR v)"),
+ Seq(Row(Map(true -> true, true -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
+ Seq(Row(Map(false -> false))))
+ }
+ // Test with local relation, the Project will be evaluated without codegen
+ testMapOfPrimitiveTypesCombination()
+ dfExample1.cache()
+ dfExample2.cache()
+ dfExample3.cache()
+ dfExample4.cache()
+ // Test with cached relation, the Project will be evaluated with codegen
+ testMapOfPrimitiveTypesCombination()
+ }
+
+ test("transform keys function - Invalid lambda functions and exceptions") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[String, String]("a" -> "b")
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[String, String]("a" -> null)
+ ).toDF("x")
+
+ def testInvalidLambdaFunctions(): Unit = {
+ val ex1 = intercept[AnalysisException] {
+ dfExample1.selectExpr("transform_keys(i, k -> k )")
+ }
+ assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
+
+ val ex2 = intercept[AnalysisException] {
+ dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
+ }
+ assert(ex2.getMessage.contains(
+ "The number of lambda function arguments '3' does not match"))
+
+ val ex3 = intercept[RuntimeException] {
+ dfExample3.selectExpr("transform_keys(x, (k, v) -> v)").show()
+ }
+ assert(ex3.getMessage.contains("Cannot use null as map key!"))
--- End diff --
It seems we could run these tests using only `dfExample3`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test FAILed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94811/
Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94405 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94405/testReport)** for PR 22013 at commit [`150a6a5`](https://github.com/apache/spark/commit/150a6a5c405c78e7a5f7dd9b3f3c72f95290ec71).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Can one of the admins verify this patch?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210193591
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ override def dataType: DataType = {
+ val map = argument.dataType.asInstanceOf[MapType]
+ MapType(function.dataType, map.valueType, map.valueContainsNull)
--- End diff --
What about this?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94819 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94819/testReport)** for PR 22013 at commit [`2f4943f`](https://github.com/apache/spark/commit/2f4943f3cec0705c296b2988c415ac3372b7ea86).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94451 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94451/testReport)** for PR 22013 at commit [`9f6a8ab`](https://github.com/apache/spark/commit/9f6a8abae75b70c5be89c6bbccf3a574bd7fb17d).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210161746
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
}
+ test("transform keys function - test various primitive data types combinations") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[Int, Boolean](25 -> true, 26 -> false)
+ ).toDF("x")
+
+ val dfExample4 = Seq(
+ Map[Array[Int], Boolean](Array(1, 2) -> false)
+ ).toDF("y")
+
+
+ def testMapOfPrimitiveTypesCombination(): Unit = {
+ checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
+ Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
+ "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
+ Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
+ Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
+ Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> k % 2 = 0 OR v)"),
+ Seq(Row(Map(true -> true, true -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
+ Seq(Row(Map(false -> false))))
+ }
+ // Test with local relation, the Project will be evaluated without codegen
+ testMapOfPrimitiveTypesCombination()
+ dfExample1.cache()
+ dfExample2.cache()
+ dfExample3.cache()
+ dfExample4.cache()
+ // Test with cached relation, the Project will be evaluated with codegen
+ testMapOfPrimitiveTypesCombination()
+ }
+
+ test("transform keys function - Invalid lambda functions and exceptions") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[String, String]("a" -> "b")
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[String, String]("a" -> null)
+ ).toDF("x")
+
+ def testInvalidLambdaFunctions(): Unit = {
+ val ex1 = intercept[AnalysisException] {
+ dfExample1.selectExpr("transform_keys(i, k -> k )")
+ }
+ assert(ex1.getMessage.contains("The number of lambda function arguments '1' does not match"))
+
+ val ex2 = intercept[AnalysisException] {
+ dfExample2.selectExpr("transform_keys(j, (k, v, x) -> k + 1)")
+ }
+ assert(ex2.getMessage.contains(
+ "The number of lambda function arguments '3' does not match"))
--- End diff --
nit: indent
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:
https://github.com/apache/spark/pull/22013
Btw, don't we need one more right parenthesis after the second `array(1, 2, 3)`, and a space in `(k,v)`, in the description?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by codeatri <gi...@git.apache.org>.
Github user codeatri commented on the issue:
https://github.com/apache/spark/pull/22013
@hvanhovell @mn-mikke @mgaido91 Thanks for the review! I have addressed all your comments and added appropriate test cases for the same.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94817 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94817/testReport)** for PR 22013 at commit [`58b60b2`](https://github.com/apache/spark/commit/58b60b2f851fb1464743257fe1cca075a1e77ba9).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94811 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94811/testReport)** for PR 22013 at commit [`fb885f4`](https://github.com/apache/spark/commit/fb885f4797e72d0c2cbfa23980199c71e0c5aaee).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94788 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94788/testReport)** for PR 22013 at commit [`e5d9b05`](https://github.com/apache/spark/commit/e5d9b051b027cf86fbcd82701f54e50f1aeac7f6).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94315 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94315/testReport)** for PR 22013 at commit [`0a19cc4`](https://github.com/apache/spark/commit/0a19cc44bf694f76f8f1be8faeaa16dc47f9bb86).
* This patch passes all tests.
* This patch merges cleanly.
* This patch adds the following public classes _(experimental)_:
* `case class TransformKeys(`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210161509
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
}
+ test("transform keys function - test various primitive data types combinations") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[Int, Boolean](25 -> true, 26 -> false)
+ ).toDF("x")
+
+ val dfExample4 = Seq(
+ Map[Array[Int], Boolean](Array(1, 2) -> false)
+ ).toDF("y")
+
+
+ def testMapOfPrimitiveTypesCombination(): Unit = {
+ checkAnswer(dfExample1.selectExpr("transform_keys(i, (k, v) -> k + v)"),
+ Seq(Row(Map(2 -> 1, 18 -> 9, 16 -> 8, 14 -> 7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, " +
+ "(k, v) -> map_from_arrays(ARRAY(1, 2, 3), ARRAY('one', 'two', 'three'))[k])"),
+ Seq(Row(Map("one" -> 1.0, "two" -> 1.4, "three" -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> CAST(v * 2 AS BIGINT) + k)"),
+ Seq(Row(Map(3 -> 1.0, 4 -> 1.4, 6 -> 1.7))))
+
+ checkAnswer(dfExample2.selectExpr("transform_keys(j, (k, v) -> k + v)"),
+ Seq(Row(Map(2.0 -> 1.0, 3.4 -> 1.4, 4.7 -> 1.7))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> k % 2 = 0 OR v)"),
+ Seq(Row(Map(true -> true, true -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample3.selectExpr("transform_keys(x, (k, v) -> if(v, 2 * k, 3 * k))"),
+ Seq(Row(Map(50 -> true, 78 -> false))))
+
+ checkAnswer(dfExample4.selectExpr("transform_keys(y, (k, v) -> array_contains(k, 3) AND v)"),
+ Seq(Row(Map(false -> false))))
+ }
+ // Test with local relation, the Project will be evaluated without codegen
+ testMapOfPrimitiveTypesCombination()
+ dfExample1.cache()
+ dfExample2.cache()
+ dfExample3.cache()
+ dfExample4.cache()
+ // Test with cached relation, the Project will be evaluated with codegen
+ testMapOfPrimitiveTypesCombination()
+ }
+
+ test("transform keys function - Invalid lambda functions and exceptions") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[String, String]("a" -> "b")
+ ).toDF("j")
+
+ val dfExample3 = Seq(
+ Map[String, String]("a" -> null)
+ ).toDF("x")
+
+ def testInvalidLambdaFunctions(): Unit = {
+ val ex1 = intercept[AnalysisException] {
+ dfExample1.selectExpr("transform_keys(i, k -> k )")
--- End diff --
nit: extra space after `k -> k`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mgaido91 <gi...@git.apache.org>.
Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210524076
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,59 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
--- End diff --
I think this can be moved to `SimpleHigherOrderFunction`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208811179
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -442,3 +442,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
--- End diff --
ditto: this example also needs one more right parenthesis after the second `array(1, 2, 3)`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test FAILed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94451/
Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208811169
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -442,3 +442,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + 1);
--- End diff --
nit: we need one more right parenthesis after the second `array(1, 2, 3)`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test PASSed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94455/
Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Can one of the admins verify this patch?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208815130
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2117,6 +2117,198 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex4.getMessage.contains("data type mismatch: argument 3 requires int type"))
}
+ test("transform keys function - test various primitive data types combinations") {
--- End diff --
We don't need so many cases here. We only need to verify the api works end to end.
Evaluation checks of the function should be in `HigherOrderFunctionsSuite`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by hvanhovell <gi...@git.apache.org>.
Github user hvanhovell commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208136330
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ input: Expression,
+ function: Expression)
+ extends ArrayBasedHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = input.nullable
+
+ override def dataType: DataType = {
+ val valueType = input.dataType.asInstanceOf[MapType].valueType
+ MapType(function.dataType, valueType, input.nullable)
+ }
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(MapType, expectingFunctionType)
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction):
+ TransformKeys = {
+ val (keyElementType, valueElementType, containsNull) = input.dataType match {
+ case MapType(keyType, valueType, containsNullValue) =>
+ (keyType, valueType, containsNullValue)
+ case _ =>
+ val MapType(keyType, valueType, containsNullValue) = MapType.defaultConcreteType
+ (keyType, valueType, containsNullValue)
+ }
+ copy(function = f(function, (keyElementType, false) :: (valueElementType, containsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+ (keyVar, valueVar)
+ }
+
+ override def eval(input: InternalRow): Any = {
+ val arr = this.input.eval(input).asInstanceOf[MapData]
+ if (arr == null) {
+ null
+ } else {
+ val f = functionForEval
+ val resultKeys = new GenericArrayData(new Array[Any](arr.numElements))
+ var i = 0
+ while (i < arr.numElements) {
+ keyVar.value.set(arr.keyArray().get(i, keyVar.dataType))
+ valueVar.value.set(arr.valueArray().get(i, valueVar.dataType))
+ resultKeys.update(i, f.eval(input))
--- End diff --
This assumes that the transformation will return a unique key, right? If it doesn't, you'll break the map semantics. For example: `transform_keys(some_map, (k, v) -> 0)`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:
https://github.com/apache/spark/pull/22013
Thanks! merging to master.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test PASSed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94819/
Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210165079
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ override def dataType: DataType = {
+ val map = argument.dataType.asInstanceOf[MapType]
+ MapType(function.dataType, map.valueType, map.valueContainsNull)
+ }
+
+ @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
+ copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+ (keyVar, valueVar)
+ }
--- End diff --
nit: how about:
```scala
@transient lazy val LambdaFunction(_,
(keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mgaido91 <gi...@git.apache.org>.
Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208159784
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ input: Expression,
+ function: Expression)
+ extends ArrayBasedHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = input.nullable
+
+ override def dataType: DataType = {
+ val valueType = input.dataType.asInstanceOf[MapType].valueType
+ MapType(function.dataType, valueType, input.nullable)
--- End diff --
I think here `input.nullable` is wrong. This should indicate whether the value contains null, not whether the returned object can be null or not.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94765 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94765/testReport)** for PR 22013 at commit [`621213d`](https://github.com/apache/spark/commit/621213dd1658fbc8cb19e15dd77c9c389653d4db).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208161643
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
--- End diff --
maybe a better comment?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210193484
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ override def dataType: DataType = {
+ val map = argument.dataType.asInstanceOf[MapType]
+ MapType(function.dataType, map.valueType, map.valueContainsNull)
+ }
+
+ @transient val MapType(keyType, valueType, valueContainsNull) = argument.dataType
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): TransformKeys = {
+ copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+ (keyVar, valueVar)
+ }
--- End diff --
Sorry, I meant we don't need to surround by:
```scala
@transient lazy val (keyVar, valueVar) = {
...
(keyVar, valueVar)
}
```
just
```scala
@transient lazy val LambdaFunction(_,
(keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
```
should work.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210160577
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,65 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ override def dataType: DataType = {
+ val map = argument.dataType.asInstanceOf[MapType]
+ MapType(function.dataType, map.valueType, map.valueContainsNull)
--- End diff --
We can use `valueType` and `valueContainsNull` from the following val?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94775 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94775/testReport)** for PR 22013 at commit [`5db526b`](https://github.com/apache/spark/commit/5db526be7bad0fa38dc9743c919014b475cf8aeb).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94455 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94455/testReport)** for PR 22013 at commit [`6526630`](https://github.com/apache/spark/commit/652663077e383f8b188743c4494d697e34d5d02c).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by codeatri <gi...@git.apache.org>.
Github user codeatri commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208446666
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
@@ -181,4 +187,46 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
(acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
15)
}
+
+ test("TransformKeys") {
+ val ai0 = Literal.create(
+ Map(1 -> 1, 2 -> 2, 3 -> 3),
--- End diff --
Thanks for catching this!
Included test cases, both here and in DataFrameFunctionsSuite.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by codeatri <gi...@git.apache.org>.
Github user codeatri commented on the issue:
https://github.com/apache/spark/pull/22013
Thanks for the review @ueshin! I have addressed all your comments.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210527183
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,59 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
--- End diff --
makes sense.
Let's have wrap-up prs for higher-order functions after the remaining 2 prs are merged.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208811790
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
@@ -59,6 +59,12 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
ArrayFilter(expr, createLambda(at.elementType, at.containsNull, f))
}
+ def transformKeys(expr: Expression, f: (Expression, Expression) => Expression): Expression = {
+ val valueType = expr.dataType.asInstanceOf[MapType].valueType
+ val keyType = expr.dataType.asInstanceOf[MapType].keyType
+ TransformKeys(expr, createLambda(keyType, false, valueType, true, f))
--- End diff --
We should use `valueContainsNull` instead of `true`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test PASSed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94518/
Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94758 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94758/testReport)** for PR 22013 at commit [`bb52630`](https://github.com/apache/spark/commit/bb52630dd720ecaf5f7ffe0c498d422ce60f3bb7).
* This patch **fails Scala style tests**.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test FAILed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94817/
Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208812207
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
@@ -230,4 +236,56 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
(acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
15)
}
+
+ test("TransformKeys") {
+ val ai0 = Literal.create(
+ Map(1 -> 1, 2 -> 2, 3 -> 3),
+ MapType(IntegerType, IntegerType))
+ val ai1 = Literal.create(
+ Map.empty[Int, Int],
+ MapType(IntegerType, IntegerType))
+ val ai2 = Literal.create(
+ Map(1 -> 1, 2 -> null, 3 -> 3),
+ MapType(IntegerType, IntegerType))
--- End diff --
Can you add tests for `Literal.create(null, MapType(IntegerType, IntegerType))`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by asfgit <gi...@git.apache.org>.
Github user asfgit closed the pull request at:
https://github.com/apache/spark/pull/22013
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94811 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94811/testReport)** for PR 22013 at commit [`fb885f4`](https://github.com/apache/spark/commit/fb885f4797e72d0c2cbfa23980199c71e0c5aaee).
* This patch **fails Spark unit tests**.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208190999
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ input: Expression,
+ function: Expression)
+ extends ArrayBasedHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = input.nullable
+
+ override def dataType: DataType = {
+ val valueType = input.dataType.asInstanceOf[MapType].valueType
+ MapType(function.dataType, valueType, input.nullable)
+ }
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(MapType, expectingFunctionType)
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction):
+ TransformKeys = {
+ val (keyElementType, valueElementType, containsNull) = input.dataType match {
+ case MapType(keyType, valueType, containsNullValue) =>
+ (keyType, valueType, containsNullValue)
+ case _ =>
+ val MapType(keyType, valueType, containsNullValue) = MapType.defaultConcreteType
+ (keyType, valueType, containsNullValue)
+ }
+ copy(function = f(function, (keyElementType, false) :: (valueElementType, containsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+ (keyVar, valueVar)
+ }
+
+ override def eval(input: InternalRow): Any = {
+ val arr = this.input.eval(input).asInstanceOf[MapData]
+ if (arr == null) {
+ null
+ } else {
+ val f = functionForEval
+ val resultKeys = new GenericArrayData(new Array[Any](arr.numElements))
+ var i = 0
+ while (i < arr.numElements) {
+ keyVar.value.set(arr.keyArray().get(i, keyVar.dataType))
+ valueVar.value.set(arr.valueArray().get(i, valueVar.dataType))
+ resultKeys.update(i, f.eval(input))
--- End diff --
I'm not a fan of duplicated keys either, but other functions transforming maps have the same problem. See the discussions [here](https://github.com/apache/spark/pull/21282#discussion_r187234431) and [here](https://github.com/apache/spark/pull/21258#discussion_r186410527).
Example:
```
scala> spark.range(1).selectExpr("map(0,1,0,2)").show()
+----------------+
| map(0, 1, 0, 2)|
+----------------+
|[0 -> 1, 0 -> 2]|
+----------------+
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208167785
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ input: Expression,
+ function: Expression)
+ extends ArrayBasedHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = input.nullable
+
+ override def dataType: DataType = {
+ val valueType = input.dataType.asInstanceOf[MapType].valueType
+ MapType(function.dataType, valueType, input.nullable)
+ }
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(MapType, expectingFunctionType)
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction):
+ TransformKeys = {
+ val (keyElementType, valueElementType, containsNull) = input.dataType match {
+ case MapType(keyType, valueType, containsNullValue) =>
+ (keyType, valueType, containsNullValue)
+ case _ =>
+ val MapType(keyType, valueType, containsNullValue) = MapType.defaultConcreteType
+ (keyType, valueType, containsNullValue)
+ }
+ copy(function = f(function, (keyElementType, false) :: (valueElementType, containsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) :: Nil, _) = function
+ (keyVar, valueVar)
+ }
+
+ override def eval(input: InternalRow): Any = {
+ val arr = this.input.eval(input).asInstanceOf[MapData]
+ if (arr == null) {
+ null
+ } else {
+ val f = functionForEval
+ val resultKeys = new GenericArrayData(new Array[Any](arr.numElements))
+ var i = 0
+ while (i < arr.numElements) {
+ keyVar.value.set(arr.keyArray().get(i, keyVar.dataType))
+ valueVar.value.set(arr.valueArray().get(i, valueVar.dataType))
+ resultKeys.update(i, f.eval(input))
--- End diff --
Maybe I'm missing something, but couldn't ```f.eval(input)``` be evaluated to ```null```? Keys are not allowed to be ```null```. Other functions usually have a ```null``` check and throw ```RuntimeException``` for such cases.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208169969
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala ---
@@ -181,4 +187,46 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper
(acc, array) => coalesce(aggregate(array, acc, (acc, elem) => acc + elem), acc)),
15)
}
+
+ test("TransformKeys") {
+ val ai0 = Literal.create(
+ Map(1 -> 1, 2 -> 2, 3 -> 3),
--- End diff --
It may be irrelevant, but WDYT about adding test cases with ```null``` values?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test FAILed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94765/
Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test FAILed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94405/
Test FAILed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94518 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94518/testReport)** for PR 22013 at commit [`1cbaf0c`](https://github.com/apache/spark/commit/1cbaf0c6adc508299d42a82628f4f0954bed7a95).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on the issue:
https://github.com/apache/spark/pull/22013
LGTM.
@mn-mikke @mgaido91 Do you have any other comments on this?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94457 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94457/testReport)** for PR 22013 at commit [`f7fd231`](https://github.com/apache/spark/commit/f7fd2313dddfea3555bda61fc96339c24afb71b0).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94819 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94819/testReport)** for PR 22013 at commit [`2f4943f`](https://github.com/apache/spark/commit/2f4943f3cec0705c296b2988c415ac3372b7ea86).
* This patch passes all tests.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Test PASSed.
Refer to this link for build results (access rights to CI server needed):
https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/94457/
Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210161936
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2302,6 +2302,97 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
assert(ex5.getMessage.contains("function map_zip_with does not support ordering on type map"))
}
+ test("transform keys function - test various primitive data types combinations") {
+ val dfExample1 = Seq(
+ Map[Int, Int](1 -> 1, 9 -> 9, 8 -> 8, 7 -> 7)
+ ).toDF("i")
+
+ val dfExample2 = Seq(
+ Map[Int, Double](1 -> 1.0E0, 2 -> 1.4E0, 3 -> 1.7E0)
--- End diff --
Do we need `E0`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by ueshin <gi...@git.apache.org>.
Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210162501
--- Diff: sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql ---
@@ -51,3 +51,17 @@ select exists(ys, y -> y > 30) as v from nested;
-- Check for element existence in a null array
select exists(cast(null as array<int>), y -> y > 30) as v;
+
+create or replace temporary view nested as values
+ (1, map(1,1,2,2,3,3)),
+ (2, map(4,4,5,5,6,6))
--- End diff --
nit:
```
(1, map(1, 1, 2, 2, 3, 3)),
(2, map(4, 4, 5, 5, 6, 6))
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94457 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94457/testReport)** for PR 22013 at commit [`f7fd231`](https://github.com/apache/spark/commit/f7fd2313dddfea3555bda61fc96339c24afb71b0).
* This patch passes all tests.
* This patch merges cleanly.
* This patch adds no public classes.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by SparkQA <gi...@git.apache.org>.
Github user SparkQA commented on the issue:
https://github.com/apache/spark/pull/22013
**[Test build #94404 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/94404/testReport)** for PR 22013 at commit [`5806ac4`](https://github.com/apache/spark/commit/5806ac46707772fd1e4befa445157ed0f9c75084).
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark issue #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by AmplabJenkins <gi...@git.apache.org>.
Github user AmplabJenkins commented on the issue:
https://github.com/apache/spark/pull/22013
Merged build finished. Test PASSed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org
[GitHub] spark pull request #22013: [SPARK-23939][SQL] Add transform_keys function
Posted by mn-mikke <gi...@git.apache.org>.
Github user mn-mikke commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r210366383
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -497,6 +497,62 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+/**
+ * Transform Keys for every entry of the map by applying the transform_keys function.
+ * Returns map with transformed key entries
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ argument: Expression,
+ function: Expression)
+ extends MapBasedSimpleHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = argument.nullable
+
+ @transient lazy val MapType(keyType, valueType, valueContainsNull) = argument.dataType
+
+ override def dataType: DataType = {
+ MapType(function.dataType, valueType, valueContainsNull)
--- End diff --
nit: could this fit on just one line?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org