You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by techaddict <gi...@git.apache.org> on 2018/08/16 01:01:50 UTC
[GitHub] spark pull request #22031: [SPARK-23932][SQL] Higher order function zip_with
Github user techaddict commented on a diff in the pull request:
https://github.com/apache/spark/pull/22031#discussion_r210452329
--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -442,3 +442,91 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(left, right, func) - Merges the two given arrays, element-wise, into a single array using function. If one array is shorter, nulls are appended at the end to match the length of the longer array, before applying function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x));
+ array(('a', 1), ('b', 2), ('c', 3))
+ > SELECT _FUNC_(array(1, 2), array(3, 4), (x, y) -> x + y);
+ array(4, 6)
+ > SELECT _FUNC_(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) -> concat(x, y));
+ array('ad', 'be', 'cf')
+ """,
+ since = "2.4.0")
+// scalastyle:on line.size.limit
+case class ArraysZipWith(
+ left: Expression,
+ right: Expression,
+ function: Expression)
+ extends HigherOrderFunction with CodegenFallback with ExpectsInputTypes {
+
+ override def inputs: Seq[Expression] = List(left, right)
+
+ override def functions: Seq[Expression] = List(function)
+
+ def expectingFunctionType: AbstractDataType = AnyDataType
+ @transient lazy val functionForEval: Expression = functionsForEval.head
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, ArrayType, expectingFunctionType)
+
+ override def nullable: Boolean = inputs.exists(_.nullable)
+
+ override def dataType: ArrayType = ArrayType(function.dataType, function.nullable)
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): ArraysZipWith = {
+ val (leftElementType, leftContainsNull) = left.dataType match {
+ case ArrayType(elementType, containsNull) => (elementType, containsNull)
+ case _ =>
+ val ArrayType(elementType, containsNull) = ArrayType.defaultConcreteType
+ (elementType, containsNull)
+ }
+ val (rightElementType, rightContainsNull) = right.dataType match {
+ case ArrayType(elementType, containsNull) => (elementType, containsNull)
+ case _ =>
+ val ArrayType(elementType, containsNull) = ArrayType.defaultConcreteType
+ (elementType, containsNull)
+ }
+ copy(function = f(function,
+ (leftElementType, leftContainsNull) :: (rightElementType, rightContainsNull) :: Nil))
--- End diff --
@mn-mikke @ueshin "both arrays must be the same length" was how zip_with in Presto used to work; they have since moved to appending nulls to the shorter array and processing the arrays regardless of length mismatch.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org