You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/12/09 01:40:44 UTC

[GitHub] [spark] beliefer commented on a diff in pull request #38874: [SPARK-41235][SQL][PYTHON]High-order function: array_compact implementation

beliefer commented on code in PR #38874:
URL: https://github.com/apache/spark/pull/38874#discussion_r1043982018


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala:
##########
@@ -4600,3 +4600,57 @@ case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryL
   override protected def withNewChildrenInternal(
     newLeft: Expression, newRight: Expression): ArrayExcept = copy(left = newLeft, right = newRight)
 }
+
+@ExpressionDescription(
+  usage = "_FUNC_(array) - Removes null values from the array.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array(1, 2, 3, null));
+       [1,2,3]
+  """,
+  group = "array_funcs",
+  since = "3.4.0")
+case class ArrayCompact(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes with NullIntolerant {
+  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType)
+  override def dataType: DataType = child.dataType
+
+  @transient private lazy val elementType: DataType = dataType.asInstanceOf[ArrayType].elementType
+
+  override def nullSafeEval(array: Any): Any = {
+    val newArray = new Array[Any](array.asInstanceOf[ArrayData].numElements())
+    var pos = 0
+    var hasNull = false
+    array.asInstanceOf[ArrayData].foreach(elementType, (index, v) =>
+      // Once a null has been seen, append every later non-null element to newArray.
+      if (v != null && hasNull) {
+        newArray(pos) = v
+        pos += 1
+      } else if (v == null && !hasNull) {
+        hasNull = true
+        // First null encountered: back-fill newArray with the elements that precede it.
+        for(i <- 0 until index) {
+          newArray(pos) = array.asInstanceOf[ArrayData].get(i, elementType)
+          pos += 1
+        }
+      }
+    )
+    if (hasNull) {
+      new GenericArrayData(newArray.slice(0, pos))
+    } else {
+      array
+    }
+  }
+  override def prettyName: String = "array_compact"
+
+  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+
+    nullSafeCodeGen(ctx, ev, array => {
+      val expr = ctx.addReferenceObj("arrayCompactExpr", this)
+      s"${ev.value} = (ArrayData)$expr.nullSafeEval($array);"

Review Comment:
   This implementation is somewhat hacky. Please see the discussion at https://github.com/apache/spark/pull/38865#discussion_r1043977011



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org