You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ue...@apache.org on 2018/09/18 03:45:02 UTC

spark git commit: [SPARK-25444][SQL] Refactor GenArrayData.genCodeToCreateArrayData method

Repository: spark
Updated Branches:
  refs/heads/master 0f1413e32 -> acc645257


[SPARK-25444][SQL] Refactor GenArrayData.genCodeToCreateArrayData method

## What changes were proposed in this pull request?

This PR makes `GenArrayData.genCodeToCreateArrayData` method simple by using `ArrayData.createArrayData` method.

Before this PR, `genCodeToCreateArrayData` method was complicated
* Generated a temporary Java array to create `ArrayData`
* Had separate code generation path to assign values for `GenericArrayData` and `UnsafeArrayData`

After this PR, the method
* Directly generates `GenericArrayData` or `UnsafeArrayData` without a temporary array
* Has only code generation path to assign values

## How was this patch tested?

Existing UTs

Closes #22439 from kiszk/SPARK-25444.

Authored-by: Kazuaki Ishizaki <is...@jp.ibm.com>
Signed-off-by: Takuya UESHIN <ue...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acc64525
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acc64525
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acc64525

Branch: refs/heads/master
Commit: acc6452579aca99aae9f9787ddbe5c4aeb170e58
Parents: 0f1413e
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Authored: Tue Sep 18 12:44:54 2018 +0900
Committer: Takuya UESHIN <ue...@databricks.com>
Committed: Tue Sep 18 12:44:54 2018 +0900

----------------------------------------------------------------------
 .../expressions/complexTypeCreator.scala        | 122 +++++++------------
 1 file changed, 45 insertions(+), 77 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/acc64525/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index fd8b5e9..0361372 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -61,11 +61,10 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val et = dataType.elementType
-    val evals = children.map(e => e.genCode(ctx))
-    val (preprocess, assigns, postprocess, arrayData) =
-      GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false)
+    val (allocation, assigns, arrayData) =
+      GenArrayData.genCodeToCreateArrayData(ctx, et, children, false, "createArray")
     ev.copy(
-      code = code"${preprocess}${assigns}${postprocess}",
+      code = code"${allocation}${assigns}",
       value = JavaCode.variable(arrayData, dataType),
       isNull = FalseLiteral)
   }
@@ -75,87 +74,60 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
 
 private [sql] object GenArrayData {
   /**
-   * Return Java code pieces based on DataType and isPrimitive to allocate ArrayData class
+   * Return Java code pieces based on DataType and array size to allocate ArrayData class
    *
    * @param ctx a [[CodegenContext]]
    * @param elementType data type of underlying array elements
-   * @param elementsCode concatenated set of [[ExprCode]] for each element of an underlying array
+   * @param elementsExpr concatenated set of [[Expression]] for each element of an underlying array
    * @param isMapKey if true, throw an exception when the element is null
-   * @return (code pre-assignments, concatenated assignments to each array elements,
-   *           code post-assignments, arrayData name)
+   * @param functionName string to include in the error message
+   * @return (array allocation, concatenated assignments to each array elements, arrayData name)
    */
   def genCodeToCreateArrayData(
       ctx: CodegenContext,
       elementType: DataType,
-      elementsCode: Seq[ExprCode],
-      isMapKey: Boolean): (String, String, String, String) = {
+      elementsExpr: Seq[Expression],
+      isMapKey: Boolean,
+      functionName: String): (String, String, String) = {
     val arrayDataName = ctx.freshName("arrayData")
-    val numElements = elementsCode.length
+    val numElements = s"${elementsExpr.length}L"
 
-    if (!CodeGenerator.isPrimitiveType(elementType)) {
-      val arrayName = ctx.freshName("arrayObject")
-      val genericArrayClass = classOf[GenericArrayData].getName
+    val initialization = CodeGenerator.createArrayData(
+      arrayDataName, elementType, numElements, s" $functionName failed.")
 
-      val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
-        val isNullAssignment = if (!isMapKey) {
-          s"$arrayName[$i] = null;"
-        } else {
-          "throw new RuntimeException(\"Cannot use null as map key!\");"
-        }
-        eval.code + s"""
-         if (${eval.isNull}) {
-           $isNullAssignment
-         } else {
-           $arrayName[$i] = ${eval.value};
-         }
-       """
-      }
-      val assignmentString = ctx.splitExpressionsWithCurrentInputs(
-        expressions = assignments,
-        funcName = "apply",
-        extraArguments = ("Object[]", arrayName) :: Nil)
-
-      (s"Object[] $arrayName = new Object[$numElements];",
-       assignmentString,
-       s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);",
-       arrayDataName)
-    } else {
-      val arrayName = ctx.freshName("array")
-      val unsafeArraySizeInBytes =
-        UnsafeArrayData.calculateHeaderPortionInBytes(numElements) +
-        ByteArrayMethods.roundNumberOfBytesToNearestWord(elementType.defaultSize * numElements)
-      val baseOffset = Platform.BYTE_ARRAY_OFFSET
-
-      val primitiveValueTypeName = CodeGenerator.primitiveTypeName(elementType)
-      val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
+    val assignments = elementsExpr.zipWithIndex.map { case (expr, i) =>
+      val eval = expr.genCode(ctx)
+      val setArrayElement = CodeGenerator.setArrayElement(
+        arrayDataName, elementType, i.toString, eval.value)
+
+      val assignment = if (!expr.nullable) {
+        setArrayElement
+      } else {
         val isNullAssignment = if (!isMapKey) {
           s"$arrayDataName.setNullAt($i);"
         } else {
           "throw new RuntimeException(\"Cannot use null as map key!\");"
         }
-        eval.code + s"""
-         if (${eval.isNull}) {
-           $isNullAssignment
-         } else {
-           $arrayDataName.set$primitiveValueTypeName($i, ${eval.value});
-         }
-       """
+
+        s"""
+           |if (${eval.isNull}) {
+           |  $isNullAssignment
+           |} else {
+           |  $setArrayElement
+           |}
+         """.stripMargin
       }
-      val assignmentString = ctx.splitExpressionsWithCurrentInputs(
-        expressions = assignments,
-        funcName = "apply",
-        extraArguments = ("UnsafeArrayData", arrayDataName) :: Nil)
-
-      (s"""
-        byte[] $arrayName = new byte[$unsafeArraySizeInBytes];
-        UnsafeArrayData $arrayDataName = new UnsafeArrayData();
-        Platform.putLong($arrayName, $baseOffset, $numElements);
-        $arrayDataName.pointTo($arrayName, $baseOffset, $unsafeArraySizeInBytes);
-      """,
-       assignmentString,
-       "",
-       arrayDataName)
+      s"""
+         |${eval.code}
+         |$assignment
+       """.stripMargin
     }
+    val assignmentString = ctx.splitExpressionsWithCurrentInputs(
+      expressions = assignments,
+      funcName = "apply",
+      extraArguments = ("ArrayData", arrayDataName) :: Nil)
+
+    (initialization, assignmentString, arrayDataName)
   }
 }
 
@@ -216,21 +188,17 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val mapClass = classOf[ArrayBasedMapData].getName
     val MapType(keyDt, valueDt, _) = dataType
-    val evalKeys = keys.map(e => e.genCode(ctx))
-    val evalValues = values.map(e => e.genCode(ctx))
-    val (preprocessKeyData, assignKeys, postprocessKeyData, keyArrayData) =
-      GenArrayData.genCodeToCreateArrayData(ctx, keyDt, evalKeys, true)
-    val (preprocessValueData, assignValues, postprocessValueData, valueArrayData) =
-      GenArrayData.genCodeToCreateArrayData(ctx, valueDt, evalValues, false)
+    val (allocationKeyData, assignKeys, keyArrayData) =
+      GenArrayData.genCodeToCreateArrayData(ctx, keyDt, keys, true, "createMap")
+    val (allocationValueData, assignValues, valueArrayData) =
+      GenArrayData.genCodeToCreateArrayData(ctx, valueDt, values, false, "createMap")
     val code =
       code"""
        final boolean ${ev.isNull} = false;
-       $preprocessKeyData
+       $allocationKeyData
        $assignKeys
-       $postprocessKeyData
-       $preprocessValueData
+       $allocationValueData
        $assignValues
-       $postprocessValueData
        final MapData ${ev.value} = new $mapClass($keyArrayData, $valueArrayData);
       """
     ev.copy(code = code)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org