You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ue...@apache.org on 2018/09/18 03:45:02 UTC
spark git commit: [SPARK-25444][SQL] Refactor
GenArrayData.genCodeToCreateArrayData method
Repository: spark
Updated Branches:
refs/heads/master 0f1413e32 -> acc645257
[SPARK-25444][SQL] Refactor GenArrayData.genCodeToCreateArrayData method
## What changes were proposed in this pull request?
This PR makes `GenArrayData.genCodeToCreateArrayData` method simple by using `ArrayData.createArrayData` method.
Before this PR, `genCodeToCreateArrayData` method was complicated
* Generated a temporary Java array to create `ArrayData`
* Had separate code generation path to assign values for `GenericArrayData` and `UnsafeArrayData`
After this PR, the method
* Directly generates `GenericArrayData` or `UnsafeArrayData` without a temporary array
* Has only code generation path to assign values
## How was this patch tested?
Existing UTs
Closes #22439 from kiszk/SPARK-25444.
Authored-by: Kazuaki Ishizaki <is...@jp.ibm.com>
Signed-off-by: Takuya UESHIN <ue...@databricks.com>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acc64525
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acc64525
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acc64525
Branch: refs/heads/master
Commit: acc6452579aca99aae9f9787ddbe5c4aeb170e58
Parents: 0f1413e
Author: Kazuaki Ishizaki <is...@jp.ibm.com>
Authored: Tue Sep 18 12:44:54 2018 +0900
Committer: Takuya UESHIN <ue...@databricks.com>
Committed: Tue Sep 18 12:44:54 2018 +0900
----------------------------------------------------------------------
.../expressions/complexTypeCreator.scala | 122 +++++++------------
1 file changed, 45 insertions(+), 77 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/acc64525/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index fd8b5e9..0361372 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -61,11 +61,10 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val et = dataType.elementType
- val evals = children.map(e => e.genCode(ctx))
- val (preprocess, assigns, postprocess, arrayData) =
- GenArrayData.genCodeToCreateArrayData(ctx, et, evals, false)
+ val (allocation, assigns, arrayData) =
+ GenArrayData.genCodeToCreateArrayData(ctx, et, children, false, "createArray")
ev.copy(
- code = code"${preprocess}${assigns}${postprocess}",
+ code = code"${allocation}${assigns}",
value = JavaCode.variable(arrayData, dataType),
isNull = FalseLiteral)
}
@@ -75,87 +74,60 @@ case class CreateArray(children: Seq[Expression]) extends Expression {
private [sql] object GenArrayData {
/**
- * Return Java code pieces based on DataType and isPrimitive to allocate ArrayData class
+ * Return Java code pieces based on DataType and array size to allocate ArrayData class
*
* @param ctx a [[CodegenContext]]
* @param elementType data type of underlying array elements
- * @param elementsCode concatenated set of [[ExprCode]] for each element of an underlying array
+ * @param elementsExpr concatenated set of [[Expression]] for each element of an underlying array
* @param isMapKey if true, throw an exception when the element is null
- * @return (code pre-assignments, concatenated assignments to each array elements,
- * code post-assignments, arrayData name)
+ * @param functionName string to include in the error message
+ * @return (array allocation, concatenated assignments to each array elements, arrayData name)
*/
def genCodeToCreateArrayData(
ctx: CodegenContext,
elementType: DataType,
- elementsCode: Seq[ExprCode],
- isMapKey: Boolean): (String, String, String, String) = {
+ elementsExpr: Seq[Expression],
+ isMapKey: Boolean,
+ functionName: String): (String, String, String) = {
val arrayDataName = ctx.freshName("arrayData")
- val numElements = elementsCode.length
+ val numElements = s"${elementsExpr.length}L"
- if (!CodeGenerator.isPrimitiveType(elementType)) {
- val arrayName = ctx.freshName("arrayObject")
- val genericArrayClass = classOf[GenericArrayData].getName
+ val initialization = CodeGenerator.createArrayData(
+ arrayDataName, elementType, numElements, s" $functionName failed.")
- val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
- val isNullAssignment = if (!isMapKey) {
- s"$arrayName[$i] = null;"
- } else {
- "throw new RuntimeException(\"Cannot use null as map key!\");"
- }
- eval.code + s"""
- if (${eval.isNull}) {
- $isNullAssignment
- } else {
- $arrayName[$i] = ${eval.value};
- }
- """
- }
- val assignmentString = ctx.splitExpressionsWithCurrentInputs(
- expressions = assignments,
- funcName = "apply",
- extraArguments = ("Object[]", arrayName) :: Nil)
-
- (s"Object[] $arrayName = new Object[$numElements];",
- assignmentString,
- s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);",
- arrayDataName)
- } else {
- val arrayName = ctx.freshName("array")
- val unsafeArraySizeInBytes =
- UnsafeArrayData.calculateHeaderPortionInBytes(numElements) +
- ByteArrayMethods.roundNumberOfBytesToNearestWord(elementType.defaultSize * numElements)
- val baseOffset = Platform.BYTE_ARRAY_OFFSET
-
- val primitiveValueTypeName = CodeGenerator.primitiveTypeName(elementType)
- val assignments = elementsCode.zipWithIndex.map { case (eval, i) =>
+ val assignments = elementsExpr.zipWithIndex.map { case (expr, i) =>
+ val eval = expr.genCode(ctx)
+ val setArrayElement = CodeGenerator.setArrayElement(
+ arrayDataName, elementType, i.toString, eval.value)
+
+ val assignment = if (!expr.nullable) {
+ setArrayElement
+ } else {
val isNullAssignment = if (!isMapKey) {
s"$arrayDataName.setNullAt($i);"
} else {
"throw new RuntimeException(\"Cannot use null as map key!\");"
}
- eval.code + s"""
- if (${eval.isNull}) {
- $isNullAssignment
- } else {
- $arrayDataName.set$primitiveValueTypeName($i, ${eval.value});
- }
- """
+
+ s"""
+ |if (${eval.isNull}) {
+ | $isNullAssignment
+ |} else {
+ | $setArrayElement
+ |}
+ """.stripMargin
}
- val assignmentString = ctx.splitExpressionsWithCurrentInputs(
- expressions = assignments,
- funcName = "apply",
- extraArguments = ("UnsafeArrayData", arrayDataName) :: Nil)
-
- (s"""
- byte[] $arrayName = new byte[$unsafeArraySizeInBytes];
- UnsafeArrayData $arrayDataName = new UnsafeArrayData();
- Platform.putLong($arrayName, $baseOffset, $numElements);
- $arrayDataName.pointTo($arrayName, $baseOffset, $unsafeArraySizeInBytes);
- """,
- assignmentString,
- "",
- arrayDataName)
+ s"""
+ |${eval.code}
+ |$assignment
+ """.stripMargin
}
+ val assignmentString = ctx.splitExpressionsWithCurrentInputs(
+ expressions = assignments,
+ funcName = "apply",
+ extraArguments = ("ArrayData", arrayDataName) :: Nil)
+
+ (initialization, assignmentString, arrayDataName)
}
}
@@ -216,21 +188,17 @@ case class CreateMap(children: Seq[Expression]) extends Expression {
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val mapClass = classOf[ArrayBasedMapData].getName
val MapType(keyDt, valueDt, _) = dataType
- val evalKeys = keys.map(e => e.genCode(ctx))
- val evalValues = values.map(e => e.genCode(ctx))
- val (preprocessKeyData, assignKeys, postprocessKeyData, keyArrayData) =
- GenArrayData.genCodeToCreateArrayData(ctx, keyDt, evalKeys, true)
- val (preprocessValueData, assignValues, postprocessValueData, valueArrayData) =
- GenArrayData.genCodeToCreateArrayData(ctx, valueDt, evalValues, false)
+ val (allocationKeyData, assignKeys, keyArrayData) =
+ GenArrayData.genCodeToCreateArrayData(ctx, keyDt, keys, true, "createMap")
+ val (allocationValueData, assignValues, valueArrayData) =
+ GenArrayData.genCodeToCreateArrayData(ctx, valueDt, values, false, "createMap")
val code =
code"""
final boolean ${ev.isNull} = false;
- $preprocessKeyData
+ $allocationKeyData
$assignKeys
- $postprocessKeyData
- $preprocessValueData
+ $allocationValueData
$assignValues
- $postprocessValueData
final MapData ${ev.value} = new $mapClass($keyArrayData, $valueArrayData);
"""
ev.copy(code = code)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org