You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by "cloud-fan (via GitHub)" <gi...@apache.org> on 2023/03/15 12:46:08 UTC
[GitHub] [spark] cloud-fan commented on a diff in pull request #40397: [SPARK-42052][SQL] Codegen Support for HiveSimpleUDF

cloud-fan commented on code in PR #40397:
URL: https://github.com/apache/spark/pull/40397#discussion_r1137003368


##########
sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala:
##########
@@ -49,68 +48,140 @@ private[hive] case class HiveSimpleUDF(
     name: String, funcWrapper: HiveFunctionWrapper, children: Seq[Expression])
   extends Expression
   with HiveInspectors
-  with CodegenFallback
-  with Logging
   with UserDefinedExpression {
 
+  @transient
+  private lazy val evaluator = new HiveSimpleUDFEvaluator(funcWrapper, children)
+
+  @transient
+  private val isUDFDeterministic = {
+    val udfType = evaluator.function.getClass.getAnnotation(classOf[HiveUDFType])
+    udfType != null && udfType.deterministic() && !udfType.stateful()
+  }
+
   override lazy val deterministic: Boolean = isUDFDeterministic && children.forall(_.deterministic)
 
   override def nullable: Boolean = true
 
-  @transient
-  lazy val function = funcWrapper.createFunction[UDF]()
+  override def foldable: Boolean = isUDFDeterministic && children.forall(_.foldable)
+
+  override lazy val dataType: DataType = javaTypeToDataType(evaluator.method.getGenericReturnType)
+
+  // TODO: Finish input output types.
+  override def eval(input: InternalRow): Any = {
+    children.zipWithIndex.map {
+      case (child, idx) =>
+        evaluator.setArg(idx, child.eval(input))
+    }
+    evaluator.evaluate()
+  }
+
+  override def toString: String = {
+    s"$nodeName#${funcWrapper.functionClassName}(${children.mkString(",")})"
+  }
+
+  override def prettyName: String = name
+
+  override def sql: String = s"$name(${children.map(_.sql).mkString(", ")})"
+
+  override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression =
+    copy(children = newChildren)
+
+  protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+    evaluator.doGenCode(ctx, ev, dataType)
+  }
+}
+
+abstract class HiveUDFEvaluatorBase[UDFType <: AnyRef](
+    funcWrapper: HiveFunctionWrapper, children: Seq[Expression])
+  extends HiveInspectors with Serializable {
 
   @transient
-  private lazy val method =
-    function.getResolver.getEvalMethod(children.map(_.dataType.toTypeInfo).asJava)
+  lazy val function = funcWrapper.createFunction[UDFType]()
+
+  def setArg(index: Int, arg: Any): Unit
+
+  def evaluate(): Any
+
+  final def doGenCode(ctx: CodegenContext, ev: ExprCode, dataType: DataType): ExprCode = {

Review Comment:
   It's weird to implement codegen in the evaluator. If we really want to deduplicate the code, let's add a `HiveUDFExpressionBase` later.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org