You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by GitBox <gi...@apache.org> on 2022/02/18 02:37:16 UTC

[GitHub] [spark] beliefer commented on a change in pull request #35534: [SPARK-38240][SQL] Improve RuntimeReplaceable and add a guideline for adding new functions

beliefer commented on a change in pull request #35534:
URL: https://github.com/apache/spark/pull/35534#discussion_r809629257



##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
##########
@@ -352,34 +352,44 @@ trait Unevaluable extends Expression {
  * An expression that gets replaced at runtime (currently by the optimizer) into a different
  * expression for evaluation. This is mainly used to provide compatibility with other databases.
  * For example, we use this to support "nvl" by replacing it with "coalesce".
- *
- * A RuntimeReplaceable should have the original parameters along with a "child" expression in the
- * case class constructor, and define a normal constructor that accepts only the original
- * parameters. For an example, see [[Nvl]]. To make sure the explain plan and expression SQL
- * works correctly, the implementation should also override flatArguments method and sql method.
  */
-trait RuntimeReplaceable extends UnaryExpression with Unevaluable {
-  override def nullable: Boolean = child.nullable
-  override def dataType: DataType = child.dataType
+trait RuntimeReplaceable extends Expression {
+  def replacement: Expression
+
+  override val nodePatterns: Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE)
+  override def nullable: Boolean = replacement.nullable
+  override def dataType: DataType = replacement.dataType
   // As this expression gets replaced at optimization with its `child" expression,
   // two `RuntimeReplaceable` are considered to be semantically equal if their "child" expressions
   // are semantically equal.
-  override lazy val preCanonicalized: Expression = child.preCanonicalized
-
-  /**
-   * Only used to generate SQL representation of this expression.
-   *
-   * Implementations should override this with original parameters
-   */
-  def exprsReplaced: Seq[Expression]
-
-  override def sql: String = mkString(exprsReplaced.map(_.sql))
+  override lazy val preCanonicalized: Expression = replacement.preCanonicalized
 
-  final override val nodePatterns: Seq[TreePattern] = Seq(RUNTIME_REPLACEABLE)
+  final override def eval(input: InternalRow = null): Any =
+    throw QueryExecutionErrors.cannotEvaluateExpressionError(this)
+  final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
+    throw QueryExecutionErrors.cannotGenerateCodeForExpressionError(this)
+}
 
-  def mkString(childrenString: Seq[String]): String = {
-    prettyName + childrenString.mkString("(", ", ", ")")
+/**
+ * A special variant of [[RuntimeReplaceable]]. It makes `replacement` the child of the expression,
+ * to inherit the type coercion rules for it. The implementation should put `replacement` in the
+ * case class constructor, and define a normal constructor that accepts only the original
+ * parameters, or use `ExpressionBuilder`. For an example, see [[TryAdd]]. To make sure the explain
+ * plan and expression SQL works correctly, the implementation should also implement the
+ * `actualInputs` method, which should extracts the actual inputs after type coercion from
+ * `replacement`.
+ */
+trait RuntimeReplaceableInheritingTypeCoercion

Review comment:
       I'm not sure. It looks like we also could override `inputTypes` in `RuntimeReplaceable`.
   So, do we really need this trait ?

##########
File path: sql/core/src/test/resources/sql-functions/sql-expression-schema.md
##########
@@ -182,8 +181,8 @@
 | org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct<make_date(2013, 7, 15):date> |
 | org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct<make_interval(100, 11, 1, 1, 12, 30, 1.001001):interval> |
 | org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp(2014, 12, 28, 6, 30, 45.887):timestamp> |
-| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZ | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887):timestamp> |
-| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZ | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887):timestamp_ntz> |
+| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZExpressionBuilder$ | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887):timestamp> |
+| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZExpressionBuilder$ | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct<make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887):timestamp_ntz> |

Review comment:
       Although MakeTimestampNTZExpressionBuilder looks useful, but seems strange here.

##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
##########
@@ -405,30 +404,32 @@ case class AesDecrypt(
     input: Expression,
     key: Expression,
     mode: Expression,
-    padding: Expression,
-    child: Expression)
-  extends RuntimeReplaceable {
-
-  def this(input: Expression, key: Expression, mode: Expression, padding: Expression) = {
-    this(
-      input,
-      key,
-      mode,
-      padding,
-      StaticInvoke(
-        classOf[ExpressionImplUtils],
-        BinaryType,
-        "aesDecrypt",
-        Seq(input, key, mode, padding),
-        Seq(BinaryType, BinaryType, StringType, StringType)))
-  }
+    padding: Expression)
+  extends RuntimeReplaceable with ImplicitCastInputTypes {
+
+  lazy val replacement: Expression = StaticInvoke(
+    classOf[ExpressionImplUtils],
+    BinaryType,
+    "aesDecrypt",
+    Seq(input, key, mode, padding),
+    inputTypes)
+
   def this(input: Expression, key: Expression, mode: Expression) =
     this(input, key, mode, Literal("DEFAULT"))
   def this(input: Expression, key: Expression) =
     this(input, key, Literal("GCM"))
 
-  def exprsReplaced: Seq[Expression] = Seq(input, key)

Review comment:
       Yes.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org