Posted to commits@spark.apache.org by ma...@apache.org on 2023/03/09 06:13:25 UTC

[spark] branch master updated: [SPARK-42701][SQL] Add the `try_aes_decrypt()` function

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d10330720f [SPARK-42701][SQL] Add the `try_aes_decrypt()` function
7d10330720f is described below

commit 7d10330720f600d7d1aca3ea1ccfcf1f74f41136
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Thu Mar 9 09:13:05 2023 +0300

    [SPARK-42701][SQL] Add the `try_aes_decrypt()` function
    
    ### What changes were proposed in this pull request?
    In this PR, I propose to add a new function `try_aes_decrypt()` which binds to the new expression `TryAesDecrypt`, a runtime-replaceable expression that combines `TryEval` and `AesDecrypt`.
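
    As a minimal sketch of what this amounts to at the expression level (based only on the constructors added in this commit; the literal inputs are purely illustrative), the parser-facing constructors fill in the documented defaults and expose a `replacement` of `TryEval(AesDecrypt(...))`:
    ```scala
    import org.apache.spark.sql.catalyst.expressions.{Literal, TryAesDecrypt, TryEval}

    // Two-argument form: mode defaults to 'GCM' and padding to 'DEFAULT'.
    val expr = new TryAesDecrypt(Literal(Array[Byte](1, 2, 3)), Literal("0000111122223333"))

    // The expression is runtime replaceable: at analysis time it is rewritten to
    // TryEval(AesDecrypt(...)), which maps decryption errors to NULL.
    assert(expr.replacement.isInstanceOf[TryEval])
    ```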
    
    ### Why are the changes needed?
    The changes improve the user experience with Spark SQL. The existing function `aes_decrypt()` fails with an exception as soon as it encounters invalid input that cannot be decrypted, and the remaining values (even those that could be decrypted) are ignored. The new function returns `NULL` for bad inputs and decrypts the other values.
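
    As a quick illustration (a sketch only, assuming an active `SparkSession` named `spark`; the ciphertext, key, and expected results are taken from the `ExpressionDescription` examples added in this commit):
    ```scala
    // Decryptable input: the plaintext is recovered.
    spark.sql("""
      SELECT CAST(try_aes_decrypt(
        unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'),
        '0000111122223333', 'GCM') AS STRING) AS plaintext
    """).show(truncate = false)
    // expected: Spark SQL

    // Input that cannot be decrypted: the documented example yields NULL
    // instead of raising an error, so the remaining rows are still decrypted.
    spark.sql("""
      SELECT try_aes_decrypt(
        unhex('----------468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'),
        '0000111122223333', 'GCM') AS plaintext
    """).show()
    // expected: NULL
    ```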
    
    ### Does this PR introduce _any_ user-facing change?
    No. This PR just extends the existing API.
    
    ### How was this patch tested?
    By running the affected test suites:
    ```
    $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite"
    $ build/sbt "sql/testOnly *ExpressionsSchemaSuite"
    ```
    
    Closes #40340 from MaxGekk/try_aes_decrypt.
    
    Authored-by: Max Gekk <ma...@gmail.com>
    Signed-off-by: Max Gekk <ma...@gmail.com>
---
 .../sql/catalyst/analysis/FunctionRegistry.scala   |  1 +
 .../spark/sql/catalyst/expressions/misc.scala      | 34 ++++++++++++++++++++++
 .../sql-functions/sql-expression-schema.md         |  1 +
 3 files changed, 36 insertions(+)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 103e6aae603..ad82a836199 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -452,6 +452,7 @@ object FunctionRegistry {
     expressionBuilder("try_sum", TrySumExpressionBuilder, setAlias = true),
     expression[TryToBinary]("try_to_binary"),
     expressionBuilder("try_to_timestamp", TryToTimestampExpressionBuilder, setAlias = true),
+    expression[TryAesDecrypt]("try_aes_decrypt"),
 
     // aggregate functions
     expression[HyperLogLogPlusPlus]("approx_count_distinct"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index bf9dd700dfa..300fab0386c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -432,4 +432,38 @@ case class AesDecrypt(
     copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3))
   }
 }
+
+@ExpressionDescription(
+  usage = "_FUNC_(expr, key[, mode[, padding]]) - This is a special version of `aes_decrypt` that performs the same operation, but returns a NULL value instead of raising an error if the decryption cannot be performed.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM');
+       Spark SQL
+      > SELECT _FUNC_(unhex('----------468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM');
+       NULL
+  """,
+  since = "3.5.0",
+  group = "misc_funcs")
+// scalastyle:on line.size.limit
+case class TryAesDecrypt(
+    input: Expression,
+    key: Expression,
+    mode: Expression,
+    padding: Expression,
+    replacement: Expression) extends RuntimeReplaceable with InheritAnalysisRules {
+
+  def this(input: Expression, key: Expression, mode: Expression, padding: Expression) =
+    this(input, key, mode, padding, TryEval(AesDecrypt(input, key, mode, padding)))
+  def this(input: Expression, key: Expression, mode: Expression) =
+    this(input, key, mode, Literal("DEFAULT"))
+  def this(input: Expression, key: Expression) =
+    this(input, key, Literal("GCM"))
+
+  override def prettyName: String = "try_aes_decrypt"
+
+  override def parameters: Seq[Expression] = Seq(input, key, mode, padding)
+
+  override protected def withNewChildInternal(newChild: Expression): Expression =
+    this.copy(replacement = newChild)
+}
 // scalastyle:on line.size.limit
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 03ec4bce54b..0894d03f9d4 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -327,6 +327,7 @@
 | org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct<trunc(2019-08-04, week):date> |
 | org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, 2015-03-05T09:32:05.359):timestamp> |
 | org.apache.spark.sql.catalyst.expressions.TryAdd | try_add | SELECT try_add(1, 2) | struct<try_add(1, 2):int> |
+| org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct<try_aes_decrypt(unhex(6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210), 0000111122223333, GCM, DEFAULT):binary> |
 | org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct<try_divide(3, 2):double> |
 | org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct<try_element_at(array(1, 2, 3), 2):int> |
 | org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct<try_multiply(2, 3):int> |

