You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/03/09 06:13:25 UTC
[spark] branch master updated: [SPARK-42701][SQL] Add the `try_aes_decrypt()` function
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 7d10330720f [SPARK-42701][SQL] Add the `try_aes_decrypt()` function
7d10330720f is described below
commit 7d10330720f600d7d1aca3ea1ccfcf1f74f41136
Author: Max Gekk <ma...@gmail.com>
AuthorDate: Thu Mar 9 09:13:05 2023 +0300
[SPARK-42701][SQL] Add the `try_aes_decrypt()` function
### What changes were proposed in this pull request?
In the PR, I propose to add new function `try_aes_decrypt()` which binds to new expression `TryAesDecrypt` that is a runtime replaceable expression of the combination of `TryEval` and `AesDecrypt`.
### Why are the changes needed?
The changes improve the user experience with Spark SQL. The existing function `aes_decrypt()` fails with an exception as soon as it encounters some invalid input that cannot be decrypted, and the remaining values (even those that could be decrypted) are ignored. The new function returns `NULL` on bad inputs and decrypts the other values.
### Does this PR introduce _any_ user-facing change?
No. This PR just extends the existing API.
### How was this patch tested?
By running the affected test suites:
```
$ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite"
$ build/sbt "sql/testOnly *ExpressionsSchemaSuite"
```
Closes #40340 from MaxGekk/try_aes_decrypt.
Authored-by: Max Gekk <ma...@gmail.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../sql/catalyst/analysis/FunctionRegistry.scala | 1 +
.../spark/sql/catalyst/expressions/misc.scala | 34 ++++++++++++++++++++++
.../sql-functions/sql-expression-schema.md | 1 +
3 files changed, 36 insertions(+)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 103e6aae603..ad82a836199 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -452,6 +452,7 @@ object FunctionRegistry {
expressionBuilder("try_sum", TrySumExpressionBuilder, setAlias = true),
expression[TryToBinary]("try_to_binary"),
expressionBuilder("try_to_timestamp", TryToTimestampExpressionBuilder, setAlias = true),
+ expression[TryAesDecrypt]("try_aes_decrypt"),
// aggregate functions
expression[HyperLogLogPlusPlus]("approx_count_distinct"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index bf9dd700dfa..300fab0386c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -432,4 +432,38 @@ case class AesDecrypt(
copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3))
}
}
+
+@ExpressionDescription(
+ usage = "_FUNC_(expr, key[, mode[, padding]]) - This is a special version of `aes_decrypt` that performs the same operation, but returns a NULL value instead of raising an error if the decryption cannot be performed.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM');
+ Spark SQL
+ > SELECT _FUNC_(unhex('----------468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM');
+ NULL
+ """,
+ since = "3.5.0",
+ group = "misc_funcs")
+// scalastyle:on line.size.limit
+case class TryAesDecrypt(
+ input: Expression,
+ key: Expression,
+ mode: Expression,
+ padding: Expression,
+ replacement: Expression) extends RuntimeReplaceable with InheritAnalysisRules {
+
+ def this(input: Expression, key: Expression, mode: Expression, padding: Expression) =
+ this(input, key, mode, padding, TryEval(AesDecrypt(input, key, mode, padding)))
+ def this(input: Expression, key: Expression, mode: Expression) =
+ this(input, key, mode, Literal("DEFAULT"))
+ def this(input: Expression, key: Expression) =
+ this(input, key, Literal("GCM"))
+
+ override def prettyName: String = "try_aes_decrypt"
+
+ override def parameters: Seq[Expression] = Seq(input, key, mode, padding)
+
+ override protected def withNewChildInternal(newChild: Expression): Expression =
+ this.copy(replacement = newChild)
+}
// scalastyle:on line.size.limit
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 03ec4bce54b..0894d03f9d4 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -327,6 +327,7 @@
| org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct<trunc(2019-08-04, week):date> |
| org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, 2015-03-05T09:32:05.359):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TryAdd | try_add | SELECT try_add(1, 2) | struct<try_add(1, 2):int> |
+| org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct<try_aes_decrypt(unhex(6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210), 0000111122223333, GCM, DEFAULT):binary> |
| org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct<try_divide(3, 2):double> |
| org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct<try_element_at(array(1, 2, 3), 2):int> |
| org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct<try_multiply(2, 3):int> |
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org