You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ma...@apache.org on 2023/06/16 12:42:21 UTC
[spark] branch master updated: [SPARK-43290][SQL] Adds support for aes_encrypt IVs and AAD
This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new fb1ee25a89e [SPARK-43290][SQL] Adds support for aes_encrypt IVs and AAD
fb1ee25a89e is described below
commit fb1ee25a89e8b42178b7f55718859ab5117c2320
Author: Steve Weis <st...@databricks.com>
AuthorDate: Fri Jun 16 15:42:05 2023 +0300
[SPARK-43290][SQL] Adds support for aes_encrypt IVs and AAD
### What changes were proposed in this pull request?
This change adds support for user-provided initialization vectors (IVs) or authenticated additional data (AAD) to `aes_encrypt` / `aes_decrypt`. 12-byte IVs may optionally be passed if the mode is "GCM" and 16-byte IVs may be passed if the mode is "CBC". An arbitrary binary value may be passed as additional authenticated data only if "GCM" mode is used.
### Why are the changes needed?
Callers may wish to provide their own IV values so that the output ciphertext matches a ciphertext generated outside of Spark. AAD is used to bind some input to a ciphertext and ensure that it is presented during decryption -- often used to scope an operation to a specific context.
### Does this PR introduce _any_ user-facing change?
Yes, this change introduces two optional parameters to `aes_encrypt` and one optional parameter to `aes_decrypt`:
```
aes_encrypt(expr, key[, mode[, padding[, iv[, aad]]]])
aes_decrypt(expr, key[, mode[, padding[, iv]]])
```
### How was this patch tested?
```
build/sbt "sql/test:testOnly org.apache.spark.sql.DataFrameFunctionsSuite -- -z aes"
```
Closes #41488 from sweisdb/SPARK-43290.
Authored-by: Steve Weis <st...@databricks.com>
Signed-off-by: Max Gekk <ma...@gmail.com>
---
.../catalyst/expressions/ExpressionImplUtils.java | 14 +----
.../spark/sql/catalyst/expressions/misc.scala | 64 +++++++++++++++++-----
.../expressions/ExpressionImplUtilsSuite.scala | 23 +++++++-
.../sql-functions/sql-expression-schema.md | 6 +-
.../apache/spark/sql/DataFrameFunctionsSuite.scala | 50 +++++++++++++++++
5 files changed, 127 insertions(+), 30 deletions(-)
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
index 6aae649718a..a604e6bf225 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
@@ -111,14 +111,6 @@ public class ExpressionImplUtils {
return checkSum % 10 == 0;
}
- public static byte[] aesEncrypt(byte[] input, byte[] key, UTF8String mode, UTF8String padding) {
- return aesEncrypt(input, key, mode, padding, null, null);
- }
-
- public static byte[] aesDecrypt(byte[] input, byte[] key, UTF8String mode, UTF8String padding) {
- return aesDecrypt(input, key, mode, padding, null);
- }
-
public static byte[] aesEncrypt(byte[] input,
byte[] key,
UTF8String mode,
@@ -192,7 +184,7 @@ public class ExpressionImplUtils {
Cipher cipher = Cipher.getInstance(cipherMode.transformation);
if (opmode == Cipher.ENCRYPT_MODE) {
// This may be 0-length for ECB
- if (iv == null) {
+ if (iv == null || iv.length == 0) {
iv = generateIv(cipherMode);
} else if (!cipherMode.usesSpec) {
// If the caller passes an IV, ensure the mode actually uses it.
@@ -210,7 +202,7 @@ public class ExpressionImplUtils {
}
// If the cipher mode supports additional authenticated data and it is provided, update it
- if (aad != null) {
+ if (aad != null && aad.length != 0) {
if (cipherMode.supportsAad != true) {
throw QueryExecutionErrors.aesUnsupportedAad(mode);
}
@@ -231,7 +223,7 @@ public class ExpressionImplUtils {
if (cipherMode.usesSpec) {
AlgorithmParameterSpec algSpec = getParamSpec(cipherMode, input);
cipher.init(opmode, secretKey, algSpec);
- if (aad != null) {
+ if (aad != null && aad.length != 0) {
if (cipherMode.supportsAad != true) {
throw QueryExecutionErrors.aesUnsupportedAad(mode);
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
index 67328cde71a..92ed0843521 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala
@@ -312,8 +312,10 @@ case class CurrentUser() extends LeafExpression with Unevaluable {
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """
- _FUNC_(expr, key[, mode[, padding]]) - Returns an encrypted value of `expr` using AES in given `mode` with the specified `padding`.
+ _FUNC_(expr, key[, mode[, padding[, iv[, aad]]]]) - Returns an encrypted value of `expr` using AES in given `mode` with the specified `padding`.
Key lengths of 16, 24 and 32 bits are supported. Supported combinations of (`mode`, `padding`) are ('ECB', 'PKCS'), ('GCM', 'NONE') and ('CBC', 'PKCS').
+ Optional initialization vectors (IVs) are only supported for CBC and GCM modes. These must be 16 bytes for CBC and 12 bytes for GCM. If not provided, a random vector will be generated and prepended to the output.
+ Optional additional authenticated data (AAD) is only supported for GCM. If provided for encryption, the identical AAD value must be provided for decryption.
The default mode is GCM.
""",
arguments = """
@@ -324,6 +326,10 @@ case class CurrentUser() extends LeafExpression with Unevaluable {
Valid modes: ECB, GCM, CBC.
* padding - Specifies how to pad messages whose length is not a multiple of the block size.
Valid values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for CBC.
+ * iv - Optional initialization vector. Only supported for CBC and GCM modes.
+ Valid values: None or ''. 16-byte array for CBC mode. 12-byte array for GCM mode.
+ * aad - Optional additional authenticated data. Only supported for GCM mode. This can be any free-form input and
+ must be provided for both encryption and decryption.
""",
examples = """
Examples:
@@ -335,6 +341,10 @@ case class CurrentUser() extends LeafExpression with Unevaluable {
3lmwu+Mw0H3fi5NDvcu9lg==
> SELECT base64(_FUNC_('Apache Spark', '1234567890abcdef', 'CBC', 'DEFAULT'));
2NYmDCjgXTbbxGA3/SnJEfFC/JQ7olk2VQWReIAAFKo=
+ > SELECT base64(_FUNC_('Spark', 'abcdefghijklmnop12345678ABCDEFGH', 'CBC', 'DEFAULT', unhex('00000000000000000000000000000000')));
+ AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg=
+ > SELECT base64(_FUNC_('Spark', 'abcdefghijklmnop12345678ABCDEFGH', 'GCM', 'DEFAULT', unhex('000000000000000000000000'), 'This is an AAD mixed into the input'));
+ AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4
""",
since = "3.3.0",
group = "misc_funcs")
@@ -342,16 +352,22 @@ case class AesEncrypt(
input: Expression,
key: Expression,
mode: Expression,
- padding: Expression)
+ padding: Expression,
+ iv: Expression,
+ aad: Expression)
extends RuntimeReplaceable with ImplicitCastInputTypes {
override lazy val replacement: Expression = StaticInvoke(
classOf[ExpressionImplUtils],
BinaryType,
"aesEncrypt",
- Seq(input, key, mode, padding),
+ Seq(input, key, mode, padding, iv, aad),
inputTypes)
+ def this(input: Expression, key: Expression, mode: Expression, padding: Expression, iv: Expression) =
+ this(input, key, mode, padding, iv, Literal(""))
+ def this(input: Expression, key: Expression, mode: Expression, padding: Expression) =
+ this(input, key, mode, padding, Literal(""))
def this(input: Expression, key: Expression, mode: Expression) =
this(input, key, mode, Literal("DEFAULT"))
def this(input: Expression, key: Expression) =
@@ -359,13 +375,14 @@ case class AesEncrypt(
override def prettyName: String = "aes_encrypt"
- override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType, BinaryType, StringType, StringType)
+ override def inputTypes: Seq[AbstractDataType] =
+ Seq(BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType)
- override def children: Seq[Expression] = Seq(input, key, mode, padding)
+ override def children: Seq[Expression] = Seq(input, key, mode, padding, iv, aad)
override protected def withNewChildrenInternal(
newChildren: IndexedSeq[Expression]): Expression = {
- copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3))
+ copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3), newChildren(4), newChildren(5))
}
}
@@ -378,8 +395,9 @@ case class AesEncrypt(
*/
@ExpressionDescription(
usage = """
- _FUNC_(expr, key[, mode[, padding]]) - Returns a decrypted value of `expr` using AES in `mode` with `padding`.
+ _FUNC_(expr, key[, mode[, padding[, aad]]]) - Returns a decrypted value of `expr` using AES in `mode` with `padding`.
Key lengths of 16, 24 and 32 bits are supported. Supported combinations of (`mode`, `padding`) are ('ECB', 'PKCS'), ('GCM', 'NONE') and ('CBC', 'PKCS').
+ Optional additional authenticated data (AAD) is only supported for GCM. If provided for encryption, the identical AAD value must be provided for decryption.
The default mode is GCM.
""",
arguments = """
@@ -390,6 +408,8 @@ case class AesEncrypt(
Valid modes: ECB, GCM, CBC.
* padding - Specifies how to pad messages whose length is not a multiple of the block size.
Valid values: PKCS, NONE, DEFAULT. The DEFAULT padding means PKCS for ECB, NONE for GCM and PKCS for CBC.
+ * aad - Optional additional authenticated data. Only supported for GCM mode. This can be any free-form input and
+ must be provided for both encryption and decryption.
""",
examples = """
Examples:
@@ -401,6 +421,10 @@ case class AesEncrypt(
Spark SQL
> SELECT _FUNC_(unbase64('2NYmDCjgXTbbxGA3/SnJEfFC/JQ7olk2VQWReIAAFKo='), '1234567890abcdef', 'CBC');
Apache Spark
+ > SELECT _FUNC_(unbase64('AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg='), 'abcdefghijklmnop12345678ABCDEFGH', 'CBC', 'DEFAULT');
+ Spark
+ > SELECT _FUNC_(unbase64('AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4'), 'abcdefghijklmnop12345678ABCDEFGH', 'GCM', 'DEFAULT', 'This is an AAD mixed into the input');
+ Spark
""",
since = "3.3.0",
group = "misc_funcs")
@@ -408,37 +432,40 @@ case class AesDecrypt(
input: Expression,
key: Expression,
mode: Expression,
- padding: Expression)
+ padding: Expression,
+ aad: Expression)
extends RuntimeReplaceable with ImplicitCastInputTypes {
override lazy val replacement: Expression = StaticInvoke(
classOf[ExpressionImplUtils],
BinaryType,
"aesDecrypt",
- Seq(input, key, mode, padding),
+ Seq(input, key, mode, padding, aad),
inputTypes)
+ def this(input: Expression, key: Expression, mode: Expression, padding: Expression) =
+ this(input, key, mode, padding, Literal(""))
def this(input: Expression, key: Expression, mode: Expression) =
this(input, key, mode, Literal("DEFAULT"))
def this(input: Expression, key: Expression) =
this(input, key, Literal("GCM"))
override def inputTypes: Seq[AbstractDataType] = {
- Seq(BinaryType, BinaryType, StringType, StringType)
+ Seq(BinaryType, BinaryType, StringType, StringType, BinaryType)
}
override def prettyName: String = "aes_decrypt"
- override def children: Seq[Expression] = Seq(input, key, mode, padding)
+ override def children: Seq[Expression] = Seq(input, key, mode, padding, aad)
override protected def withNewChildrenInternal(
newChildren: IndexedSeq[Expression]): Expression = {
- copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3))
+ copy(newChildren(0), newChildren(1), newChildren(2), newChildren(3), newChildren(4))
}
}
@ExpressionDescription(
- usage = "_FUNC_(expr, key[, mode[, padding]]) - This is a special version of `aes_decrypt` that performs the same operation, but returns a NULL value instead of raising an error if the decryption cannot be performed.",
+ usage = "_FUNC_(expr, key[, mode[, padding[, aad]]]) - This is a special version of `aes_decrypt` that performs the same operation, but returns a NULL value instead of raising an error if the decryption cannot be performed.",
examples = """
Examples:
> SELECT _FUNC_(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM');
@@ -454,10 +481,17 @@ case class TryAesDecrypt(
key: Expression,
mode: Expression,
padding: Expression,
+ aad: Expression,
replacement: Expression) extends RuntimeReplaceable with InheritAnalysisRules {
+ def this(input: Expression,
+ key: Expression,
+ mode: Expression,
+ padding: Expression,
+ aad: Expression) =
+ this(input, key, mode, padding, aad, TryEval(AesDecrypt(input, key, mode, padding, aad)))
def this(input: Expression, key: Expression, mode: Expression, padding: Expression) =
- this(input, key, mode, padding, TryEval(AesDecrypt(input, key, mode, padding)))
+ this(input, key, mode, padding, Literal(""))
def this(input: Expression, key: Expression, mode: Expression) =
this(input, key, mode, Literal("DEFAULT"))
def this(input: Expression, key: Expression) =
@@ -465,7 +499,7 @@ case class TryAesDecrypt(
override def prettyName: String = "try_aes_decrypt"
- override def parameters: Seq[Expression] = Seq(input, key, mode, padding)
+ override def parameters: Seq[Expression] = Seq(input, key, mode, padding, aad)
override protected def withNewChildInternal(newChild: Expression): Expression =
this.copy(replacement = newChild)
diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala
index 52258156e31..3b0dd82c173 100644
--- a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala
+++ b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala
@@ -34,11 +34,16 @@ class ExpressionImplUtilsSuite extends SparkFunSuite {
aadOpt: Option[String] = None,
expectedErrorClassOpt: Option[String] = None,
errorParamsMap: Map[String, String] = Map()) {
+
+ def isIvDefined: Boolean = {
+ ivHexOpt.isDefined && ivHexOpt.get != null && ivHexOpt.get.length > 0
+ }
+
val plaintextBytes: Array[Byte] = plaintext.getBytes("UTF-8")
val keyBytes: Array[Byte] = key.getBytes("UTF-8")
val utf8mode: UTF8String = UTF8String.fromString(mode)
val utf8Padding: UTF8String = UTF8String.fromString(padding)
- val deterministic: Boolean = mode.equalsIgnoreCase("ECB") || ivHexOpt.isDefined
+ val deterministic: Boolean = mode.equalsIgnoreCase("ECB") || isIvDefined
val ivBytes: Array[Byte] =
ivHexOpt.map({ivHex => Hex.unhex(ivHex.getBytes("UTF-8"))}).getOrElse(null)
val aadBytes: Array[Byte] = aadOpt.map({aad => aad.getBytes("UTF-8")}).getOrElse(null)
@@ -59,11 +64,27 @@ class ExpressionImplUtilsSuite extends SparkFunSuite {
"abcdefghijklmnop12345678ABCDEFGH",
"9J3iZbIxnmaG+OIA9Amd+A==",
"ECB"),
+ // Test passing non-null, but empty arrays for IV and AAD
+ TestCase(
+ "Spark",
+ "abcdefghijklmnop12345678ABCDEFGH",
+ "9J3iZbIxnmaG+OIA9Amd+A==",
+ "ECB",
+ ivHexOpt = Some(""),
+ aadOpt = Some("")),
TestCase(
"Spark",
"abcdefghijklmnop12345678ABCDEFGH",
"+MgyzJxhusYVGWCljk7fhhl6C6oUqWmtdqoaG93KvhY=",
"CBC"),
+ // Test passing non-null, but empty arrays for IV and AAD
+ TestCase(
+ "Spark",
+ "abcdefghijklmnop12345678ABCDEFGH",
+ "+MgyzJxhusYVGWCljk7fhhl6C6oUqWmtdqoaG93KvhY=",
+ "CBC",
+ ivHexOpt = Some(""),
+ aadOpt = Some("")),
TestCase(
"Apache Spark",
"1234567890abcdef",
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index eef61195357..32c4c02b1b2 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -7,8 +7,8 @@
| org.apache.spark.sql.catalyst.expressions.Acosh | acosh | SELECT acosh(1) | struct<ACOSH(1):double> |
| org.apache.spark.sql.catalyst.expressions.Add | + | SELECT 1 + 2 | struct<(1 + 2):int> |
| org.apache.spark.sql.catalyst.expressions.AddMonths | add_months | SELECT add_months('2016-08-31', 1) | struct<add_months(2016-08-31, 1):date> |
-| org.apache.spark.sql.catalyst.expressions.AesDecrypt | aes_decrypt | SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333') | struct<aes_decrypt(unhex(83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94), 0000111122223333, GCM, DEFAULT):binary> |
-| org.apache.spark.sql.catalyst.expressions.AesEncrypt | aes_encrypt | SELECT hex(aes_encrypt('Spark', '0000111122223333')) | struct<hex(aes_encrypt(Spark, 0000111122223333, GCM, DEFAULT)):string> |
+| org.apache.spark.sql.catalyst.expressions.AesDecrypt | aes_decrypt | SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333') | struct<aes_decrypt(unhex(83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94), 0000111122223333, GCM, DEFAULT, ):binary> |
+| org.apache.spark.sql.catalyst.expressions.AesEncrypt | aes_encrypt | SELECT hex(aes_encrypt('Spark', '0000111122223333')) | struct<hex(aes_encrypt(Spark, 0000111122223333, GCM, DEFAULT, , )):string> |
| org.apache.spark.sql.catalyst.expressions.And | and | SELECT true and true | struct<(true AND true):boolean> |
| org.apache.spark.sql.catalyst.expressions.ArrayAggregate | aggregate | SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct<aggregate(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> |
| org.apache.spark.sql.catalyst.expressions.ArrayAggregate | reduce | SELECT reduce(array(1, 2, 3), 0, (acc, x) -> acc + x) | struct<reduce(array(1, 2, 3), 0, lambdafunction((namedlambdavariable() + namedlambdavariable()), namedlambdavariable(), namedlambdavariable()), lambdafunction(namedlambdavariable(), namedlambdavariable())):int> |
@@ -331,7 +331,7 @@
| org.apache.spark.sql.catalyst.expressions.TruncDate | trunc | SELECT trunc('2019-08-04', 'week') | struct<trunc(2019-08-04, week):date> |
| org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, 2015-03-05T09:32:05.359):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TryAdd | try_add | SELECT try_add(1, 2) | struct<try_add(1, 2):int> |
-| org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct<try_aes_decrypt(unhex(6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210), 0000111122223333, GCM, DEFAULT):binary> |
+| org.apache.spark.sql.catalyst.expressions.TryAesDecrypt | try_aes_decrypt | SELECT try_aes_decrypt(unhex('6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210'), '0000111122223333', 'GCM') | struct<try_aes_decrypt(unhex(6E7CA17BBB468D3084B5744BCA729FB7B2B7BCB8E4472847D02670489D95FA97DBBA7D3210), 0000111122223333, GCM, DEFAULT, ):binary> |
| org.apache.spark.sql.catalyst.expressions.TryDivide | try_divide | SELECT try_divide(3, 2) | struct<try_divide(3, 2):double> |
| org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct<try_element_at(array(1, 2, 3), 2):int> |
| org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct<try_multiply(2, 3):int> |
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 037202de9c9..4d7e8cbb351 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -408,6 +408,56 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
}
}
+ test("aes IV test function") {
+ val key32 = "abcdefghijklmnop12345678ABCDEFGH"
+ val gcmIv = "000000000000000000000000"
+ val encryptedGcm = "AAAAAAAAAAAAAAAAQiYi+sRNYDAOTjdSEcYBFsAWPL1f"
+ val cbcIv = "00000000000000000000000000000000"
+ val encryptedCbc = "AAAAAAAAAAAAAAAAAAAAAPSd4mWyMZ5mhvjiAPQJnfg="
+ val df1 = Seq("Spark").toDF
+ Seq(
+ (key32, encryptedGcm, "GCM", gcmIv),
+ (key32, encryptedCbc, "CBC", cbcIv)).foreach {
+ case (key, ciphertext, mode, iv) =>
+ checkAnswer(
+ df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " +
+ s"'$key', '$mode', 'DEFAULT') as string)"),
+ Seq(Row("Spark")))
+ checkAnswer(
+ df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " +
+ s"binary('$key'), '$mode', 'DEFAULT') as string)"),
+ Seq(Row("Spark")))
+ checkAnswer(
+ df1.selectExpr(
+ s"base64(aes_encrypt(value, '$key32', '$mode', 'DEFAULT', unhex('$iv')))"),
+ Seq(Row(ciphertext)))
+ }
+ }
+
+ test("aes IV and AAD test function") {
+ val key32 = "abcdefghijklmnop12345678ABCDEFGH"
+ val gcmIv = "000000000000000000000000"
+ val aad = "This is an AAD mixed into the input"
+ val encryptedGcm = "AAAAAAAAAAAAAAAAQiYi+sTLm7KD9UcZ2nlRdYDe/PX4"
+ val df1 = Seq("Spark").toDF
+ Seq(
+ (key32, encryptedGcm, "GCM", gcmIv, aad)).foreach {
+ case (key, ciphertext, mode, iv, aad) =>
+ checkAnswer(
+ df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " +
+ s"'$key', '$mode', 'DEFAULT', '$aad') as string)"),
+ Seq(Row("Spark")))
+ checkAnswer(
+ df1.selectExpr(s"cast(aes_decrypt(unbase64('$ciphertext'), " +
+ s"binary('$key'), '$mode', 'DEFAULT', '$aad') as string)"),
+ Seq(Row("Spark")))
+ checkAnswer(
+ df1.selectExpr(
+ s"base64(aes_encrypt(value, '$key32', '$mode', 'DEFAULT', unhex('$iv'), '$aad'))"),
+ Seq(Row(ciphertext)))
+ }
+ }
+
test("misc aes ECB function") {
val key16 = "abcdefghijklmnop"
val key24 = "abcdefghijklmnop12345678"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org