You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2023/03/29 04:37:58 UTC
[spark] branch branch-3.4 updated: [SPARK-42946][SQL] Redact sensitive data which is nested by variable substitution
This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
new a9cacc137fa7 [SPARK-42946][SQL] Redact sensitive data which is nested by variable substitution
a9cacc137fa7 is described below
commit a9cacc137fa7d442c567c819f2f96de08e4975f2
Author: Kent Yao <ya...@apache.org>
AuthorDate: Wed Mar 29 12:36:57 2023 +0800
[SPARK-42946][SQL] Redact sensitive data which is nested by variable substitution
### What changes were proposed in this pull request?
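Redact sensitive data that can be exposed through nested variable substitution. Before this change, both cases below leaked the configured value (`abc`) in plain text.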
#### Case 1: through the key part of the SET syntax
```sql
spark-sql> set ${spark.ssl.keyPassword};
abc <undefined>
```
#### Case 2: through SELECT, as a string literal
```sql
spark-sql> set spark.ssl.keyPassword;
spark.ssl.keyPassword *********(redacted)
Time taken: 0.009 seconds, Fetched 1 row(s)
spark-sql> select '${spark.ssl.keyPassword}';
abc
```
### Why are the changes needed?
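Data security: the value of a config that `SET` displays as redacted (e.g. `spark.ssl.keyPassword`) could still be read back in plain text by referencing it through `${...}` variable substitution.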
### Does this PR introduce _any_ user-facing change?
Yes, sensitive data can no longer be extracted through variable substitution.
### How was this patch tested?
New tests in `SetCommandSuite` and `VariableSubstitutionSuite`.
Closes #40576 from yaooqinn/SPARK-42946.
Authored-by: Kent Yao <ya...@apache.org>
Signed-off-by: Kent Yao <ya...@apache.org>
(cherry picked from commit c227d789a5fedb2178858768e1fe425169f489d2)
Signed-off-by: Kent Yao <ya...@apache.org>
---
.../org/apache/spark/sql/internal/VariableSubstitution.scala | 5 ++++-
.../test/scala/org/apache/spark/sql/SetCommandSuite.scala | 12 ++++++++++++
.../spark/sql/internal/VariableSubstitutionSuite.scala | 9 +++++++++
3 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala
index 248dfa107bc4..f6a0f5a64221 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala
@@ -29,7 +29,10 @@ import org.apache.spark.sql.catalyst.SQLConfHelper
class VariableSubstitution extends SQLConfHelper {
private val provider = new ConfigProvider {
- override def get(key: String): Option[String] = Option(conf.getConfString(key, ""))
+ override def get(key: String): Option[String] = {
+ val value = conf.getConfString(key, "")
+ conf.redactOptions(Seq((key, value))).headOption.map(_._2)
+ }
}
private val reader = new ConfigReader(provider)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala
index 8e2a44f6e5b4..c38a29557b99 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala
@@ -17,6 +17,7 @@
package org.apache.spark.sql
+import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.{SharedSparkSession, TestSQLContext}
import org.apache.spark.util.ResetSystemProperties
@@ -143,4 +144,15 @@ class SetCommandSuite extends QueryTest with SharedSparkSession with ResetSystem
assert(!allValues.exists(v => v.contains(value1) || v.contains(value2)))
}
}
+
+ test("SPARK-42946: Set command could expose sensitive data through key") {
+ val key1 = "test.password"
+ val value1 = "test.value1"
+ withSQLConf(key1 -> value1) {
+ checkError(
+ intercept[ParseException](sql("SET ${test.password}")),
+ errorClass = "INVALID_SET_SYNTAX"
+ )
+ }
+ }
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala
index d5da2553c718..509ef567ca14 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.internal
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.util.Utils
class VariableSubstitutionSuite extends SparkFunSuite with SQLHelper {
@@ -56,4 +57,12 @@ class VariableSubstitutionSuite extends SparkFunSuite with SQLHelper {
}
}
+ test("SPARK-42946: redact sensitive data in query with variable substitution") {
+ val q = "select '${password}', ${spark:password} this is great"
+ val rt = Utils.REDACTION_REPLACEMENT_TEXT
+ withSQLConf("bar" -> "1", "foo" -> "${bar}") {
+ assert(sub.substitute(q) === s"select '$rt', $rt this is great")
+ }
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org