You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ya...@apache.org on 2023/03/29 04:37:58 UTC

[spark] branch branch-3.4 updated: [SPARK-42946][SQL] Redact sensitive data which is nested by variable substitution

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new a9cacc137fa7 [SPARK-42946][SQL] Redact sensitive data which is nested by variable substitution
a9cacc137fa7 is described below

commit a9cacc137fa7d442c567c819f2f96de08e4975f2
Author: Kent Yao <ya...@apache.org>
AuthorDate: Wed Mar 29 12:36:57 2023 +0800

    [SPARK-42946][SQL] Redact sensitive data which is nested by variable substitution
    
    ### What changes were proposed in this pull request?
    
    Redact sensitive configuration values that would otherwise be exposed in plain text through variable substitution (`${...}`).
    
    #### Case 1: through the key part of the SET syntax
    
    ```sql
    
    spark-sql> set ${spark.ssl.keyPassword};
    abc    <undefined>
    
    ```
    #### Case 2: through a string literal in SELECT
    
    ```sql
    spark-sql> set spark.ssl.keyPassword;
    spark.ssl.keyPassword    *********(redacted)
    Time taken: 0.009 seconds, Fetched 1 row(s)
    spark-sql> select '${spark.ssl.keyPassword}';
    abc
    ```
    
    ### Why are the changes needed?
    
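    Data security: as the two cases above show, a value that `SET` displays as redacted could still be read back in plain text through variable substitution.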
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, sensitive data can no longer be extracted through variable substitution.
    
    ### How was this patch tested?
    
    New tests added to SetCommandSuite and VariableSubstitutionSuite.
    
    Closes #40576 from yaooqinn/SPARK-42946.
    
    Authored-by: Kent Yao <ya...@apache.org>
    Signed-off-by: Kent Yao <ya...@apache.org>
    (cherry picked from commit c227d789a5fedb2178858768e1fe425169f489d2)
    Signed-off-by: Kent Yao <ya...@apache.org>
---
 .../org/apache/spark/sql/internal/VariableSubstitution.scala |  5 ++++-
 .../test/scala/org/apache/spark/sql/SetCommandSuite.scala    | 12 ++++++++++++
 .../spark/sql/internal/VariableSubstitutionSuite.scala       |  9 +++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala
index 248dfa107bc4..f6a0f5a64221 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/VariableSubstitution.scala
@@ -29,7 +29,10 @@ import org.apache.spark.sql.catalyst.SQLConfHelper
 class VariableSubstitution extends SQLConfHelper {
 
   private val provider = new ConfigProvider {
-    override def get(key: String): Option[String] = Option(conf.getConfString(key, ""))
+    override def get(key: String): Option[String] = {
+      val value = conf.getConfString(key, "")
+      conf.redactOptions(Seq((key, value))).headOption.map(_._2)
+    }
   }
 
   private val reader = new ConfigReader(provider)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala
index 8e2a44f6e5b4..c38a29557b99 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.sql.catalyst.parser.ParseException
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.{SharedSparkSession, TestSQLContext}
 import org.apache.spark.util.ResetSystemProperties
@@ -143,4 +144,15 @@ class SetCommandSuite extends QueryTest with SharedSparkSession with ResetSystem
       assert(!allValues.exists(v => v.contains(value1) || v.contains(value2)))
     }
   }
+
+  test("SPARK-42946: Set command could expose sensitive data through key") {
+    val key1 = "test.password"
+    val value1 = "test.value1"
+    withSQLConf(key1 -> value1) {
+      checkError(
+        intercept[ParseException](sql("SET ${test.password}")),
+        errorClass = "INVALID_SET_SYNTAX"
+      )
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala
index d5da2553c718..509ef567ca14 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/VariableSubstitutionSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.internal
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.util.Utils
 
 class VariableSubstitutionSuite extends SparkFunSuite with SQLHelper {
 
@@ -56,4 +57,12 @@ class VariableSubstitutionSuite extends SparkFunSuite with SQLHelper {
     }
   }
 
+  test("SPARK-42946: redact sensitive data in query with variable substitution") {
+    val q = "select '${password}', ${spark:password} this is great"
+    val rt = Utils.REDACTION_REPLACEMENT_TEXT
+    withSQLConf("bar" -> "1", "foo" -> "${bar}") {
+      assert(sub.substitute(q) === s"select '$rt', $rt this is great")
+    }
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org