You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by va...@apache.org on 2018/05/18 23:25:38 UTC
spark git commit: [SPARK-23850][SQL][BRANCH-2.2] Add separate config
for SQL options redaction.
Repository: spark
Updated Branches:
refs/heads/branch-2.2 8c223b651 -> 6a55d8b03
[SPARK-23850][SQL][BRANCH-2.2] Add separate config for SQL options redaction.
The old code was relying on a core configuration and extended its
default value to include the things that needed to be redacted in the
app's environment. Instead, add a SQL-specific option for which
options to redact, and apply both the core and SQL-specific rules
when redacting the options in the save command.
This is a little sub-optimal since it adds another config, but it
retains the current default behavior.
While there I also fixed a typo and a couple of minor config API
usage issues in the related redaction option that SQL already had.
Tested with existing unit tests, plus checking the env page on
a shell UI.
(cherry picked from commit ed7ba7db8fa344ff182b72d23ae458e711f63432)
Author: Marcelo Vanzin <va...@cloudera.com>
Closes #21365 from vanzin/SPARK-23850-2.2.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6a55d8b0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6a55d8b0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6a55d8b0
Branch: refs/heads/branch-2.2
Commit: 6a55d8b03053e616dcacb79cd2c29a06d219dc32
Parents: 8c223b6
Author: Marcelo Vanzin <va...@cloudera.com>
Authored: Fri May 18 16:25:30 2018 -0700
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Fri May 18 16:25:30 2018 -0700
----------------------------------------------------------------------
.../apache/spark/internal/config/package.scala | 2 +-
.../scala/org/apache/spark/util/Utils.scala | 11 ++++++++++
.../org/apache/spark/sql/internal/SQLConf.scala | 21 ++++++++++++++++++++
.../datasources/SaveIntoDataSourceCommand.scala | 8 ++++----
.../SaveIntoDataSourceCommandSuite.scala | 3 ---
5 files changed, 37 insertions(+), 8 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/6a55d8b0/core/src/main/scala/org/apache/spark/internal/config/package.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index db4c9f9..f65a9d7 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -247,7 +247,7 @@ package object config {
"a property key or value, the value is redacted from the environment UI and various logs " +
"like YARN and event logs.")
.regexConf
- .createWithDefault("(?i)secret|password|url|user|username".r)
+ .createWithDefault("(?i)secret|password".r)
private[spark] val STRING_REDACTION_PATTERN =
ConfigBuilder("spark.redaction.string.regex")
http://git-wip-us.apache.org/repos/asf/spark/blob/6a55d8b0/core/src/main/scala/org/apache/spark/util/Utils.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 6bcaf10..626b656 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2625,6 +2625,17 @@ private[spark] object Utils extends Logging {
}
}
+ /**
+ * Redact the sensitive values in the given map. If a map key matches the redaction pattern then
+ * its value is replaced with a dummy text.
+ */
+ def redact(regex: Option[Regex], kvs: Seq[(String, String)]): Seq[(String, String)] = {
+ regex match {
+ case None => kvs
+ case Some(r) => redact(r, kvs)
+ }
+ }
+
private def redact(redactionPattern: Regex, kvs: Seq[(String, String)]): Seq[(String, String)] = {
// If the sensitive information regex matches with either the key or the value, redact the value
// While the original intent was to only redact the value if the key matched with the regex,
http://git-wip-us.apache.org/repos/asf/spark/blob/6a55d8b0/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 4c29f8e..ebabd1a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -29,6 +29,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._
import org.apache.spark.network.util.ByteUnit
import org.apache.spark.sql.catalyst.analysis.Resolver
+import org.apache.spark.util.Utils
import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter
////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -819,6 +820,15 @@ object SQLConf {
.intConf
.createWithDefault(UnsafeExternalSorter.DEFAULT_NUM_ELEMENTS_FOR_SPILL_THRESHOLD.toInt)
+ val SQL_OPTIONS_REDACTION_PATTERN =
+ buildConf("spark.sql.redaction.options.regex")
+ .doc("Regex to decide which keys in a Spark SQL command's options map contain sensitive " +
+ "information. The values of options whose names match this regex will be redacted " +
+ "in the explain output. This redaction is applied on top of the global redaction " +
+ s"configuration defined by ${SECRET_REDACTION_PATTERN.key}.")
+ .regexConf
+ .createWithDefault("(?i)url".r)
+
object Deprecated {
val MAPRED_REDUCE_TASKS = "mapred.reduce.tasks"
}
@@ -1182,6 +1192,17 @@ class SQLConf extends Serializable with Logging {
}
/**
+ * Redacts the given option map according to the description of SQL_OPTIONS_REDACTION_PATTERN.
+ */
+ def redactOptions(options: Map[String, String]): Map[String, String] = {
+ val regexes = Seq(
+ getConf(SQL_OPTIONS_REDACTION_PATTERN),
+ SECRET_REDACTION_PATTERN.readFrom(reader))
+
+ regexes.foldLeft(options.toSeq) { case (opts, r) => Utils.redact(Some(r), opts) }.toMap
+ }
+
+ /**
* Return whether a given key is set in this [[SQLConf]].
*/
def contains(key: String): Boolean = {
http://git-wip-us.apache.org/repos/asf/spark/blob/6a55d8b0/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
index 53868d4..b92684c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala
@@ -17,12 +17,10 @@
package org.apache.spark.sql.execution.datasources
-import org.apache.spark.SparkEnv
import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
-import org.apache.spark.util.Utils
/**
* Saves the results of `query` in to a data source.
@@ -53,7 +51,9 @@ case class SaveIntoDataSourceCommand(
}
override def simpleString: String = {
- val redacted = Utils.redact(SparkEnv.get.conf, options.toSeq).toMap
- s"SaveIntoDataSourceCommand ${provider}, ${partitionColumns}, ${redacted}, ${mode}"
+ val redacted = SparkSession.getActiveSession
+ .map(_.sessionState.conf.redactOptions(options))
+ .getOrElse(Map())
+ s"SaveIntoDataSourceCommand ${provider}, ${redacted}, ${mode}"
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/6a55d8b0/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
index 6b9ddb1..cf340d0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala
@@ -23,9 +23,6 @@ import org.apache.spark.sql.test.SharedSQLContext
class SaveIntoDataSourceCommandSuite extends SharedSQLContext {
- override protected def sparkConf: SparkConf = super.sparkConf
- .set("spark.redaction.regex", "(?i)password|url")
-
test("simpleString is redacted") {
val URL = "connection.url"
val PASS = "123"
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org