You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ch...@apache.org on 2021/12/29 05:37:12 UTC

[incubator-kyuubi] branch master updated: [KYUUBI #1634][FEATURE] Redact secret information from SparkSQLEngine UI

This is an automated email from the ASF dual-hosted git repository.

chengpan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d68aa0  [KYUUBI #1634][FEATURE] Redact secret information from SparkSQLEngine UI
7d68aa0 is described below

commit 7d68aa07ff172c2c8deb65c891cce04370a446ca
Author: minyk <mi...@gmail.com>
AuthorDate: Wed Dec 29 13:37:03 2021 +0800

    [KYUUBI #1634][FEATURE] Redact secret information from SparkSQLEngine UI
    
    <!--
    Thanks for sending a pull request!
    
    Here are some tips for you:
      1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
      2. If the PR is related to an issue in https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
      3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
    -->
    
    ### _Why are the changes needed?_
    <!--
    Please clarify why the changes are needed. For instance,
      1. If you add a feature, you can talk about the use case of it.
      2. If you fix a bug, you can clarify why it is a bug.
    -->
    SparkSQLEngine's Kyuubi UI tab shows SQL statements. In some cases, these informations are secret information like S3 access/secret key. Spark SQL UI tab already redact with `spark.redaction.string.regex` configuration. Kyuubi also should redact secret informations with this property.
    
    ### _How was this patch tested?_
    - [X] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [X] Add screenshots for manual tests if appropriate
    with below property and query:
    spark-defaults.conf:
    ```
    spark.redaction.string.regex (?i)url|access.*|secret.*|password
    ```
    query:
    ```
    SET fs.s3a.access.key=testkey
    ```
    
    ![Screen Shot 2021-12-28 at 9 00 07 PM](https://user-images.githubusercontent.com/1802676/147564262-bf06869f-2c2f-4e76-8380-34a92b2a390c.png)
    
    - [X] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before make a pull request
    
    Closes #1640 from minyk/redact-ui.
    
    Closes #1634
    
    4051522a [minyk] set job description with setJobGroup()
    3a8e0e91 [minyk] use redactedStatement
    c84778e3 [minyk] move to SparkOperation
    97a4fb4a [minyk] fix code formatting
    79fe52d3 [minyk] redact UI with `spark.redaction.string.regex`
    
    Authored-by: minyk <mi...@gmail.com>
    Signed-off-by: Cheng Pan <ch...@apache.org>
---
 .../engine/spark/events/SparkOperationEvent.scala  |  2 +-
 .../engine/spark/operation/SparkOperation.scala    |  5 ++-
 .../org/apache/spark/kyuubi/SparkUtilsHelper.scala | 38 ++++++++++++++++++++++
 .../scala/org/apache/spark/ui/EngineTabSuite.scala | 26 ++++++++++++++-
 4 files changed, 68 insertions(+), 3 deletions(-)

diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/SparkOperationEvent.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/SparkOperationEvent.scala
index 489d901..4b3a598 100644
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/SparkOperationEvent.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/SparkOperationEvent.scala
@@ -77,7 +77,7 @@ object SparkOperationEvent {
     val status = operation.getStatus
     new SparkOperationEvent(
       operation.statementId,
-      operation.statement,
+      operation.redactedStatement,
       operation.shouldRunAsync,
       status.state.name(),
       status.lastModified,
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala
index f0a6c6f..c440879 100644
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala
@@ -22,6 +22,7 @@ import java.time.ZoneId
 
 import org.apache.commons.lang3.StringUtils
 import org.apache.hive.service.rpc.thrift.{TRowSet, TTableSchema}
+import org.apache.spark.kyuubi.SparkUtilsHelper.redact
 import org.apache.spark.sql.{DataFrame, Row, SparkSession}
 import org.apache.spark.sql.types.StructType
 
@@ -57,6 +58,8 @@ abstract class SparkOperation(opType: OperationType, session: Session)
 
   protected def resultSchema: StructType
 
+  def redactedStatement: String = redact(spark.sessionState.conf.stringRedactionPattern, statement)
+
   protected def cleanup(targetState: OperationState): Unit = state.synchronized {
     if (!isTerminalState(state)) {
       setState(targetState)
@@ -97,7 +100,7 @@ abstract class SparkOperation(opType: OperationType, session: Session)
 
   protected def withLocalProperties[T](f: => T): T = {
     try {
-      spark.sparkContext.setJobGroup(statementId, statement, forceCancel)
+      spark.sparkContext.setJobGroup(statementId, redactedStatement, forceCancel)
       spark.sparkContext.setLocalProperty(KYUUBI_SESSION_USER_KEY, session.user)
       spark.sparkContext.setLocalProperty(KYUUBI_STATEMENT_ID_KEY, statementId)
       schedulerPool match {
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SparkUtilsHelper.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SparkUtilsHelper.scala
new file mode 100644
index 0000000..4af4002
--- /dev/null
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/spark/kyuubi/SparkUtilsHelper.scala
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.kyuubi
+
+import scala.util.matching.Regex
+
+import org.apache.spark.util.Utils
+
+import org.apache.kyuubi.Logging
+
+/**
+ * A place to invoke non-public APIs of [[Utils]], anything to be added here need to
+ * think twice
+ */
+object SparkUtilsHelper extends Logging {
+
+  /**
+   * Redact the sensitive information in the given string.
+   */
+  def redact(regex: Option[Regex], text: String): String = {
+    Utils.redact(regex, text)
+  }
+}
diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/ui/EngineTabSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/ui/EngineTabSuite.scala
index 6b7f091..50ddf0a 100644
--- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/ui/EngineTabSuite.scala
+++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/spark/ui/EngineTabSuite.scala
@@ -28,7 +28,8 @@ import org.apache.kyuubi.operation.HiveJDBCTestHelper
 class EngineTabSuite extends WithSparkSQLEngine with HiveJDBCTestHelper {
   override def withKyuubiConf: Map[String, String] = Map(
     "spark.ui.enabled" -> "true",
-    "spark.ui.port" -> "0")
+    "spark.ui.port" -> "0",
+    "spark.sql.redaction.string.regex" -> "(?i)url|access|secret|password")
 
   override def beforeAll(): Unit = {
     SparkContext.getActive.foreach(_.stop())
@@ -136,5 +137,28 @@ class EngineTabSuite extends WithSparkSQLEngine with HiveJDBCTestHelper {
     }
   }
 
+  test("statement redact for engine tab") {
+    assert(spark.sparkContext.ui.nonEmpty)
+    val client = HttpClients.createDefault()
+    val req = new HttpGet(spark.sparkContext.uiWebUrl.get + "/kyuubi/")
+    val response = client.execute(req)
+    assert(response.getStatusLine.getStatusCode === 200)
+    val resp = EntityUtils.toString(response.getEntity)
+    assert(resp.contains("0 session(s) are online,"))
+    withJdbcStatement() { statement =>
+      statement.execute(
+        """
+          |SET
+          |  fs.s3a.access.key=testkey
+        """.stripMargin)
+      val response = client.execute(req)
+      assert(response.getStatusLine.getStatusCode === 200)
+      val resp = EntityUtils.toString(response.getEntity)
+
+      // check redacted sql
+      assert(resp.contains("redacted"))
+    }
+  }
+
   override protected def jdbcUrl: String = getJdbcUrl
 }