You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by cu...@apache.org on 2019/01/08 19:27:20 UTC

[spark] branch master updated: [SPARK-26349][PYSPARK] Forbid insecure py4j gateways

This is an automated email from the ASF dual-hosted git repository.

cutlerb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 32515d2  [SPARK-26349][PYSPARK] Forbid insecure py4j gateways
32515d2 is described below

commit 32515d205a4de4d8838226fa5e5c4e4f66935193
Author: Imran Rashid <ir...@cloudera.com>
AuthorDate: Tue Jan 8 11:26:36 2019 -0800

    [SPARK-26349][PYSPARK] Forbid insecure py4j gateways
    
    Spark always creates secure py4j connections between Java and Python,
    but it also allows users to pass in their own connection. This change
    ensures that even passed-in connections are secure.
    
    Added test cases verifying the failure with a (mocked) insecure gateway.
    
    This is closely related to SPARK-26019, but this entirely forbids the
    insecure connection, rather than creating the "escape-hatch".
    
    Closes #23441 from squito/SPARK-26349.
    
    Authored-by: Imran Rashid <ir...@cloudera.com>
    Signed-off-by: Bryan Cutler <cu...@gmail.com>
---
 python/pyspark/context.py            |  5 +++++
 python/pyspark/tests/test_context.py | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 6137ed2..64178eb 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -115,6 +115,11 @@ class SparkContext(object):
         ValueError:...
         """
         self._callsite = first_spark_call() or CallSite(None, None, None)
+        if gateway is not None and gateway.gateway_parameters.auth_token is None:
+            raise ValueError(
+                "You are trying to pass an insecure Py4j gateway to Spark. This"
+                " is not allowed as it is a security risk.")
+
         SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
         try:
             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py
index 201baf4..18d9cd4 100644
--- a/python/pyspark/tests/test_context.py
+++ b/python/pyspark/tests/test_context.py
@@ -20,6 +20,7 @@ import tempfile
 import threading
 import time
 import unittest
+from collections import namedtuple
 
 from pyspark import SparkFiles, SparkContext
 from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest, SPARK_HOME
@@ -246,6 +247,15 @@ class ContextTests(unittest.TestCase):
         with SparkContext() as sc:
             self.assertGreater(sc.startTime, 0)
 
+    def test_forbid_insecure_gateway(self):
+        # Fail immediately if you try to create a SparkContext
+        # with an insecure gateway
+        parameters = namedtuple('MockGatewayParameters', 'auth_token')(None)
+        mock_insecure_gateway = namedtuple('MockJavaGateway', 'gateway_parameters')(parameters)
+        with self.assertRaises(ValueError) as context:
+            SparkContext(gateway=mock_insecure_gateway)
+        self.assertIn("insecure Py4j gateway", str(context.exception))
+
 
 if __name__ == "__main__":
     from pyspark.tests.test_context import *


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org