You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by zh...@apache.org on 2022/11/15 08:06:53 UTC

[dolphinscheduler-sdk-python] branch main updated: [feat] Add token as authentication for python gateway (#13)

This is an automated email from the ASF dual-hosted git repository.

zhongjiajie pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler-sdk-python.git


The following commit(s) were added to refs/heads/main by this push:
     new 0b7c0be  [feat] Add token as authentication for python gateway (#13)
0b7c0be is described below

commit 0b7c0be905aa8ce710f33887472af9231f55951f
Author: Jay Chung <zh...@gmail.com>
AuthorDate: Tue Nov 15 16:06:48 2022 +0800

    [feat] Add token as authentication for python gateway (#13)
    
    separate from apache/dolphinscheduler#6407. Authentication,
    add secret to ensure only trusted people could
    connect to gateway.
    
    fix: apache/dolphinscheduler#8255
---
 .github/PULL_REQUEST_TEMPLATE.md                  |  2 +-
 docs/source/concept.rst                           |  8 ++++++++
 docs/source/config.rst                            |  2 ++
 docs/source/start.rst                             |  9 +++++++-
 src/pydolphinscheduler/configuration.py           | 25 +++++++++++++++++++++++
 src/pydolphinscheduler/default_config.yaml        |  4 ++++
 src/pydolphinscheduler/java_gateway.py            |  8 +++++++-
 tests/integration/test_java_gateway.py            | 17 ++++++++++-----
 tests/{core => }/test_configuration.py            | 21 ++++++++++++++++++-
 tests/testing/constants.py                        |  3 +++
 tests/{core => utils}/test_default_config_yaml.py |  0
 tests/utils/test_yaml_parser.py                   |  2 ++
 12 files changed, 92 insertions(+), 9 deletions(-)

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index ea422e3..9e067ba 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -9,5 +9,5 @@
 I confirm that the following checklist has been completed.
 
 - [ ] Add/Change **test cases** for the changes.
-- [ ] Add/Change the related **documentation**.
+- [ ] Add/Change the related **documentation**, should also change `docs/source/config.rst` when you change file `default_config.yaml`.
 - [ ] (Optional) Add your change to `UPDATING.md` when it is an incompatible change.
diff --git a/docs/source/concept.rst b/docs/source/concept.rst
index de49c9c..9db389b 100644
--- a/docs/source/concept.rst
+++ b/docs/source/concept.rst
@@ -181,3 +181,11 @@ decide workflow of task. You could set `process_definition` in both normal assig
        shell_task = Shell(name="shell", command="echo shell task",
 
 With both `Process Definition`_, `Tasks`_  and `Tasks Dependence`_, we could build a workflow with multiple tasks.
+
+Authentication Token
+--------------------
+
+pydolphinscheduler uses a token for authentication when communicating with the dolphinscheduler server, and we ship a
+default auth token to make it work out of the box. For security reasons, we highly recommend that you set your own auth
+token when you deploy in a production environment or test dolphinscheduler on a public network. The auth token keyword
+is ``auth_token``, and it can be set in multiple ways; see the :doc:`config` section for more detail.
diff --git a/docs/source/config.rst b/docs/source/config.rst
index 3f7fff8..c5753cf 100644
--- a/docs/source/config.rst
+++ b/docs/source/config.rst
@@ -81,6 +81,8 @@ All environment variables as below, and you could modify their value via `Bash <
 +------------------+------------------------------------+---------------------------------------------------------------------------------------------------------------------+
 | Variable Section | Variable Name                      | description                                                                                                         |
 +==================+====================================+=====================================================================================================================+
+|                  | ``PYDS_JAVA_GATEWAY_AUTH_TOKEN``   | Default Java gateway auth token, should be changed to custom value when deploy in public network or in production.  |
++                  +------------------------------------+---------------------------------------------------------------------------------------------------------------------+
 |                  | ``PYDS_JAVA_GATEWAY_ADDRESS``      | Default Java gateway address, will use its value when it is set.                                                    |
 +                  +------------------------------------+---------------------------------------------------------------------------------------------------------------------+
 |   Java Gateway   | ``PYDS_JAVA_GATEWAY_PORT``         | Default Java gateway port, will use its value when it is set.                                                       |
diff --git a/docs/source/start.rst b/docs/source/start.rst
index aa86f71..434d80e 100644
--- a/docs/source/start.rst
+++ b/docs/source/start.rst
@@ -155,7 +155,14 @@ from the API server, you should first change pydolphinscheduler configuration an
    You could see more information in :doc:`config` about all the configurations pydolphinscheduler supported.
 
 After that, you could go and see your DolphinScheduler web UI to find out a new workflow created by pydolphinscheduler,
-and the path of web UI is `Project -> Workflow -> Workflow Definition`.
+and the path of web UI is `Project -> Workflow -> Workflow Definition`, and you can see that a workflow and a workflow
+instance have been created and the DAG is automatically formatted by the web UI.
+
+.. note::
+
+   A default authentication token is used when dolphinscheduler and pydolphinscheduler are first launched. Please change
+   the parameter ``auth_token`` when you deploy in a production environment or test dolphinscheduler on a public network.
+   See :ref:`authentication token <concept:authentication token>` for more detail.
 
 
 What's More
diff --git a/src/pydolphinscheduler/configuration.py b/src/pydolphinscheduler/configuration.py
index 2f0c2c0..d12e47c 100644
--- a/src/pydolphinscheduler/configuration.py
+++ b/src/pydolphinscheduler/configuration.py
@@ -16,6 +16,7 @@
 # under the License.
 
 """Configuration module for pydolphinscheduler."""
+import logging
 import os
 from pathlib import Path
 from typing import Any
@@ -26,6 +27,8 @@ from pydolphinscheduler.utils.yaml_parser import YamlParser
 
 BUILD_IN_CONFIG_PATH = Path(__file__).resolve().parent.joinpath("default_config.yaml")
 
+logger = logging.getLogger(__name__)
+
 
 def config_path() -> Path:
     """Get the path of pydolphinscheduler configuration file."""
@@ -118,6 +121,25 @@ def set_single_config(key: str, value: Any) -> None:
     file.write(content=str(config), to_path=str(config_path()), overwrite=True)
 
 
+def token_alert(auth_token: str) -> None:
+    """Warn when the auth token is ``None`` or equal to the built-in default token.
+
+    To keep users from forgetting to change the default token, warn them whenever they use it.
+    """
+    if auth_token is None:
+        logger.warning(
+            "Auth token is None, highly recommend add a token in production, "
+            "especially you deploy in public network."
+        )
+    with open(BUILD_IN_CONFIG_PATH, mode="r") as f:
+        config = YamlParser(f.read())
+        if config.get("java_gateway.auth_token") == auth_token:
+            logger.warning(
+                "Auth token is default token, highly recommend add a token in production, "
+                "especially you deploy in public network."
+            )
+
+
 def get_int(val: Any) -> int:
     """Covert value to int."""
     return int(val)
@@ -152,6 +174,9 @@ JAVA_GATEWAY_AUTO_CONVERT = get_bool(
         "PYDS_JAVA_GATEWAY_AUTO_CONVERT", configs.get("java_gateway.auto_convert")
     )
 )
+JAVA_GATEWAY_AUTH_TOKEN = os.environ.get(
+    "PYDS_JAVA_GATEWAY_AUTH_TOKEN", configs.get("java_gateway.auth_token")
+)
 
 # User Settings
 USER_NAME = os.environ.get("PYDS_USER_NAME", configs.get("default.user.name"))
diff --git a/src/pydolphinscheduler/default_config.yaml b/src/pydolphinscheduler/default_config.yaml
index 5ad3064..0c51880 100644
--- a/src/pydolphinscheduler/default_config.yaml
+++ b/src/pydolphinscheduler/default_config.yaml
@@ -17,6 +17,10 @@
 
 # Setting about Java gateway server
 java_gateway:
+  # Authentication token for connections from the Python API to the Python gateway server. The default value should be
+  # changed when you deploy on a public network.
+  auth_token: jwUDzpLsNKEFER4*a8gruBH_GsAurNxU7A@Xc
+
   # The address of Python gateway server start. Set its value to `0.0.0.0` if your Python API run in different
   # between Python gateway server. It could be be specific to other address like `127.0.0.1` or `localhost`
   address: 127.0.0.1
diff --git a/src/pydolphinscheduler/java_gateway.py b/src/pydolphinscheduler/java_gateway.py
index cd03d32..21e2115 100644
--- a/src/pydolphinscheduler/java_gateway.py
+++ b/src/pydolphinscheduler/java_gateway.py
@@ -36,6 +36,7 @@ def launch_gateway(
     address: Optional[str] = None,
     port: Optional[int] = None,
     auto_convert: Optional[bool] = True,
+    auth_token: Optional[str] = None,
 ) -> JavaGateway:
     """Launch java gateway to pydolphinscheduler.
 
@@ -43,10 +44,14 @@ def launch_gateway(
     in the worst case, Py4J needs to go through all registered converters for all parameters.
     This is why automatic conversion is disabled by default.
     """
+    auth_token = auth_token or configuration.JAVA_GATEWAY_AUTH_TOKEN
+    configuration.token_alert(auth_token)
+
     gateway_parameters = GatewayParameters(
         address=address or configuration.JAVA_GATEWAY_ADDRESS,
         port=port or configuration.JAVA_GATEWAY_PORT,
         auto_convert=auto_convert or configuration.JAVA_GATEWAY_AUTO_CONVERT,
+        auth_token=auth_token,
     )
     gateway = JavaGateway(gateway_parameters=gateway_parameters)
     return gateway
@@ -78,8 +83,9 @@ class JavaGate:
         address: Optional[str] = None,
         port: Optional[int] = None,
         auto_convert: Optional[bool] = True,
+        auth_token: Optional[str] = None,
     ):
-        self.java_gateway = launch_gateway(address, port, auto_convert)
+        self.java_gateway = launch_gateway(address, port, auto_convert, auth_token)
         gateway_version = "unknown"
         with contextlib.suppress(Py4JError):
             # 1. Java gateway version is too old: doesn't have method 'getGatewayVersion()'
diff --git a/tests/integration/test_java_gateway.py b/tests/integration/test_java_gateway.py
index 8b7c5ff..bc16e50 100644
--- a/tests/integration/test_java_gateway.py
+++ b/tests/integration/test_java_gateway.py
@@ -16,21 +16,30 @@
 # under the License.
 
 """Test pydolphinscheduler java gateway."""
+import pytest
+from py4j.java_gateway import GatewayParameters, JavaGateway, java_import
 
+from tests.testing.constants import TOKEN
 
-from py4j.java_gateway import JavaGateway, java_import
+gateway_parameters = GatewayParameters(auth_token=TOKEN)
+gateway = JavaGateway(gateway_parameters=gateway_parameters)
+
+
+@pytest.fixture(scope="module")
+def class_tear_down():
+    """Tear down the module-level Java gateway by closing it."""
+    yield
+    gateway.close()
 
 
 def test_gateway_connect():
     """Test weather client could connect java gate way or not."""
-    gateway = JavaGateway()
     app = gateway.entry_point
     assert app.ping() == "PONG"
 
 
 def test_jvm_simple():
     """Test use JVM build-in object and operator from java gateway."""
-    gateway = JavaGateway()
     smallest = gateway.jvm.java.lang.Integer.MIN_VALUE
     biggest = gateway.jvm.java.lang.Integer.MAX_VALUE
     assert smallest is not None and biggest is not None
@@ -39,14 +48,12 @@ def test_jvm_simple():
 
 def test_python_client_java_import_single():
     """Test import single class from java gateway."""
-    gateway = JavaGateway()
     java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.FileUtils")
     assert hasattr(gateway.jvm, "FileUtils")
 
 
 def test_python_client_java_import_package():
     """Test import package contain multiple class from java gateway."""
-    gateway = JavaGateway()
     java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.*")
     # test if jvm view have some common utils
     for util in ("FileUtils", "OSUtils", "DateUtils"):
diff --git a/tests/core/test_configuration.py b/tests/test_configuration.py
similarity index 93%
rename from tests/core/test_configuration.py
rename to tests/test_configuration.py
index b9dc8cb..a3dd07f 100644
--- a/tests/core/test_configuration.py
+++ b/tests/test_configuration.py
@@ -18,7 +18,9 @@
 """Test class :mod:`pydolphinscheduler.core.configuration`' method."""
 
 import importlib
+import logging
 import os
+import re
 from pathlib import Path
 from typing import Any
 
@@ -33,7 +35,7 @@ from pydolphinscheduler.configuration import (
 )
 from pydolphinscheduler.exceptions import PyDSConfException
 from pydolphinscheduler.utils.yaml_parser import YamlParser
-from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME
+from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME, TOKEN
 from tests.testing.file import get_file_content
 
 
@@ -270,3 +272,20 @@ def test_get_configuration_env(config_name: str, src: Any, dest: Any):
     importlib.reload(configuration)
     assert getattr(configuration, config_name) == src
     assert env_name not in os.environ
+
+
+def test_token_alert(caplog):
+    """Test that :func:`token_alert` logs the expected warning for the default token."""
+    with caplog.at_level(logging.WARNING):
+        configuration.token_alert(TOKEN)
+    assert all(
+        [
+            "highly recommend add a token in production, especially you deploy in public network."
+            in caplog.text,
+            re.findall(
+                "Auth token is.*?, highly recommend add a token in production, "
+                "especially you deploy in public network.",
+                caplog.text,
+            ),
+        ]
+    )
diff --git a/tests/testing/constants.py b/tests/testing/constants.py
index ed2ee37..6a4b6e4 100644
--- a/tests/testing/constants.py
+++ b/tests/testing/constants.py
@@ -46,3 +46,6 @@ ENV_PYDS_HOME = "PYDS_HOME"
 DEV_MODE = str(
     os.environ.get("PY_DOLPHINSCHEDULER_DEV_MODE", False)
 ).strip().lower() in {"true", "t", "1"}
+
+# default token
+TOKEN = "jwUDzpLsNKEFER4*a8gruBH_GsAurNxU7A@Xc"
diff --git a/tests/core/test_default_config_yaml.py b/tests/utils/test_default_config_yaml.py
similarity index 100%
rename from tests/core/test_default_config_yaml.py
rename to tests/utils/test_default_config_yaml.py
diff --git a/tests/utils/test_yaml_parser.py b/tests/utils/test_yaml_parser.py
index 3abdda6..6ea8b52 100644
--- a/tests/utils/test_yaml_parser.py
+++ b/tests/utils/test_yaml_parser.py
@@ -23,6 +23,7 @@ import pytest
 from ruamel.yaml import YAML
 
 from pydolphinscheduler.utils.yaml_parser import YamlParser
+from tests.testing.constants import TOKEN
 from tests.testing.path import path_default_config_yaml
 
 yaml = YAML()
@@ -40,6 +41,7 @@ expects = [
     {
         # yaml.load("no need test") is a flag about skipping it because it to different to maintainer
         "java_gateway": yaml.load("no need test"),
+        "java_gateway.auth_token": (TOKEN, "new-token"),
         "java_gateway.address": ("127.0.0.1", "127.1.1.1"),
         "java_gateway.port": (25333, 25555),
         "java_gateway.auto_convert": (True, False),