You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by zh...@apache.org on 2022/11/15 08:06:53 UTC
[dolphinscheduler-sdk-python] branch main updated: [feat] Add token as authentication for python gateway (#13)
This is an automated email from the ASF dual-hosted git repository.
zhongjiajie pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/dolphinscheduler-sdk-python.git
The following commit(s) were added to refs/heads/main by this push:
new 0b7c0be [feat] Add token as authentication for python gateway (#13)
0b7c0be is described below
commit 0b7c0be905aa8ce710f33887472af9231f55951f
Author: Jay Chung <zh...@gmail.com>
AuthorDate: Tue Nov 15 16:06:48 2022 +0800
[feat] Add token as authentication for python gateway (#13)
Separated from apache/dolphinscheduler#6407. Authentication:
add a secret token to ensure that only trusted users can
connect to the gateway.
fix: apache/dolphinscheduler#8255
---
.github/PULL_REQUEST_TEMPLATE.md | 2 +-
docs/source/concept.rst | 8 ++++++++
docs/source/config.rst | 2 ++
docs/source/start.rst | 9 +++++++-
src/pydolphinscheduler/configuration.py | 25 +++++++++++++++++++++++
src/pydolphinscheduler/default_config.yaml | 4 ++++
src/pydolphinscheduler/java_gateway.py | 8 +++++++-
tests/integration/test_java_gateway.py | 17 ++++++++++-----
tests/{core => }/test_configuration.py | 21 ++++++++++++++++++-
tests/testing/constants.py | 3 +++
tests/{core => utils}/test_default_config_yaml.py | 0
tests/utils/test_yaml_parser.py | 2 ++
12 files changed, 92 insertions(+), 9 deletions(-)
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index ea422e3..9e067ba 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -9,5 +9,5 @@
I confirm that the following checklist has been completed.
- [ ] Add/Change **test cases** for the changes.
-- [ ] Add/Change the related **documentation**.
+- [ ] Add/Change the related **documentation**; also update `docs/source/config.rst` when you change the file `default_config.yaml`.
- [ ] (Optional) Add your change to `UPDATING.md` when it is an incompatible change.
diff --git a/docs/source/concept.rst b/docs/source/concept.rst
index de49c9c..9db389b 100644
--- a/docs/source/concept.rst
+++ b/docs/source/concept.rst
@@ -181,3 +181,11 @@ decide workflow of task. You could set `process_definition` in both normal assig
shell_task = Shell(name="shell", command="echo shell task",
With both `Process Definition`_, `Tasks`_ and `Tasks Dependence`_, we could build a workflow with multiple tasks.
+
+Authentication Token
+--------------------
+
+pydolphinscheduler uses a token for authentication when communicating with the dolphinscheduler server, and it ships
+with a default auth token so that it works out of the box. For security reasons, we highly recommend that you set your
+own auth token when you deploy in a production environment or test dolphinscheduler on a public network. The auth token
+keyword is ``auth_token``, and it can be set in multiple ways; see the :doc:`config` section for more detail.
diff --git a/docs/source/config.rst b/docs/source/config.rst
index 3f7fff8..c5753cf 100644
--- a/docs/source/config.rst
+++ b/docs/source/config.rst
@@ -81,6 +81,8 @@ All environment variables as below, and you could modify their value via `Bash <
+------------------+------------------------------------+---------------------------------------------------------------------------------------------------------------------+
| Variable Section | Variable Name | description |
+==================+====================================+=====================================================================================================================+
+| | ``PYDS_JAVA_GATEWAY_AUTH_TOKEN`` | Default Java gateway auth token, should be changed to a custom value in production or on a public network. |
++ +------------------------------------+---------------------------------------------------------------------------------------------------------------------+
| | ``PYDS_JAVA_GATEWAY_ADDRESS`` | Default Java gateway address, will use its value when it is set. |
+ +------------------------------------+---------------------------------------------------------------------------------------------------------------------+
| Java Gateway | ``PYDS_JAVA_GATEWAY_PORT`` | Default Java gateway port, will use its value when it is set. |
diff --git a/docs/source/start.rst b/docs/source/start.rst
index aa86f71..434d80e 100644
--- a/docs/source/start.rst
+++ b/docs/source/start.rst
@@ -155,7 +155,14 @@ from the API server, you should first change pydolphinscheduler configuration an
You could see more information in :doc:`config` about all the configurations pydolphinscheduler supported.
After that, you could go and see your DolphinScheduler web UI to find out a new workflow created by pydolphinscheduler,
-and the path of web UI is `Project -> Workflow -> Workflow Definition`.
+and the path of web UI is `Project -> Workflow -> Workflow Definition`, where you can see that a workflow and a workflow
+instance have been created and that the DAG is automatically formatted by the web UI.
+
+.. note::
+
+ A default authentication token is used when you first launch dolphinscheduler and pydolphinscheduler. Please change
+ the parameter ``auth_token`` when you deploy in a production environment or test dolphinscheduler on a public network.
+ See :ref:`authentication token <concept:authentication token>` for more detail.
What's More
diff --git a/src/pydolphinscheduler/configuration.py b/src/pydolphinscheduler/configuration.py
index 2f0c2c0..d12e47c 100644
--- a/src/pydolphinscheduler/configuration.py
+++ b/src/pydolphinscheduler/configuration.py
@@ -16,6 +16,7 @@
# under the License.
"""Configuration module for pydolphinscheduler."""
+import logging
import os
from pathlib import Path
from typing import Any
@@ -26,6 +27,8 @@ from pydolphinscheduler.utils.yaml_parser import YamlParser
BUILD_IN_CONFIG_PATH = Path(__file__).resolve().parent.joinpath("default_config.yaml")
+logger = logging.getLogger(__name__)
+
def config_path() -> Path:
"""Get the path of pydolphinscheduler configuration file."""
@@ -118,6 +121,25 @@ def set_single_config(key: str, value: Any) -> None:
file.write(content=str(config), to_path=str(config_path()), overwrite=True)
+def token_alert(auth_token: str) -> None:
+ """Warn when the auth token is ``None`` or is still the built-in default token.
+
+ To keep users from forgetting to change the default token, we log a warning whenever they use it.
+ """
+ if auth_token is None:
+ logger.warning(
+ "Auth token is None, highly recommend add a token in production, "
+ "especially you deploy in public network."
+ )
+ with open(BUILD_IN_CONFIG_PATH, mode="r") as f:
+ config = YamlParser(f.read())
+ if config.get("java_gateway.auth_token") == auth_token:
+ logger.warning(
+ "Auth token is default token, highly recommend add a token in production, "
+ "especially you deploy in public network."
+ )
+
+
def get_int(val: Any) -> int:
"""Covert value to int."""
return int(val)
@@ -152,6 +174,9 @@ JAVA_GATEWAY_AUTO_CONVERT = get_bool(
"PYDS_JAVA_GATEWAY_AUTO_CONVERT", configs.get("java_gateway.auto_convert")
)
)
+JAVA_GATEWAY_AUTH_TOKEN = os.environ.get(
+ "PYDS_JAVA_GATEWAY_AUTH_TOKEN", configs.get("java_gateway.auth_token")
+)
# User Settings
USER_NAME = os.environ.get("PYDS_USER_NAME", configs.get("default.user.name"))
diff --git a/src/pydolphinscheduler/default_config.yaml b/src/pydolphinscheduler/default_config.yaml
index 5ad3064..0c51880 100644
--- a/src/pydolphinscheduler/default_config.yaml
+++ b/src/pydolphinscheduler/default_config.yaml
@@ -17,6 +17,10 @@
# Setting about Java gateway server
java_gateway:
+ # Authentication token for connections from the Python API to the Python gateway server. The default value should be
+ # changed when you deploy on a public network.
+ auth_token: jwUDzpLsNKEFER4*a8gruBH_GsAurNxU7A@Xc
+
# The address on which the Python gateway server starts. Set its value to `0.0.0.0` if your Python API runs on a
# different host from the Python gateway server. It can also be set to a specific address like `127.0.0.1` or `localhost`
address: 127.0.0.1
diff --git a/src/pydolphinscheduler/java_gateway.py b/src/pydolphinscheduler/java_gateway.py
index cd03d32..21e2115 100644
--- a/src/pydolphinscheduler/java_gateway.py
+++ b/src/pydolphinscheduler/java_gateway.py
@@ -36,6 +36,7 @@ def launch_gateway(
address: Optional[str] = None,
port: Optional[int] = None,
auto_convert: Optional[bool] = True,
+ auth_token: Optional[str] = None,
) -> JavaGateway:
"""Launch java gateway to pydolphinscheduler.
@@ -43,10 +44,14 @@ def launch_gateway(
in the worst case, Py4J needs to go through all registered converters for all parameters.
This is why automatic conversion is disabled by default.
"""
+ auth_token = auth_token or configuration.JAVA_GATEWAY_AUTH_TOKEN
+ configuration.token_alert(auth_token)
+
gateway_parameters = GatewayParameters(
address=address or configuration.JAVA_GATEWAY_ADDRESS,
port=port or configuration.JAVA_GATEWAY_PORT,
auto_convert=auto_convert or configuration.JAVA_GATEWAY_AUTO_CONVERT,
+ auth_token=auth_token,
)
gateway = JavaGateway(gateway_parameters=gateway_parameters)
return gateway
@@ -78,8 +83,9 @@ class JavaGate:
address: Optional[str] = None,
port: Optional[int] = None,
auto_convert: Optional[bool] = True,
+ auth_token: Optional[str] = None,
):
- self.java_gateway = launch_gateway(address, port, auto_convert)
+ self.java_gateway = launch_gateway(address, port, auto_convert, auth_token)
gateway_version = "unknown"
with contextlib.suppress(Py4JError):
# 1. Java gateway version is too old: doesn't have method 'getGatewayVersion()'
diff --git a/tests/integration/test_java_gateway.py b/tests/integration/test_java_gateway.py
index 8b7c5ff..bc16e50 100644
--- a/tests/integration/test_java_gateway.py
+++ b/tests/integration/test_java_gateway.py
@@ -16,21 +16,30 @@
# under the License.
"""Test pydolphinscheduler java gateway."""
+import pytest
+from py4j.java_gateway import GatewayParameters, JavaGateway, java_import
+from tests.testing.constants import TOKEN
-from py4j.java_gateway import JavaGateway, java_import
+gateway_parameters = GatewayParameters(auth_token=TOKEN)
+gateway = JavaGateway(gateway_parameters=gateway_parameters)
+
+
+@pytest.fixture(scope="module")
+def class_tear_down():
+ """Tear down java gateway by close it."""
+ yield
+ gateway.close()
def test_gateway_connect():
"""Test weather client could connect java gate way or not."""
- gateway = JavaGateway()
app = gateway.entry_point
assert app.ping() == "PONG"
def test_jvm_simple():
"""Test use JVM build-in object and operator from java gateway."""
- gateway = JavaGateway()
smallest = gateway.jvm.java.lang.Integer.MIN_VALUE
biggest = gateway.jvm.java.lang.Integer.MAX_VALUE
assert smallest is not None and biggest is not None
@@ -39,14 +48,12 @@ def test_jvm_simple():
def test_python_client_java_import_single():
"""Test import single class from java gateway."""
- gateway = JavaGateway()
java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.FileUtils")
assert hasattr(gateway.jvm, "FileUtils")
def test_python_client_java_import_package():
"""Test import package contain multiple class from java gateway."""
- gateway = JavaGateway()
java_import(gateway.jvm, "org.apache.dolphinscheduler.common.utils.*")
# test if jvm view have some common utils
for util in ("FileUtils", "OSUtils", "DateUtils"):
diff --git a/tests/core/test_configuration.py b/tests/test_configuration.py
similarity index 93%
rename from tests/core/test_configuration.py
rename to tests/test_configuration.py
index b9dc8cb..a3dd07f 100644
--- a/tests/core/test_configuration.py
+++ b/tests/test_configuration.py
@@ -18,7 +18,9 @@
"""Test class :mod:`pydolphinscheduler.core.configuration`' method."""
import importlib
+import logging
import os
+import re
from pathlib import Path
from typing import Any
@@ -33,7 +35,7 @@ from pydolphinscheduler.configuration import (
)
from pydolphinscheduler.exceptions import PyDSConfException
from pydolphinscheduler.utils.yaml_parser import YamlParser
-from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME
+from tests.testing.constants import DEV_MODE, ENV_PYDS_HOME, TOKEN
from tests.testing.file import get_file_content
@@ -270,3 +272,20 @@ def test_get_configuration_env(config_name: str, src: Any, dest: Any):
importlib.reload(configuration)
assert getattr(configuration, config_name) == src
assert env_name not in os.environ
+
+
+def test_token_alert(caplog):
+ """Test alert message in function :func:`token_alert`."""
+ with caplog.at_level(logging.WARNING):
+ configuration.token_alert(TOKEN)
+ assert all(
+ [
+ "highly recommend add a token in production, especially you deploy in public network."
+ in caplog.text,
+ re.findall(
+ "Auth token is.*?, highly recommend add a token in production, "
+ "especially you deploy in public network.",
+ caplog.text,
+ ),
+ ]
+ )
diff --git a/tests/testing/constants.py b/tests/testing/constants.py
index ed2ee37..6a4b6e4 100644
--- a/tests/testing/constants.py
+++ b/tests/testing/constants.py
@@ -46,3 +46,6 @@ ENV_PYDS_HOME = "PYDS_HOME"
DEV_MODE = str(
os.environ.get("PY_DOLPHINSCHEDULER_DEV_MODE", False)
).strip().lower() in {"true", "t", "1"}
+
+# default token
+TOKEN = "jwUDzpLsNKEFER4*a8gruBH_GsAurNxU7A@Xc"
diff --git a/tests/core/test_default_config_yaml.py b/tests/utils/test_default_config_yaml.py
similarity index 100%
rename from tests/core/test_default_config_yaml.py
rename to tests/utils/test_default_config_yaml.py
diff --git a/tests/utils/test_yaml_parser.py b/tests/utils/test_yaml_parser.py
index 3abdda6..6ea8b52 100644
--- a/tests/utils/test_yaml_parser.py
+++ b/tests/utils/test_yaml_parser.py
@@ -23,6 +23,7 @@ import pytest
from ruamel.yaml import YAML
from pydolphinscheduler.utils.yaml_parser import YamlParser
+from tests.testing.constants import TOKEN
from tests.testing.path import path_default_config_yaml
yaml = YAML()
@@ -40,6 +41,7 @@ expects = [
{
# yaml.load("no need test") is a flag for skipping this entry because it is too difficult to maintain
"java_gateway": yaml.load("no need test"),
+ "java_gateway.auth_token": (TOKEN, "new-token"),
"java_gateway.address": ("127.0.0.1", "127.1.1.1"),
"java_gateway.port": (25333, 25555),
"java_gateway.auto_convert": (True, False),