You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by GitBox <gi...@apache.org> on 2022/09/10 14:10:37 UTC

[GitHub] [dolphinscheduler] EricGao888 commented on a diff in pull request #11831: [DSIP-13][Feature]github_resource_plugin

EricGao888 commented on code in PR #11831:
URL: https://github.com/apache/dolphinscheduler/pull/11831#discussion_r967656045


##########
dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/resource-plugin.rst:
##########
@@ -58,7 +58,7 @@ How to use
 Resource plug-ins can be used in task subclasses and workflows. You can use the resource plug-ins by adding the `resource_plugin` parameter when they are initialized.
 For example, local resource plug-ins, add `resource_plugin = Local("/tmp")`.
 
-The resource plug-ins we currently support is `local`.
+The resource plug-ins we currently support are `local`, 'github'.

Review Comment:
   Maybe we could keep consistent with `plugins` or `plug-ins` but not use both of them in different places.



##########
dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py:
##########
@@ -0,0 +1,228 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Test github resource plugin."""
+from unittest.mock import PropertyMock, patch
+
+import pytest
+
+from pydolphinscheduler.resources_plugin import GitHub
+from pydolphinscheduler.resources_plugin.base.git import GitFileInfo
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            {
+                "user": "apache",
+                "repo_name": "dolphinscheduler",
+                "file_path": "script/install.sh",
+                "api": "https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}",
+            },
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh",
+        ),
+    ],
+)
+def test_github_build_req_api(attr, expected):
+    """Test the build_req_api function of the github resource plug-in."""
+    github = GitHub(prefix="prefix")
+    assert expected == github.build_req_api(**attr)
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            "https://github.com/apache/dolphinscheduler/blob/dev/script/install.sh",
+            {
+                "_user": "apache",
+                "_repo_name": "dolphinscheduler",
+                "_branch": "dev",
+                "_file_path": "script/install.sh",
+            },
+        ),
+        (
+            "https://github.com/apache/dolphinscheduler/blob/master/pom.xml",
+            {
+                "_user": "apache",
+                "_repo_name": "dolphinscheduler",
+                "_branch": "master",
+                "_file_path": "pom.xml",
+            },
+        ),
+        (
+            "https://github.com/apache/dolphinscheduler/blob/1.3.9-release/docker/build/startup.sh",
+            {
+                "_user": "apache",
+                "_repo_name": "dolphinscheduler",
+                "_branch": "1.3.9-release",
+                "_file_path": "docker/build/startup.sh",
+            },
+        ),
+    ],
+)
+def test_github_get_git_file_info(attr, expected):
+    """Test the get_git_file_info function of the github resource plug-in."""
+    github = GitHub(prefix="prefix")
+    github.get_git_file_info(attr)
+    assert expected == github._git_file_info.__dict__
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            (
+                {
+                    "user": "apache",
+                    "repo_name": "dolphinscheduler",
+                    "file_path": "docker/build/startup.sh",
+                }
+            ),
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/docker/build/startup.sh",
+        ),
+        (
+            (
+                {
+                    "user": "apache",
+                    "repo_name": "dolphinscheduler",
+                    "file_path": "pom.xml",
+                }
+            ),
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/pom.xml",
+        ),
+        (
+            (
+                {
+                    "user": "apache",
+                    "repo_name": "dolphinscheduler",
+                    "file_path": "script/create-dolphinscheduler.sh",
+                }
+            ),
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/script/create-dolphinscheduler.sh",
+        ),
+    ],
+)
+@patch(
+    "pydolphinscheduler.resources_plugin.github.GitHub._git_file_info",
+    new_callable=PropertyMock,
+)
+def test_github_get_req_url(m_git_file_info, attr, expected):
+    """Test the get_req_url function of the github resource plug-in."""
+    github = GitHub(prefix="prefix")
+    m_git_file_info.return_value = GitFileInfo(**attr)
+    assert expected == github.get_req_url()
+
+
+@pytest.mark.skip(reason="Lack of test environment, need stable repository")
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            "lombok.config",
+            "#\n"
+            "# Licensed to the Apache Software Foundation (ASF) under one or more\n"
+            "# contributor license agreements.  See the NOTICE file distributed with\n"
+            "# this work for additional information regarding copyright ownership.\n"
+            "# The ASF licenses this file to You under the Apache License, Version 2.0\n"
+            '# (the "License"); you may not use this file except in compliance with\n'
+            "# the License.  You may obtain a copy of the License at\n"
+            "#\n"
+            "#     http://www.apache.org/licenses/LICENSE-2.0\n"
+            "#\n"
+            "# Unless required by applicable law or agreed to in writing, software\n"
+            '# distributed under the License is distributed on an "AS IS" BASIS,\n'
+            "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
+            "# See the License for the specific language governing permissions and\n"
+            "# limitations under the License.\n"
+            "#\n"
+            "\n"
+            "lombok.addLombokGeneratedAnnotation = true\n",
+        ),
+    ],
+)
+def test_github_read_file(attr, expected):
+    """Test the read_file function of the github resource plug-in."""
+    github = GitHub(
+        prefix="https://github.com/apache/dolphinscheduler/blob/dev",
+    )
+    assert expected == github.read_file(attr)
+
+
+@pytest.mark.skip(reason="Lack of test environment, need stable repository")
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            "https://github.com/apache/dolphinscheduler/blob/dev/lombok.config",
+            "#\n"
+            "# Licensed to the Apache Software Foundation (ASF) under one or more\n"
+            "# contributor license agreements.  See the NOTICE file distributed with\n"
+            "# this work for additional information regarding copyright ownership.\n"
+            "# The ASF licenses this file to You under the Apache License, Version 2.0\n"
+            '# (the "License"); you may not use this file except in compliance with\n'
+            "# the License.  You may obtain a copy of the License at\n"
+            "#\n"
+            "#     http://www.apache.org/licenses/LICENSE-2.0\n"
+            "#\n"
+            "# Unless required by applicable law or agreed to in writing, software\n"
+            '# distributed under the License is distributed on an "AS IS" BASIS,\n'
+            "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
+            "# See the License for the specific language governing permissions and\n"
+            "# limitations under the License.\n"
+            "#\n"
+            "\n"
+            "lombok.addLombokGeneratedAnnotation = true\n",
+        ),
+    ],
+)
+def test_github_req(attr, expected):
+    """Test the req function of the github resource plug-in."""
+    github = GitHub(
+        prefix="prefix",
+    )
+    assert expected == github.req(attr)
+
+
+@pytest.mark.skip(

Review Comment:
   Same as above.



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py:
##########
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler github resource plugin."""
+import base64
+from typing import Optional
+from urllib.parse import urljoin
+
+import requests
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.git import Git, GitFileInfo
+
+
+class GitHub(ResourcePlugin, Git):

Review Comment:
   Personally I would suggest using a different name for this class, maybe something like `GitHubHook` or other. `GitHub` is kind of ambiguous.



##########
dolphinscheduler-python/pydolphinscheduler/docs/source/resources_plugin/github.rst:
##########
@@ -0,0 +1,35 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+..   http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+
+GitHub
+======
+
+`GitHub` is a github resource plugin for pydolphinscheduler.
+
+When using a github resource plugin, you only need to add the `resource_plugin` parameter in the task subclass or workflow definition,
+such as `resource_plugin=GitHub(prefix="https://github.com/xxx", access_token="ghpxx")`.
+The token parameter is optional. You need to add it when your warehouse is a private warehouse.

Review Comment:
   What is GitHub `warehouse`? Do you mean `repository`?



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py:
##########
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler github resource plugin."""
+import base64
+from typing import Optional
+from urllib.parse import urljoin
+
+import requests
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.git import Git, GitFileInfo
+
+
+class GitHub(ResourcePlugin, Git):
+    """GitHub resource plugin, a plugin for task and workflow to dolphinscheduler to read resource.
+
+    :param prefix: A string representing the prefix of GitHub.
+    :param access_token: A string used for identity authentication of GitHub private warehouse.
+    """
+
+    # [start init_method]
+    def __init__(
+        self, prefix: str, access_token: Optional[str] = None, *args, **kwargs
+    ):
+        super().__init__(prefix, *args, **kwargs)
+        self.access_token = access_token
+
+    # [end init_method]
+
+    def build_req_api(
+        self,
+        user: str,
+        repo_name: str,
+        file_path: str,
+        api: str,
+    ):
+        """Build request file content API."""
+        api = api.replace("{user}", user)
+        api = api.replace("{repo_name}", repo_name)
+        api = api.replace("{file_path}", file_path)
+        return api
+
+    def get_git_file_info(self, path: str):
+        """Get file information from the file url, like repository name, user, branch, and file path."""
+        elements = path.split("/")
+        index = self.get_index(path, "/", 7)
+        index = index + 1
+        file_info = GitFileInfo(
+            user=elements[3],
+            repo_name=elements[4],
+            branch=elements[6],
+            file_path=path[index:],
+        )
+        self._git_file_info = file_info
+
+    def get_req_url(self):
+        """Build request URL according to file information."""
+        return self.build_req_api(
+            user=self._git_file_info.user,
+            repo_name=self._git_file_info.repo_name,
+            file_path=self._git_file_info.file_path,
+            api="https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}",
+        )
+
+    # [start read_file_method]

Review Comment:
   Same as above. If function name has already indicated what it does, usually we do not need comments.



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/github.py:
##########
@@ -0,0 +1,109 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler github resource plugin."""
+import base64
+from typing import Optional
+from urllib.parse import urljoin
+
+import requests
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.git import Git, GitFileInfo
+
+
+class GitHub(ResourcePlugin, Git):
+    """GitHub resource plugin, a plugin for task and workflow to dolphinscheduler to read resource.
+
+    :param prefix: A string representing the prefix of GitHub.
+    :param access_token: A string used for identity authentication of GitHub private warehouse.
+    """
+
+    # [start init_method]

Review Comment:
   Maybe we don't need this comment here. The same for `end init_method` comment.



##########
dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_github.py:
##########
@@ -0,0 +1,228 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Test github resource plugin."""
+from unittest.mock import PropertyMock, patch
+
+import pytest
+
+from pydolphinscheduler.resources_plugin import GitHub
+from pydolphinscheduler.resources_plugin.base.git import GitFileInfo
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            {
+                "user": "apache",
+                "repo_name": "dolphinscheduler",
+                "file_path": "script/install.sh",
+                "api": "https://api.github.com/repos/{user}/{repo_name}/contents/{file_path}",
+            },
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/script/install.sh",
+        ),
+    ],
+)
+def test_github_build_req_api(attr, expected):
+    """Test the build_req_api function of the github resource plug-in."""
+    github = GitHub(prefix="prefix")
+    assert expected == github.build_req_api(**attr)
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            "https://github.com/apache/dolphinscheduler/blob/dev/script/install.sh",
+            {
+                "_user": "apache",
+                "_repo_name": "dolphinscheduler",
+                "_branch": "dev",
+                "_file_path": "script/install.sh",
+            },
+        ),
+        (
+            "https://github.com/apache/dolphinscheduler/blob/master/pom.xml",
+            {
+                "_user": "apache",
+                "_repo_name": "dolphinscheduler",
+                "_branch": "master",
+                "_file_path": "pom.xml",
+            },
+        ),
+        (
+            "https://github.com/apache/dolphinscheduler/blob/1.3.9-release/docker/build/startup.sh",
+            {
+                "_user": "apache",
+                "_repo_name": "dolphinscheduler",
+                "_branch": "1.3.9-release",
+                "_file_path": "docker/build/startup.sh",
+            },
+        ),
+    ],
+)
+def test_github_get_git_file_info(attr, expected):
+    """Test the get_git_file_info function of the github resource plug-in."""
+    github = GitHub(prefix="prefix")
+    github.get_git_file_info(attr)
+    assert expected == github._git_file_info.__dict__
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            (
+                {
+                    "user": "apache",
+                    "repo_name": "dolphinscheduler",
+                    "file_path": "docker/build/startup.sh",
+                }
+            ),
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/docker/build/startup.sh",
+        ),
+        (
+            (
+                {
+                    "user": "apache",
+                    "repo_name": "dolphinscheduler",
+                    "file_path": "pom.xml",
+                }
+            ),
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/pom.xml",
+        ),
+        (
+            (
+                {
+                    "user": "apache",
+                    "repo_name": "dolphinscheduler",
+                    "file_path": "script/create-dolphinscheduler.sh",
+                }
+            ),
+            "https://api.github.com/repos/apache/dolphinscheduler/contents/script/create-dolphinscheduler.sh",
+        ),
+    ],
+)
+@patch(
+    "pydolphinscheduler.resources_plugin.github.GitHub._git_file_info",
+    new_callable=PropertyMock,
+)
+def test_github_get_req_url(m_git_file_info, attr, expected):
+    """Test the get_req_url function of the github resource plug-in."""
+    github = GitHub(prefix="prefix")
+    m_git_file_info.return_value = GitFileInfo(**attr)
+    assert expected == github.get_req_url()
+
+
+@pytest.mark.skip(reason="Lack of test environment, need stable repository")

Review Comment:
   For unit tests, we usually decouple the external system. You may mock the interactions with the real `GitHub` and focus on testing the logic within `github.read_file(attr)` function. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@dolphinscheduler.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org