You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@dolphinscheduler.apache.org by GitBox <gi...@apache.org> on 2022/09/27 13:22:39 UTC

[GitHub] [dolphinscheduler] zhongjiajie commented on a diff in pull request #12025: [DSIP-13][python]Sub problem: gitlab, OSS and S3 resource plug-ins

zhongjiajie commented on code in PR #12025:
URL: https://github.com/apache/dolphinscheduler/pull/12025#discussion_r981223176


##########
dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_s3.py:
##########
@@ -0,0 +1,79 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Test oss resource plugin."""
+import pytest
+
+from pydolphinscheduler.resources_plugin import S3
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            "https://ds-resource-plugin-private.s3.amazonaws.com/a.sh",
+            {
+                "file_path": "a.sh",
+                "bucket": "ds-resource-plugin-private",
+            },
+        ),
+        (
+            "https://ds-resource-plugin-public.s3.amazonaws.com/dir/a.sh",
+            {
+                "file_path": "dir/a.sh",
+                "bucket": "ds-resource-plugin-public",
+            },
+        ),
+    ],
+)
+def test_s3_get_bucket_file_info(attr, expected):
+    """Test the get_bucket_file_info function of the s3 resource plugin."""
+    s3 = S3(prefix="prefix")
+    s3.get_bucket_file_info(attr)
+    assert expected == s3._bucket_file_info.__dict__
+
+
+@pytest.mark.skip(reason="This test requires s3 services")
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            {
+                "init": {
+                    "prefix": "https://ds-resource-plugin-private.s3.amazonaws.com/dir/",
+                    "access_key_id": "LTAI5tP25Mxx",
+                    "access_key_secret": "cSur23Qbxx",

Review Comment:
   Is this a real access key and secret, or is the ending just masked?



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/oss.py:
##########
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler oss resource plugin."""
+from typing import Optional
+from urllib.parse import urljoin
+
+import oss2
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.bucket import Bucket, OSSFileInfo
+
+
+class OSS(ResourcePlugin, Bucket):
+    """OSS object, declare OSS resource plugin for task and workflow to dolphinscheduler.
+
+    :param prefix: A string representing the prefix of OSS.
+    :param access_key_id: A string representing the ID of AccessKey for AliCloud OSS
+
+    to access private files.
+
+    :param access_key_secret: A string representing the secret of AccessKey for AliCloud OSS
+
+    to access private files.
+    """
+
+    def __init__(
+        self,
+        prefix: str,
+        access_key_id: Optional[str] = None,
+        access_key_secret: Optional[str] = None,
+        *args,
+        **kwargs
+    ):
+        super().__init__(prefix, *args, **kwargs)
+        self.access_key_id = access_key_id
+        self.access_key_secret = access_key_secret
+
+    _bucket_file_info: Optional[OSSFileInfo] = None
+
+    def get_bucket_file_info(self, path: str):
+        """Get file information from the file url, like repository name, user, branch, and file path."""
+        elements = path.split("/")
+        self.get_index(path, "/", 3)
+        self._bucket_file_info = OSSFileInfo(
+            endpoint="https://" + elements[2].split(".")[1] + ".aliyuncs.com",
+            bucket=elements[2].split(".")[0],
+            file_path="/".join(str(elements[i]) for i in range(3, len(elements))),
+        )

Review Comment:
   Same here: we should not use so many bare constants.



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/s3.py:
##########
@@ -0,0 +1,76 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler S3 resource plugin."""
+
+from typing import Optional
+from urllib.parse import urljoin
+
+import boto3
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.bucket import Bucket, S3FileInfo
+
+
+class S3(ResourcePlugin, Bucket):
+    """S3 object, declare S3 resource plugin for task and workflow to dolphinscheduler.
+
+    :param prefix: A string representing the prefix of S3.
+    :param access_key_id: A string representing the ID of AccessKey for Amazon S3
+
+    to access private files.
+
+    :param access_key_secret: A string representing the secret of AccessKey for Amazon S3
+
+    to access private files.

Review Comment:
   remove the unnecessary blank line here too



##########
dolphinscheduler-python/pydolphinscheduler/tests/resources_plugin/test_gitlab.py:
##########
@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Test github resource plugin."""
+import pytest
+
+from pydolphinscheduler.resources_plugin.gitlab import GitLab
+
+
+@pytest.mark.parametrize(
+    "attr, expected",
+    [
+        (
+            "https://gitlab.com/chenruijie/ds-gitlab/-/blob/main/union.sh",

Review Comment:
   Can we replace all author-related names such as `chenruijie` with something else, like `pydolphinscheduler`?
   ```suggestion
               "https://gitlab.com/pydolphinscheduler/ds-gitlab/-/blob/main/union.sh",
   ```



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/oss.py:
##########
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler oss resource plugin."""
+from typing import Optional
+from urllib.parse import urljoin
+
+import oss2
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.bucket import Bucket, OSSFileInfo
+
+
+class OSS(ResourcePlugin, Bucket):
+    """OSS object, declare OSS resource plugin for task and workflow to dolphinscheduler.
+
+    :param prefix: A string representing the prefix of OSS.
+    :param access_key_id: A string representing the ID of AccessKey for AliCloud OSS
+
+    to access private files.
+
+    :param access_key_secret: A string representing the secret of AccessKey for AliCloud OSS
+
+    to access private files.

Review Comment:
   ```suggestion
       :param access_key_id: A string representing the ID of AccessKey for AliCloud OSS to access private files.
   
       :param access_key_secret: A string representing the secret of AccessKey for AliCloud OSS to access private files.
   ```



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/oss.py:
##########
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler oss resource plugin."""
+from typing import Optional
+from urllib.parse import urljoin
+
+import oss2
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.bucket import Bucket, OSSFileInfo
+
+
+class OSS(ResourcePlugin, Bucket):
+    """OSS object, declare OSS resource plugin for task and workflow to dolphinscheduler.
+
+    :param prefix: A string representing the prefix of OSS.
+    :param access_key_id: A string representing the ID of AccessKey for AliCloud OSS
+
+    to access private files.
+
+    :param access_key_secret: A string representing the secret of AccessKey for AliCloud OSS
+
+    to access private files.

Review Comment:
   please remove the unnecessary blank line



##########
dolphinscheduler-python/pydolphinscheduler/src/pydolphinscheduler/resources_plugin/gitlab.py:
##########
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""DolphinScheduler gitlab resource plugin."""
+from typing import Optional
+from urllib.parse import urljoin
+
+import gitlab
+import requests
+
+from pydolphinscheduler.core.resource_plugin import ResourcePlugin
+from pydolphinscheduler.resources_plugin.base.git import Git, GitLabFileInfo
+
+
+class GitLab(ResourcePlugin, Git):
+    """GitLab object, declare GitLab resource plugin for task and workflow to dolphinscheduler.
+
+    :param prefix: A string representing the prefix of GitLab.
+    :param private_token: A string used for identity authentication of GitLab private or Internal warehouse.
+    :param oauth_token: A string used for identity authentication of GitLab private or Internal warehouse.
+    :param username: A string representing the user of the warehouse.
+    :param password: A string representing the user password.
+    """
+
+    def __init__(
+        self,
+        prefix: str,
+        private_token: Optional[str] = None,
+        oauth_token: Optional[str] = None,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
+        *args,
+        **kwargs
+    ):
+        super().__init__(prefix, *args, **kwargs)
+        self.private_token = private_token
+        self.oauth_token = oauth_token
+        self.username = username
+        self.password = password
+
+    def get_git_file_info(self, path: str):
+        """Get file information from the file url, like repository name, user, branch, and file path."""
+        elements = path.split("/")
+        self.get_index(path, "/", 8)
+        for i in range(0, len(elements)):
+            if (
+                i + 3 < len(elements)
+                and elements[i + 1] == "-"
+                and elements[i + 2] == "blob"
+            ):
+                host_end = self.get_index(path, "/", 3)
+                self._git_file_info = GitLabFileInfo(
+                    host=path[0:host_end],
+                    repo_name=elements[i],
+                    branch=elements[i + 3],
+                    file_path="/".join(
+                        str(elements[j]) for j in range(i + 4, len(elements))
+                    ),
+                    user="/".join(str(elements[j]) for j in range(3, i)),
+                )

Review Comment:
   Can we handle this some other way? I think we use too many constants in this method.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@dolphinscheduler.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org