You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2023/01/05 19:57:32 UTC

[superset] 12/14: fix: datasource save, improve data validation (#22038)

This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 1.5
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 81ad9cb0a70d922d3e1d1aab8c4d0402badbe4ab
Author: Daniel Vaz Gaspar <da...@gmail.com>
AuthorDate: Mon Nov 7 10:33:24 2022 +0000

    fix: datasource save, improve data validation (#22038)
---
 superset/config.py                          |  3 +++
 superset/utils/urls.py                      | 19 ++++++++++++++-
 superset/views/datasource/views.py          | 17 ++++++++++++-
 tests/integration_tests/datasource_tests.py | 38 +++++++++++++++++++++++++++++
 tests/unit_tests/utils/urls_tests.py        | 26 ++++++++++++++++++++
 5 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/superset/config.py b/superset/config.py
index eb64349739..38c8b4fdcb 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -1286,6 +1286,9 @@ STATIC_ASSETS_PREFIX = ""
 # Typically these should not be allowed.
 PREVENT_UNSAFE_DB_CONNECTIONS = True
 
+# Prevents unsafe default endpoints to be registered on datasets.
+PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True
+
 # Path used to store SSL certificates that are generated when using custom certs.
 # Defaults to temporary directory.
 # Example: SSL_CERT_PATH = "/certs"
diff --git a/superset/utils/urls.py b/superset/utils/urls.py
index a8a6148813..c31bfb1a51 100644
--- a/superset/utils/urls.py
+++ b/superset/utils/urls.py
@@ -14,10 +14,12 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import unicodedata
 import urllib
 from typing import Any
+from urllib.parse import urlparse
 
-from flask import current_app, url_for
+from flask import current_app, request, url_for
 
 
 def get_url_host(user_friendly: bool = False) -> str:
@@ -48,3 +50,18 @@ def modify_url_query(url: str, **kwargs: Any) -> str:
 
     parts[3] = "&".join(f"{k}={urllib.parse.quote(v[0])}" for k, v in params.items())
     return urllib.parse.urlunsplit(parts)
+
+
+def is_safe_url(url: str) -> bool:
+    if url.startswith("///"):
+        return False
+    try:
+        ref_url = urlparse(request.host_url)
+        test_url = urlparse(url)
+    except ValueError:
+        return False
+    if unicodedata.category(url[0])[0] == "C":
+        return False
+    if test_url.scheme != ref_url.scheme or ref_url.netloc != test_url.netloc:
+        return False
+    return True
diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py
index 560c12d6f1..09319c6683 100644
--- a/superset/views/datasource/views.py
+++ b/superset/views/datasource/views.py
@@ -18,7 +18,7 @@ import json
 from collections import Counter
 from typing import Any
 
-from flask import g, request
+from flask import current_app, g, request
 from flask_appbuilder import expose
 from flask_appbuilder.api import rison
 from flask_appbuilder.security.decorators import has_access_api
@@ -39,6 +39,7 @@ from superset.exceptions import SupersetException, SupersetSecurityException
 from superset.extensions import security_manager
 from superset.models.core import Database
 from superset.superset_typing import FlaskResponse
+from superset.utils.urls import is_safe_url
 from superset.views.base import (
     api,
     BaseSupersetView,
@@ -74,6 +75,20 @@ class Datasource(BaseSupersetView):
         datasource_id = datasource_dict.get("id")
         datasource_type = datasource_dict.get("type")
         database_id = datasource_dict["database"].get("id")
+        default_endpoint = datasource_dict["default_endpoint"]
+        if (
+            default_endpoint
+            and not is_safe_url(default_endpoint)
+            and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
+        ):
+            return json_error_response(
+                _(
+                    "The submitted URL is not considered safe,"
+                    " only use URLs with the same domain as Superset."
+                ),
+                status=400,
+            )
+
         orm_datasource = ConnectorRegistry.get_datasource(
             datasource_type, datasource_id, db.session
         )
diff --git a/tests/integration_tests/datasource_tests.py b/tests/integration_tests/datasource_tests.py
index 6d46afa0a9..19cc35c5c1 100644
--- a/tests/integration_tests/datasource_tests.py
+++ b/tests/integration_tests/datasource_tests.py
@@ -292,6 +292,44 @@ class TestDatasource(SupersetTestCase):
                 print(k)
                 self.assertEqual(resp[k], datasource_post[k])
 
+    def test_save_default_endpoint_validation_fail(self):
+        self.login(username="admin")
+        tbl_id = self.get_table(name="birth_names").id
+
+        datasource_post = get_datasource_post()
+        datasource_post["id"] = tbl_id
+        datasource_post["owners"] = [1]
+        datasource_post["default_endpoint"] = "http://www.google.com"
+        data = dict(data=json.dumps(datasource_post))
+        resp = self.client.post("/datasource/save/", data=data)
+        assert resp.status_code == 400
+
+    def test_save_default_endpoint_validation_unsafe(self):
+        self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = False
+        self.login(username="admin")
+        tbl_id = self.get_table(name="birth_names").id
+
+        datasource_post = get_datasource_post()
+        datasource_post["id"] = tbl_id
+        datasource_post["owners"] = [1]
+        datasource_post["default_endpoint"] = "http://www.google.com"
+        data = dict(data=json.dumps(datasource_post))
+        resp = self.client.post("/datasource/save/", data=data)
+        assert resp.status_code == 200
+        self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = True
+
+    def test_save_default_endpoint_validation_success(self):
+        self.login(username="admin")
+        tbl_id = self.get_table(name="birth_names").id
+
+        datasource_post = get_datasource_post()
+        datasource_post["id"] = tbl_id
+        datasource_post["owners"] = [1]
+        datasource_post["default_endpoint"] = "http://localhost/superset/1"
+        data = dict(data=json.dumps(datasource_post))
+        resp = self.client.post("/datasource/save/", data=data)
+        assert resp.status_code == 200
+
     def save_datasource_from_dict(self, datasource_post):
         data = dict(data=json.dumps(datasource_post))
         resp = self.get_json_resp("/datasource/save/", data)
diff --git a/tests/unit_tests/utils/urls_tests.py b/tests/unit_tests/utils/urls_tests.py
index dba38cb81a..e3a8b75fa2 100644
--- a/tests/unit_tests/utils/urls_tests.py
+++ b/tests/unit_tests/utils/urls_tests.py
@@ -15,6 +15,8 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import pytest
+
 from superset.utils.urls import modify_url_query
 
 EXPLORE_CHART_LINK = "http://localhost:9000/superset/explore/?form_data=%7B%22slice_id%22%3A+76%7D&standalone=true&force=false"
@@ -33,3 +35,27 @@ def test_convert_chart_link() -> None:
 def test_convert_dashboard_link() -> None:
     test_url = modify_url_query(EXPLORE_DASHBOARD_LINK, standalone="0")
     assert test_url == "http://localhost:9000/superset/dashboard/3/?standalone=0"
+
+
+@pytest.mark.parametrize(
+    "url,is_safe",
+    [
+        ("http://localhost/", True),
+        ("http://localhost/superset/1", True),
+        ("https://localhost/", False),
+        ("https://localhost/superset/1", False),
+        ("localhost/superset/1", False),
+        ("ftp://localhost/superset/1", False),
+        ("http://external.com", False),
+        ("https://external.com", False),
+        ("external.com", False),
+        ("///localhost", False),
+        ("xpto://localhost:[3/1/", False),
+    ],
+)
+def test_is_safe_url(url: str, is_safe: bool) -> None:
+    from superset import app
+    from superset.utils.urls import is_safe_url
+
+    with app.test_request_context("/"):
+        assert is_safe_url(url) == is_safe