You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2023/08/23 12:58:50 UTC

[superset] 06/06: fix: dataset safe URL for explore_url (#24686)

This is an automated email from the ASF dual-hosted git repository.

michaelsmolina pushed a commit to branch 3.0
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 931e1b21396c55a7da8fa8b026c64c63b333e3eb
Author: Daniel Vaz Gaspar <da...@gmail.com>
AuthorDate: Wed Aug 23 13:31:44 2023 +0100

    fix: dataset safe URL for explore_url (#24686)
    
    (cherry picked from commit a9efd4b2e307b0df68e88ebbd02d22d7032fa451)
---
 UPDATING.md                                        |  1 +
 .../src/pages/DatasetList/DatasetList.test.tsx     | 60 +++++++++++++++++++++-
 superset-frontend/src/pages/DatasetList/index.tsx  | 27 +++++++---
 superset/config.py                                 |  2 +-
 superset/datasets/commands/exceptions.py           | 17 ------
 superset/datasets/commands/update.py               | 12 -----
 superset/utils/urls.py                             | 19 +------
 superset/views/base.py                             |  1 +
 superset/views/datasource/views.py                 | 17 +-----
 tests/integration_tests/datasets/api_tests.py      | 26 ----------
 tests/integration_tests/datasource_tests.py        | 26 ----------
 tests/unit_tests/utils/urls_tests.py               | 24 ---------
 12 files changed, 85 insertions(+), 147 deletions(-)

diff --git a/UPDATING.md b/UPDATING.md
index 5a29c43dfa..19c60a19b7 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -51,6 +51,7 @@ assists people when migrating to a new version.
 
 ### Breaking Changes
 
+- [24686](https://github.com/apache/superset/pull/24686): All datasets' custom explore_url values are handled as relative URLs on the frontend, a behaviour controlled by PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET.
 - [24262](https://github.com/apache/superset/pull/24262): Enabled `TALISMAN_ENABLED` flag by default and provided stricter default Content Security Policy
 - [24415](https://github.com/apache/superset/pull/24415): Removed the obsolete Druid NoSQL REGEX operator.
 - [24423](https://github.com/apache/superset/pull/24423): Removed deprecated APIs `/superset/slice_json/...`, `/superset/annotation_json/...`
diff --git a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx
index 115a861bdd..916dd0615b 100644
--- a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx
+++ b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx
@@ -35,6 +35,7 @@ import IndeterminateCheckbox from 'src/components/IndeterminateCheckbox';
 import waitForComponentToPaint from 'spec/helpers/waitForComponentToPaint';
 import { act } from 'react-dom/test-utils';
 import SubMenu from 'src/features/home/SubMenu';
+import * as reactRedux from 'react-redux';
 
 // store needed for withToasts(DatasetList)
 const mockStore = configureStore([thunk]);
@@ -47,13 +48,15 @@ const datasetsDuplicateEndpoint = 'glob:*/api/v1/dataset/duplicate*';
 const databaseEndpoint = 'glob:*/api/v1/dataset/related/database*';
 const datasetsEndpoint = 'glob:*/api/v1/dataset/?*';
 
+const useSelectorMock = jest.spyOn(reactRedux, 'useSelector');
+
 const mockdatasets = [...new Array(3)].map((_, i) => ({
   changed_by_name: 'user',
   kind: i === 0 ? 'virtual' : 'physical', // ensure there is 1 virtual
   changed_by: 'user',
   changed_on: new Date().toISOString(),
   database_name: `db ${i}`,
-  explore_url: `/explore/?datasource_type=table&datasource_id=${i}`,
+  explore_url: `https://www.google.com?${i}`,
   id: i,
   schema: `schema ${i}`,
   table_name: `coolest table ${i}`,
@@ -280,3 +283,58 @@ describe('RTL', () => {
     expect(importTooltip).toBeInTheDocument();
   });
 });
+
+describe('Prevent unsafe URLs', () => {
+  const mockedProps = {};
+  let wrapper: any;
+
+  it('Check prevent unsafe is on renders relative links', async () => {
+    const tdColumnsNumber = 9;
+    useSelectorMock.mockReturnValue(true);
+    wrapper = await mountAndWait(mockedProps);
+    const tdElements = wrapper.find(ListView).find('td');
+    expect(
+      tdElements
+        .at(0 * tdColumnsNumber + 1)
+        .find('a')
+        .prop('href'),
+    ).toBe('/https://www.google.com?0');
+    expect(
+      tdElements
+        .at(1 * tdColumnsNumber + 1)
+        .find('a')
+        .prop('href'),
+    ).toBe('/https://www.google.com?1');
+    expect(
+      tdElements
+        .at(2 * tdColumnsNumber + 1)
+        .find('a')
+        .prop('href'),
+    ).toBe('/https://www.google.com?2');
+  });
+
+  it('Check prevent unsafe is off renders absolute links', async () => {
+    const tdColumnsNumber = 9;
+    useSelectorMock.mockReturnValue(false);
+    wrapper = await mountAndWait(mockedProps);
+    const tdElements = wrapper.find(ListView).find('td');
+    expect(
+      tdElements
+        .at(0 * tdColumnsNumber + 1)
+        .find('a')
+        .prop('href'),
+    ).toBe('https://www.google.com?0');
+    expect(
+      tdElements
+        .at(1 * tdColumnsNumber + 1)
+        .find('a')
+        .prop('href'),
+    ).toBe('https://www.google.com?1');
+    expect(
+      tdElements
+        .at(2 * tdColumnsNumber + 1)
+        .find('a')
+        .prop('href'),
+    ).toBe('https://www.google.com?2');
+  });
+});
diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx
index 7633edb016..0f3fb84ab1 100644
--- a/superset-frontend/src/pages/DatasetList/index.tsx
+++ b/superset-frontend/src/pages/DatasetList/index.tsx
@@ -30,7 +30,7 @@ import React, {
   useMemo,
   useCallback,
 } from 'react';
-import { useHistory } from 'react-router-dom';
+import { Link, useHistory } from 'react-router-dom';
 import rison from 'rison';
 import {
   createFetchRelated,
@@ -69,6 +69,7 @@ import {
   CONFIRM_OVERWRITE_MESSAGE,
 } from 'src/features/datasets/constants';
 import DuplicateDatasetModal from 'src/features/datasets/DuplicateDatasetModal';
+import { useSelector } from 'react-redux';
 
 const extensionsRegistry = getExtensionsRegistry();
 const DatasetDeleteRelatedExtension = extensionsRegistry.get(
@@ -181,6 +182,11 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
     setSSHTunnelPrivateKeyPasswordFields,
   ] = useState<string[]>([]);
 
+  const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector<any, boolean>(
+    state =>
+      state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET || false,
+  );
+
   const openDatasetImportModal = () => {
     showImportModal(true);
   };
@@ -309,11 +315,20 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
             },
           },
         }: any) => {
-          const titleLink = (
-            // exploreUrl can be a link to Explore or an external link
-            // in the first case use SPA routing, else use HTML anchor
-            <GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
-          );
+          let titleLink: JSX.Element;
+          if (PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET) {
+            titleLink = (
+              <Link data-test="internal-link" to={exploreURL}>
+                {datasetTitle}
+              </Link>
+            );
+          } else {
+            titleLink = (
+              // exploreUrl can be a link to Explore or an external link
+              // in the first case use SPA routing, else use HTML anchor
+              <GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
+            );
+          }
           try {
             const parsedExtra = JSON.parse(extra);
             return (
diff --git a/superset/config.py b/superset/config.py
index 17f5bb72a2..a2ce1174a1 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -1460,7 +1460,7 @@ STATIC_ASSETS_PREFIX = ""
 # Typically these should not be allowed.
 PREVENT_UNSAFE_DB_CONNECTIONS = True
 
-# Prevents unsafe default endpoints to be registered on datasets.
+# If true, all default URLs on datasets will be handled as relative URLs by the frontend
 PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True
 
 # Define a list of allowed URLs for dataset data imports (v1).
diff --git a/superset/datasets/commands/exceptions.py b/superset/datasets/commands/exceptions.py
index fe9fe94cc6..f135294980 100644
--- a/superset/datasets/commands/exceptions.py
+++ b/superset/datasets/commands/exceptions.py
@@ -61,23 +61,6 @@ class DatasetExistsValidationError(ValidationError):
         )
 
 
-class DatasetEndpointUnsafeValidationError(ValidationError):
-    """
-    Marshmallow validation error for unsafe dataset default endpoint
-    """
-
-    def __init__(self) -> None:
-        super().__init__(
-            [
-                _(
-                    "The submitted URL is not considered safe,"
-                    " only use URLs with the same domain as Superset."
-                )
-            ],
-            field_name="default_endpoint",
-        )
-
-
 class DatasetColumnNotFoundValidationError(ValidationError):
     """
     Marshmallow validation error when dataset column for update does not exist
diff --git a/superset/datasets/commands/update.py b/superset/datasets/commands/update.py
index 1636805567..a38439fb7f 100644
--- a/superset/datasets/commands/update.py
+++ b/superset/datasets/commands/update.py
@@ -18,7 +18,6 @@ import logging
 from collections import Counter
 from typing import Any, Optional
 
-from flask import current_app
 from flask_appbuilder.models.sqla import Model
 from marshmallow import ValidationError
 
@@ -32,7 +31,6 @@ from superset.datasets.commands.exceptions import (
     DatasetColumnNotFoundValidationError,
     DatasetColumnsDuplicateValidationError,
     DatasetColumnsExistsValidationError,
-    DatasetEndpointUnsafeValidationError,
     DatasetExistsValidationError,
     DatasetForbiddenError,
     DatasetInvalidError,
@@ -43,7 +41,6 @@ from superset.datasets.commands.exceptions import (
     DatasetUpdateFailedError,
 )
 from superset.exceptions import SupersetSecurityException
-from superset.utils.urls import is_safe_url
 
 logger = logging.getLogger(__name__)
 
@@ -104,15 +101,6 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
             self._properties["owners"] = owners
         except ValidationError as ex:
             exceptions.append(ex)
-        # Validate default URL safety
-        default_endpoint = self._properties.get("default_endpoint")
-        if (
-            default_endpoint
-            and not is_safe_url(default_endpoint)
-            and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
-        ):
-            exceptions.append(DatasetEndpointUnsafeValidationError())
-
         # Validate columns
         if columns := self._properties.get("columns"):
             self._validate_columns(columns, exceptions)
diff --git a/superset/utils/urls.py b/superset/utils/urls.py
index 31fbd89337..57a1b63dd4 100644
--- a/superset/utils/urls.py
+++ b/superset/utils/urls.py
@@ -14,12 +14,10 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-import unicodedata
 import urllib
 from typing import Any
-from urllib.parse import urlparse
 
-from flask import current_app, request, url_for
+from flask import current_app, url_for
 
 
 def get_url_host(user_friendly: bool = False) -> str:
@@ -52,18 +50,3 @@ def modify_url_query(url: str, **kwargs: Any) -> str:
         f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items()
     )
     return urllib.parse.urlunsplit(parts)
-
-
-def is_safe_url(url: str) -> bool:
-    if url.startswith("///"):
-        return False
-    try:
-        ref_url = urlparse(request.host_url)
-        test_url = urlparse(url)
-    except ValueError:
-        return False
-    if unicodedata.category(url[0])[0] == "C":
-        return False
-    if test_url.scheme != ref_url.scheme or ref_url.netloc != test_url.netloc:
-        return False
-    return True
diff --git a/superset/views/base.py b/superset/views/base.py
index 250c16ebb2..ab53ff07da 100644
--- a/superset/views/base.py
+++ b/superset/views/base.py
@@ -120,6 +120,7 @@ FRONTEND_CONF_KEYS = (
     "ALERT_REPORTS_DEFAULT_RETENTION",
     "ALERT_REPORTS_DEFAULT_WORKING_TIMEOUT",
     "NATIVE_FILTER_DEFAULT_ROW_LIMIT",
+    "PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET",
 )
 
 logger = logging.getLogger(__name__)
diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py
index b2fd387379..06dd37fbd9 100644
--- a/superset/views/datasource/views.py
+++ b/superset/views/datasource/views.py
@@ -18,7 +18,7 @@ import json
 from collections import Counter
 from typing import Any
 
-from flask import current_app, redirect, request
+from flask import redirect, request
 from flask_appbuilder import expose, permission_name
 from flask_appbuilder.api import rison
 from flask_appbuilder.security.decorators import has_access, has_access_api
@@ -40,7 +40,6 @@ from superset.exceptions import SupersetException, SupersetSecurityException
 from superset.models.core import Database
 from superset.superset_typing import FlaskResponse
 from superset.utils.core import DatasourceType
-from superset.utils.urls import is_safe_url
 from superset.views.base import (
     api,
     BaseSupersetView,
@@ -82,20 +81,6 @@ class Datasource(BaseSupersetView):
         datasource_id = datasource_dict.get("id")
         datasource_type = datasource_dict.get("type")
         database_id = datasource_dict["database"].get("id")
-        default_endpoint = datasource_dict["default_endpoint"]
-        if (
-            default_endpoint
-            and not is_safe_url(default_endpoint)
-            and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
-        ):
-            return json_error_response(
-                _(
-                    "The submitted URL is not considered safe,"
-                    " only use URLs with the same domain as Superset."
-                ),
-                status=400,
-            )
-
         orm_datasource = DatasourceDAO.get_datasource(
             db.session, DatasourceType(datasource_type), datasource_id
         )
diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py
index 8884b1171a..2acefe48d0 100644
--- a/tests/integration_tests/datasets/api_tests.py
+++ b/tests/integration_tests/datasets/api_tests.py
@@ -1446,32 +1446,6 @@ class TestDatasetApi(SupersetTestCase):
         db.session.delete(ab_user)
         db.session.commit()
 
-    def test_update_dataset_unsafe_default_endpoint(self):
-        """
-        Dataset API: Test unsafe default endpoint
-        """
-        if backend() == "sqlite":
-            return
-
-        dataset = self.insert_default_dataset()
-        self.login(username="admin")
-        uri = f"api/v1/dataset/{dataset.id}"
-        table_data = {"default_endpoint": "http://www.google.com"}
-        rv = self.client.put(uri, json=table_data)
-        data = json.loads(rv.data.decode("utf-8"))
-        assert rv.status_code == 422
-        expected_response = {
-            "message": {
-                "default_endpoint": [
-                    "The submitted URL is not considered safe,"
-                    " only use URLs with the same domain as Superset."
-                ]
-            }
-        }
-        assert data == expected_response
-        db.session.delete(dataset)
-        db.session.commit()
-
     @patch("superset.daos.dataset.DatasetDAO.update")
     def test_update_dataset_sqlalchemy_error(self, mock_dao_update):
         """
diff --git a/tests/integration_tests/datasource_tests.py b/tests/integration_tests/datasource_tests.py
index 4c05898cfe..12293325d1 100644
--- a/tests/integration_tests/datasource_tests.py
+++ b/tests/integration_tests/datasource_tests.py
@@ -302,32 +302,6 @@ class TestDatasource(SupersetTestCase):
                 print(k)
                 self.assertEqual(resp[k], datasource_post[k])
 
-    def test_save_default_endpoint_validation_fail(self):
-        self.login(username="admin")
-        tbl_id = self.get_table(name="birth_names").id
-
-        datasource_post = get_datasource_post()
-        datasource_post["id"] = tbl_id
-        datasource_post["owners"] = [1]
-        datasource_post["default_endpoint"] = "http://www.google.com"
-        data = dict(data=json.dumps(datasource_post))
-        resp = self.client.post("/datasource/save/", data=data)
-        assert resp.status_code == 400
-
-    def test_save_default_endpoint_validation_unsafe(self):
-        self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = False
-        self.login(username="admin")
-        tbl_id = self.get_table(name="birth_names").id
-
-        datasource_post = get_datasource_post()
-        datasource_post["id"] = tbl_id
-        datasource_post["owners"] = [1]
-        datasource_post["default_endpoint"] = "http://www.google.com"
-        data = dict(data=json.dumps(datasource_post))
-        resp = self.client.post("/datasource/save/", data=data)
-        assert resp.status_code == 200
-        self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = True
-
     def test_save_default_endpoint_validation_success(self):
         self.login(username="admin")
         tbl_id = self.get_table(name="birth_names").id
diff --git a/tests/unit_tests/utils/urls_tests.py b/tests/unit_tests/utils/urls_tests.py
index 287f346c3d..8ead2dacb4 100644
--- a/tests/unit_tests/utils/urls_tests.py
+++ b/tests/unit_tests/utils/urls_tests.py
@@ -39,27 +39,3 @@ def test_convert_dashboard_link() -> None:
 def test_convert_dashboard_link_with_integer() -> None:
     test_url = modify_url_query(EXPLORE_DASHBOARD_LINK, standalone=0)
     assert test_url == "http://localhost:9000/superset/dashboard/3/?standalone=0"
-
-
-@pytest.mark.parametrize(
-    "url,is_safe",
-    [
-        ("http://localhost/", True),
-        ("http://localhost/superset/1", True),
-        ("https://localhost/", False),
-        ("https://localhost/superset/1", False),
-        ("localhost/superset/1", False),
-        ("ftp://localhost/superset/1", False),
-        ("http://external.com", False),
-        ("https://external.com", False),
-        ("external.com", False),
-        ("///localhost", False),
-        ("xpto://localhost:[3/1/", False),
-    ],
-)
-def test_is_safe_url(url: str, is_safe: bool) -> None:
-    from superset import app
-    from superset.utils.urls import is_safe_url
-
-    with app.test_request_context("/"):
-        assert is_safe_url(url) == is_safe