You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by mi...@apache.org on 2023/08/23 12:58:50 UTC
[superset] 06/06: fix: dataset safe URL for explore_url (#24686)
This is an automated email from the ASF dual-hosted git repository.
michaelsmolina pushed a commit to branch 3.0
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 931e1b21396c55a7da8fa8b026c64c63b333e3eb
Author: Daniel Vaz Gaspar <da...@gmail.com>
AuthorDate: Wed Aug 23 13:31:44 2023 +0100
fix: dataset safe URL for explore_url (#24686)
(cherry picked from commit a9efd4b2e307b0df68e88ebbd02d22d7032fa451)
---
UPDATING.md | 1 +
.../src/pages/DatasetList/DatasetList.test.tsx | 60 +++++++++++++++++++++-
superset-frontend/src/pages/DatasetList/index.tsx | 27 +++++++---
superset/config.py | 2 +-
superset/datasets/commands/exceptions.py | 17 ------
superset/datasets/commands/update.py | 12 -----
superset/utils/urls.py | 19 +------
superset/views/base.py | 1 +
superset/views/datasource/views.py | 17 +-----
tests/integration_tests/datasets/api_tests.py | 26 ----------
tests/integration_tests/datasource_tests.py | 26 ----------
tests/unit_tests/utils/urls_tests.py | 24 ---------
12 files changed, 85 insertions(+), 147 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index 5a29c43dfa..19c60a19b7 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -51,6 +51,7 @@ assists people when migrating to a new version.
### Breaking Changes
+- [24686](https://github.com/apache/superset/pull/24686): All datasets' custom explore_url values are handled as relative URLs on the frontend; this behaviour is controlled by PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET.
- [24262](https://github.com/apache/superset/pull/24262): Enabled `TALISMAN_ENABLED` flag by default and provided stricter default Content Security Policy
- [24415](https://github.com/apache/superset/pull/24415): Removed the obsolete Druid NoSQL REGEX operator.
- [24423](https://github.com/apache/superset/pull/24423): Removed deprecated APIs `/superset/slice_json/...`, `/superset/annotation_json/...`
diff --git a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx
index 115a861bdd..916dd0615b 100644
--- a/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx
+++ b/superset-frontend/src/pages/DatasetList/DatasetList.test.tsx
@@ -35,6 +35,7 @@ import IndeterminateCheckbox from 'src/components/IndeterminateCheckbox';
import waitForComponentToPaint from 'spec/helpers/waitForComponentToPaint';
import { act } from 'react-dom/test-utils';
import SubMenu from 'src/features/home/SubMenu';
+import * as reactRedux from 'react-redux';
// store needed for withToasts(DatasetList)
const mockStore = configureStore([thunk]);
@@ -47,13 +48,15 @@ const datasetsDuplicateEndpoint = 'glob:*/api/v1/dataset/duplicate*';
const databaseEndpoint = 'glob:*/api/v1/dataset/related/database*';
const datasetsEndpoint = 'glob:*/api/v1/dataset/?*';
+const useSelectorMock = jest.spyOn(reactRedux, 'useSelector');
+
const mockdatasets = [...new Array(3)].map((_, i) => ({
changed_by_name: 'user',
kind: i === 0 ? 'virtual' : 'physical', // ensure there is 1 virtual
changed_by: 'user',
changed_on: new Date().toISOString(),
database_name: `db ${i}`,
- explore_url: `/explore/?datasource_type=table&datasource_id=${i}`,
+ explore_url: `https://www.google.com?${i}`,
id: i,
schema: `schema ${i}`,
table_name: `coolest table ${i}`,
@@ -280,3 +283,58 @@ describe('RTL', () => {
expect(importTooltip).toBeInTheDocument();
});
});
+
+describe('Prevent unsafe URLs', () => {
+ const mockedProps = {};
+ let wrapper: any;
+
+ it('Check prevent unsafe is on renders relative links', async () => {
+ const tdColumnsNumber = 9;
+ useSelectorMock.mockReturnValue(true);
+ wrapper = await mountAndWait(mockedProps);
+ const tdElements = wrapper.find(ListView).find('td');
+ expect(
+ tdElements
+ .at(0 * tdColumnsNumber + 1)
+ .find('a')
+ .prop('href'),
+ ).toBe('/https://www.google.com?0');
+ expect(
+ tdElements
+ .at(1 * tdColumnsNumber + 1)
+ .find('a')
+ .prop('href'),
+ ).toBe('/https://www.google.com?1');
+ expect(
+ tdElements
+ .at(2 * tdColumnsNumber + 1)
+ .find('a')
+ .prop('href'),
+ ).toBe('/https://www.google.com?2');
+ });
+
+ it('Check prevent unsafe is off renders absolute links', async () => {
+ const tdColumnsNumber = 9;
+ useSelectorMock.mockReturnValue(false);
+ wrapper = await mountAndWait(mockedProps);
+ const tdElements = wrapper.find(ListView).find('td');
+ expect(
+ tdElements
+ .at(0 * tdColumnsNumber + 1)
+ .find('a')
+ .prop('href'),
+ ).toBe('https://www.google.com?0');
+ expect(
+ tdElements
+ .at(1 * tdColumnsNumber + 1)
+ .find('a')
+ .prop('href'),
+ ).toBe('https://www.google.com?1');
+ expect(
+ tdElements
+ .at(2 * tdColumnsNumber + 1)
+ .find('a')
+ .prop('href'),
+ ).toBe('https://www.google.com?2');
+ });
+});
diff --git a/superset-frontend/src/pages/DatasetList/index.tsx b/superset-frontend/src/pages/DatasetList/index.tsx
index 7633edb016..0f3fb84ab1 100644
--- a/superset-frontend/src/pages/DatasetList/index.tsx
+++ b/superset-frontend/src/pages/DatasetList/index.tsx
@@ -30,7 +30,7 @@ import React, {
useMemo,
useCallback,
} from 'react';
-import { useHistory } from 'react-router-dom';
+import { Link, useHistory } from 'react-router-dom';
import rison from 'rison';
import {
createFetchRelated,
@@ -69,6 +69,7 @@ import {
CONFIRM_OVERWRITE_MESSAGE,
} from 'src/features/datasets/constants';
import DuplicateDatasetModal from 'src/features/datasets/DuplicateDatasetModal';
+import { useSelector } from 'react-redux';
const extensionsRegistry = getExtensionsRegistry();
const DatasetDeleteRelatedExtension = extensionsRegistry.get(
@@ -181,6 +182,11 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
setSSHTunnelPrivateKeyPasswordFields,
] = useState<string[]>([]);
+ const PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = useSelector<any, boolean>(
+ state =>
+ state.common?.conf?.PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET || false,
+ );
+
const openDatasetImportModal = () => {
showImportModal(true);
};
@@ -309,11 +315,20 @@ const DatasetList: FunctionComponent<DatasetListProps> = ({
},
},
}: any) => {
- const titleLink = (
- // exploreUrl can be a link to Explore or an external link
- // in the first case use SPA routing, else use HTML anchor
- <GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
- );
+ let titleLink: JSX.Element;
+ if (PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET) {
+ titleLink = (
+ <Link data-test="internal-link" to={exploreURL}>
+ {datasetTitle}
+ </Link>
+ );
+ } else {
+ titleLink = (
+ // exploreUrl can be a link to Explore or an external link
+ // in the first case use SPA routing, else use HTML anchor
+ <GenericLink to={exploreURL}>{datasetTitle}</GenericLink>
+ );
+ }
try {
const parsedExtra = JSON.parse(extra);
return (
diff --git a/superset/config.py b/superset/config.py
index 17f5bb72a2..a2ce1174a1 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -1460,7 +1460,7 @@ STATIC_ASSETS_PREFIX = ""
# Typically these should not be allowed.
PREVENT_UNSAFE_DB_CONNECTIONS = True
-# Prevents unsafe default endpoints to be registered on datasets.
+# If true, all default URLs on datasets will be handled as relative URLs by the frontend
PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET = True
# Define a list of allowed URLs for dataset data imports (v1).
diff --git a/superset/datasets/commands/exceptions.py b/superset/datasets/commands/exceptions.py
index fe9fe94cc6..f135294980 100644
--- a/superset/datasets/commands/exceptions.py
+++ b/superset/datasets/commands/exceptions.py
@@ -61,23 +61,6 @@ class DatasetExistsValidationError(ValidationError):
)
-class DatasetEndpointUnsafeValidationError(ValidationError):
- """
- Marshmallow validation error for unsafe dataset default endpoint
- """
-
- def __init__(self) -> None:
- super().__init__(
- [
- _(
- "The submitted URL is not considered safe,"
- " only use URLs with the same domain as Superset."
- )
- ],
- field_name="default_endpoint",
- )
-
-
class DatasetColumnNotFoundValidationError(ValidationError):
"""
Marshmallow validation error when dataset column for update does not exist
diff --git a/superset/datasets/commands/update.py b/superset/datasets/commands/update.py
index 1636805567..a38439fb7f 100644
--- a/superset/datasets/commands/update.py
+++ b/superset/datasets/commands/update.py
@@ -18,7 +18,6 @@ import logging
from collections import Counter
from typing import Any, Optional
-from flask import current_app
from flask_appbuilder.models.sqla import Model
from marshmallow import ValidationError
@@ -32,7 +31,6 @@ from superset.datasets.commands.exceptions import (
DatasetColumnNotFoundValidationError,
DatasetColumnsDuplicateValidationError,
DatasetColumnsExistsValidationError,
- DatasetEndpointUnsafeValidationError,
DatasetExistsValidationError,
DatasetForbiddenError,
DatasetInvalidError,
@@ -43,7 +41,6 @@ from superset.datasets.commands.exceptions import (
DatasetUpdateFailedError,
)
from superset.exceptions import SupersetSecurityException
-from superset.utils.urls import is_safe_url
logger = logging.getLogger(__name__)
@@ -104,15 +101,6 @@ class UpdateDatasetCommand(UpdateMixin, BaseCommand):
self._properties["owners"] = owners
except ValidationError as ex:
exceptions.append(ex)
- # Validate default URL safety
- default_endpoint = self._properties.get("default_endpoint")
- if (
- default_endpoint
- and not is_safe_url(default_endpoint)
- and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
- ):
- exceptions.append(DatasetEndpointUnsafeValidationError())
-
# Validate columns
if columns := self._properties.get("columns"):
self._validate_columns(columns, exceptions)
diff --git a/superset/utils/urls.py b/superset/utils/urls.py
index 31fbd89337..57a1b63dd4 100644
--- a/superset/utils/urls.py
+++ b/superset/utils/urls.py
@@ -14,12 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-import unicodedata
import urllib
from typing import Any
-from urllib.parse import urlparse
-from flask import current_app, request, url_for
+from flask import current_app, url_for
def get_url_host(user_friendly: bool = False) -> str:
@@ -52,18 +50,3 @@ def modify_url_query(url: str, **kwargs: Any) -> str:
f"{k}={urllib.parse.quote(str(v[0]))}" for k, v in params.items()
)
return urllib.parse.urlunsplit(parts)
-
-
-def is_safe_url(url: str) -> bool:
- if url.startswith("///"):
- return False
- try:
- ref_url = urlparse(request.host_url)
- test_url = urlparse(url)
- except ValueError:
- return False
- if unicodedata.category(url[0])[0] == "C":
- return False
- if test_url.scheme != ref_url.scheme or ref_url.netloc != test_url.netloc:
- return False
- return True
diff --git a/superset/views/base.py b/superset/views/base.py
index 250c16ebb2..ab53ff07da 100644
--- a/superset/views/base.py
+++ b/superset/views/base.py
@@ -120,6 +120,7 @@ FRONTEND_CONF_KEYS = (
"ALERT_REPORTS_DEFAULT_RETENTION",
"ALERT_REPORTS_DEFAULT_WORKING_TIMEOUT",
"NATIVE_FILTER_DEFAULT_ROW_LIMIT",
+ "PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET",
)
logger = logging.getLogger(__name__)
diff --git a/superset/views/datasource/views.py b/superset/views/datasource/views.py
index b2fd387379..06dd37fbd9 100644
--- a/superset/views/datasource/views.py
+++ b/superset/views/datasource/views.py
@@ -18,7 +18,7 @@ import json
from collections import Counter
from typing import Any
-from flask import current_app, redirect, request
+from flask import redirect, request
from flask_appbuilder import expose, permission_name
from flask_appbuilder.api import rison
from flask_appbuilder.security.decorators import has_access, has_access_api
@@ -40,7 +40,6 @@ from superset.exceptions import SupersetException, SupersetSecurityException
from superset.models.core import Database
from superset.superset_typing import FlaskResponse
from superset.utils.core import DatasourceType
-from superset.utils.urls import is_safe_url
from superset.views.base import (
api,
BaseSupersetView,
@@ -82,20 +81,6 @@ class Datasource(BaseSupersetView):
datasource_id = datasource_dict.get("id")
datasource_type = datasource_dict.get("type")
database_id = datasource_dict["database"].get("id")
- default_endpoint = datasource_dict["default_endpoint"]
- if (
- default_endpoint
- and not is_safe_url(default_endpoint)
- and current_app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"]
- ):
- return json_error_response(
- _(
- "The submitted URL is not considered safe,"
- " only use URLs with the same domain as Superset."
- ),
- status=400,
- )
-
orm_datasource = DatasourceDAO.get_datasource(
db.session, DatasourceType(datasource_type), datasource_id
)
diff --git a/tests/integration_tests/datasets/api_tests.py b/tests/integration_tests/datasets/api_tests.py
index 8884b1171a..2acefe48d0 100644
--- a/tests/integration_tests/datasets/api_tests.py
+++ b/tests/integration_tests/datasets/api_tests.py
@@ -1446,32 +1446,6 @@ class TestDatasetApi(SupersetTestCase):
db.session.delete(ab_user)
db.session.commit()
- def test_update_dataset_unsafe_default_endpoint(self):
- """
- Dataset API: Test unsafe default endpoint
- """
- if backend() == "sqlite":
- return
-
- dataset = self.insert_default_dataset()
- self.login(username="admin")
- uri = f"api/v1/dataset/{dataset.id}"
- table_data = {"default_endpoint": "http://www.google.com"}
- rv = self.client.put(uri, json=table_data)
- data = json.loads(rv.data.decode("utf-8"))
- assert rv.status_code == 422
- expected_response = {
- "message": {
- "default_endpoint": [
- "The submitted URL is not considered safe,"
- " only use URLs with the same domain as Superset."
- ]
- }
- }
- assert data == expected_response
- db.session.delete(dataset)
- db.session.commit()
-
@patch("superset.daos.dataset.DatasetDAO.update")
def test_update_dataset_sqlalchemy_error(self, mock_dao_update):
"""
diff --git a/tests/integration_tests/datasource_tests.py b/tests/integration_tests/datasource_tests.py
index 4c05898cfe..12293325d1 100644
--- a/tests/integration_tests/datasource_tests.py
+++ b/tests/integration_tests/datasource_tests.py
@@ -302,32 +302,6 @@ class TestDatasource(SupersetTestCase):
print(k)
self.assertEqual(resp[k], datasource_post[k])
- def test_save_default_endpoint_validation_fail(self):
- self.login(username="admin")
- tbl_id = self.get_table(name="birth_names").id
-
- datasource_post = get_datasource_post()
- datasource_post["id"] = tbl_id
- datasource_post["owners"] = [1]
- datasource_post["default_endpoint"] = "http://www.google.com"
- data = dict(data=json.dumps(datasource_post))
- resp = self.client.post("/datasource/save/", data=data)
- assert resp.status_code == 400
-
- def test_save_default_endpoint_validation_unsafe(self):
- self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = False
- self.login(username="admin")
- tbl_id = self.get_table(name="birth_names").id
-
- datasource_post = get_datasource_post()
- datasource_post["id"] = tbl_id
- datasource_post["owners"] = [1]
- datasource_post["default_endpoint"] = "http://www.google.com"
- data = dict(data=json.dumps(datasource_post))
- resp = self.client.post("/datasource/save/", data=data)
- assert resp.status_code == 200
- self.app.config["PREVENT_UNSAFE_DEFAULT_URLS_ON_DATASET"] = True
-
def test_save_default_endpoint_validation_success(self):
self.login(username="admin")
tbl_id = self.get_table(name="birth_names").id
diff --git a/tests/unit_tests/utils/urls_tests.py b/tests/unit_tests/utils/urls_tests.py
index 287f346c3d..8ead2dacb4 100644
--- a/tests/unit_tests/utils/urls_tests.py
+++ b/tests/unit_tests/utils/urls_tests.py
@@ -39,27 +39,3 @@ def test_convert_dashboard_link() -> None:
def test_convert_dashboard_link_with_integer() -> None:
test_url = modify_url_query(EXPLORE_DASHBOARD_LINK, standalone=0)
assert test_url == "http://localhost:9000/superset/dashboard/3/?standalone=0"
-
-
-@pytest.mark.parametrize(
- "url,is_safe",
- [
- ("http://localhost/", True),
- ("http://localhost/superset/1", True),
- ("https://localhost/", False),
- ("https://localhost/superset/1", False),
- ("localhost/superset/1", False),
- ("ftp://localhost/superset/1", False),
- ("http://external.com", False),
- ("https://external.com", False),
- ("external.com", False),
- ("///localhost", False),
- ("xpto://localhost:[3/1/", False),
- ],
-)
-def test_is_safe_url(url: str, is_safe: bool) -> None:
- from superset import app
- from superset.utils.urls import is_safe_url
-
- with app.test_request_context("/"):
- assert is_safe_url(url) == is_safe