You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by dp...@apache.org on 2023/10/17 17:28:17 UTC
[superset] branch master updated: fix: improve upload ZIP file validation (#25658)
This is an automated email from the ASF dual-hosted git repository.
dpgaspar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new f473d13d0d fix: improve upload ZIP file validation (#25658)
f473d13d0d is described below
commit f473d13d0d89de5990209ff81b17dfe2cee884d3
Author: Daniel Vaz Gaspar <da...@gmail.com>
AuthorDate: Tue Oct 17 18:28:09 2023 +0100
fix: improve upload ZIP file validation (#25658)
---
superset/commands/importers/v1/utils.py | 2 ++
superset/config.py | 5 +++
superset/utils/core.py | 19 +++++++++++
tests/unit_tests/utils/test_core.py | 57 +++++++++++++++++++++++++++++++++
4 files changed, 83 insertions(+)
diff --git a/superset/commands/importers/v1/utils.py b/superset/commands/importers/v1/utils.py
index 8ca008b3e2..8cb0c1b553 100644
--- a/superset/commands/importers/v1/utils.py
+++ b/superset/commands/importers/v1/utils.py
@@ -26,6 +26,7 @@ from superset import db
from superset.commands.importers.exceptions import IncorrectVersionError
from superset.databases.ssh_tunnel.models import SSHTunnel
from superset.models.core import Database
+from superset.utils.core import check_is_safe_zip
METADATA_FILE_NAME = "metadata.yaml"
IMPORT_VERSION = "1.0.0"
@@ -207,6 +208,7 @@ def is_valid_config(file_name: str) -> bool:
def get_contents_from_bundle(bundle: ZipFile) -> dict[str, str]:
+ check_is_safe_zip(bundle)
return {
remove_root(file_name): bundle.read(file_name).decode()
for file_name in bundle.namelist()
diff --git a/superset/config.py b/superset/config.py
index 28e3d4fd5b..2f3cf9baa7 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -1600,6 +1600,11 @@ WELCOME_PAGE_LAST_TAB: (
Literal["examples", "all"] | tuple[str, list[dict[str, Any]]]
) = "all"
+# Max allowed uncompressed size for any single file found inside a ZIP file,
+# as enforced by `superset.utils.core.check_is_safe_zip`
+ZIPPED_FILE_MAX_SIZE = 100 * 1024 * 1024 # 100MB
+# Max allowed compression ratio for a ZIP file: the summed uncompressed size of
+# all its files divided by their summed compressed size
+ZIP_FILE_MAX_COMPRESS_RATIO = 200.0
+
# Configuration for environment tag shown on the navbar. Setting 'text' to '' will hide the tag.
# 'color' can either be a hex color code, or a dot-indexed theme color (e.g. error.base)
ENVIRONMENT_TAG_CONFIG = {
diff --git a/superset/utils/core.py b/superset/utils/core.py
index a4d1ec6523..7ec36981cc 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -1917,6 +1917,25 @@ def create_zip(files: dict[str, Any]) -> BytesIO:
return buf
+def check_is_safe_zip(zip_file: ZipFile) -> None:
+    """
+    Checks whether a ZIP file is safe, raises SupersetException if not.
+
+    A ZIP file is rejected when any single entry reports an uncompressed size
+    above ``ZIPPED_FILE_MAX_SIZE``, or when the total uncompressed size divided
+    by the total compressed size of all entries is above
+    ``ZIP_FILE_MAX_COMPRESS_RATIO``. Only the sizes reported by
+    ``zip_file.infolist()`` are inspected.
+
+    :param zip_file: the ZIP file to validate
+    :raises SupersetException: if one of the configured thresholds is exceeded
+    """
+    # The thresholds do not change while iterating, read them once.
+    max_file_size = current_app.config["ZIPPED_FILE_MAX_SIZE"]
+    max_compress_ratio = current_app.config["ZIP_FILE_MAX_COMPRESS_RATIO"]
+
+    uncompress_size = 0
+    compress_size = 0
+    for zip_file_element in zip_file.infolist():
+        if zip_file_element.file_size > max_file_size:
+            raise SupersetException("Found file with size above allowed threshold")
+        uncompress_size += zip_file_element.file_size
+        compress_size += zip_file_element.compress_size
+
+    if compress_size == 0:
+        # An empty archive, or one holding only zero-byte entries, has no
+        # compressed bytes; computing the ratio would raise ZeroDivisionError.
+        if uncompress_size == 0:
+            return
+        # Content declared out of zero compressed bytes is an unbounded ratio.
+        raise SupersetException("Zip compress ratio above allowed threshold")
+
+    compress_ratio = uncompress_size / compress_size
+    if compress_ratio > max_compress_ratio:
+        raise SupersetException("Zip compress ratio above allowed threshold")
+
+
def remove_extra_adhoc_filters(form_data: dict[str, Any]) -> None:
"""
Remove filters from slice data that originate from a filter box or native filter
diff --git a/tests/unit_tests/utils/test_core.py b/tests/unit_tests/utils/test_core.py
index 562ebe582e..a8d5a2af29 100644
--- a/tests/unit_tests/utils/test_core.py
+++ b/tests/unit_tests/utils/test_core.py
@@ -15,13 +15,17 @@
# specific language governing permissions and limitations
# under the License.
import os
+from dataclasses import dataclass
from typing import Any, Optional
+from unittest.mock import MagicMock
import pandas as pd
import pytest
+from superset.exceptions import SupersetException
from superset.utils.core import (
cast_to_boolean,
+ check_is_safe_zip,
DateColumn,
is_test,
normalize_dttm_col,
@@ -44,6 +48,12 @@ EXTRA_FILTER: QueryObjectFilterClause = {
}
+@dataclass
+class MockZipInfo:
+    """
+    Minimal stand-in for a ZIP file entry, carrying only the two size
+    attributes that ``check_is_safe_zip`` reads.
+    """
+
+    file_size: int  # summed by check_is_safe_zip as the uncompressed size
+    compress_size: int  # summed by check_is_safe_zip as the compressed size
+
+
@pytest.mark.parametrize(
"original,expected",
[
@@ -201,3 +211,50 @@ def test_normalize_dttm_col() -> None:
normalize_dttm_col(df, dttm_cols)
assert df["__time"].astype(str).tolist() == ["2017-07-01"]
+
+
+def test_check_if_safe_zip_success(app_context: None) -> None:
+    """
+    A ZIP file whose entries are each compressed at a 100:1 ratio passes the
+    safety check without raising.
+    """
+    entries = [MockZipInfo(file_size=1000, compress_size=10) for _ in range(5)]
+    bundle = MagicMock()
+    bundle.infolist.return_value = entries
+    check_is_safe_zip(bundle)
+
+
+def test_check_if_safe_zip_high_rate(app_context: None) -> None:
+    """
+    A ZIP file whose entries are each compressed at a 1000:1 ratio is rejected
+    with a SupersetException.
+    """
+    entries = [MockZipInfo(file_size=1000, compress_size=1) for _ in range(5)]
+    bundle = MagicMock()
+    bundle.infolist.return_value = entries
+    with pytest.raises(SupersetException):
+        check_is_safe_zip(bundle)
+
+
+def test_check_if_safe_zip_hidden_bomb(app_context: None) -> None:
+    """
+    A ZIP file is rejected with a SupersetException when one very large,
+    highly compressed entry sits among otherwise ordinary entries.
+    """
+    regular_entries = [
+        MockZipInfo(file_size=1000, compress_size=100) for _ in range(4)
+    ]
+    oversized_entry = MockZipInfo(file_size=1000 * (1024 * 1024), compress_size=100)
+    bundle = MagicMock()
+    bundle.infolist.return_value = regular_entries + [oversized_entry]
+    with pytest.raises(SupersetException):
+        check_is_safe_zip(bundle)