You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by el...@apache.org on 2023/12/02 01:21:03 UTC
(superset) 08/08: chore: Use nh3 lib instead of bleach (#23862)
This is an automated email from the ASF dual-hosted git repository.
elizabeth pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/superset.git
commit 52cdd57dd990e450fc9062ca65d485a3140d66a5
Author: EugeneTorap <ev...@gmail.com>
AuthorDate: Fri Apr 28 16:36:51 2023 +0300
chore: Use nh3 lib instead of bleach (#23862)
---
requirements/base.txt | 8 ++-----
setup.cfg | 2 +-
setup.py | 2 +-
.../dashboard/components/gridComponents/Chart.jsx | 4 ++--
superset/reports/notifications/email.py | 27 ++++++++++++----------
superset/utils/async_query_manager.py | 4 ++--
superset/utils/core.py | 13 ++++++-----
tests/unit_tests/notifications/email_tests.py | 5 +++-
8 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/requirements/base.txt b/requirements/base.txt
index 1ae306e8f3..a8f7e0c18b 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -23,8 +23,6 @@ bcrypt==4.0.1
# via paramiko
billiard==3.6.4.0
# via celery
-bleach==3.3.1
- # via apache-superset
brotli==1.0.9
# via flask-compress
cachelib==0.4.1
@@ -174,6 +172,8 @@ marshmallow-sqlalchemy==0.23.1
# via flask-appbuilder
msgpack==1.0.2
# via apache-superset
+nh3==0.2.11
+ # via apache-superset
numpy==1.23.5
# via
# apache-superset
@@ -181,7 +181,6 @@ numpy==1.23.5
# pyarrow
packaging==21.3
# via
- # bleach
# deprecation
pandas==1.5.3
# via apache-superset
@@ -252,7 +251,6 @@ simplejson==3.17.3
# via apache-superset
six==1.16.0
# via
- # bleach
# click-repl
# isodate
# jsonschema
@@ -297,8 +295,6 @@ vine==5.0.0
# kombu
wcwidth==0.2.5
# via prompt-toolkit
-webencodings==0.5.1
- # via bleach
werkzeug==2.3.3
# via
# apache-superset
diff --git a/setup.cfg b/setup.cfg
index a9470d51bd..970fd4ca82 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,7 +30,7 @@ combine_as_imports = true
include_trailing_comma = true
line_length = 88
known_first_party = superset
-known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resour [...]
+known_third_party =alembic,apispec,backoff,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,nh3,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources [...]
multi_line_output = 3
order_by_type = false
diff --git a/setup.py b/setup.py
index d7d028ba93..66314ed9c4 100644
--- a/setup.py
+++ b/setup.py
@@ -73,7 +73,6 @@ setup(
},
install_requires=[
"backoff>=1.8.0",
- "bleach>=3.0.2, <4.0.0",
"cachelib>=0.4.1,<0.5",
"celery>=5.2.2, <6.0.0",
"click>=8.0.3",
@@ -100,6 +99,7 @@ setup(
"isodate",
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
+ "nh3>=0.2.11, <0.3",
"numpy==1.23.5",
"pandas>=1.5.3, <1.6",
"parsedatetime",
diff --git a/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx b/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx
index 89d4fb94ce..cf06a1d7ed 100644
--- a/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx
+++ b/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx
@@ -486,10 +486,10 @@ class Chart extends React.Component {
{/*
This usage of dangerouslySetInnerHTML is safe since it is being used to render
- markdown that is sanitized with bleach. See:
+ markdown that is sanitized with nh3. See:
https://github.com/apache/superset/pull/4390
and
- https://github.com/apache/superset/commit/b6fcc22d5a2cb7a5e92599ed5795a0169385a825
+ https://github.com/apache/superset/pull/23862
*/}
{isExpanded && slice.description_markeddown && (
<div
diff --git a/superset/reports/notifications/email.py b/superset/reports/notifications/email.py
index 22b1714f99..10a76e7573 100644
--- a/superset/reports/notifications/email.py
+++ b/superset/reports/notifications/email.py
@@ -22,7 +22,7 @@ from dataclasses import dataclass
from email.utils import make_msgid, parseaddr
from typing import Any, Dict, Optional
-import bleach
+import nh3
from flask_babel import gettext as __
from superset import app
@@ -35,10 +35,10 @@ from superset.utils.decorators import statsd_gauge
logger = logging.getLogger(__name__)
-TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"]
-TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"]
+TABLE_TAGS = {"table", "th", "tr", "td", "thead", "tbody", "tfoot"}
+TABLE_ATTRIBUTES = {"colspan", "rowspan", "halign", "border", "class"}
-ALLOWED_TAGS = [
+ALLOWED_TAGS = {
"a",
"abbr",
"acronym",
@@ -54,13 +54,14 @@ ALLOWED_TAGS = [
"p",
"strong",
"ul",
-] + TABLE_TAGS
+}.union(TABLE_TAGS)
+ALLOWED_TABLE_ATTRIBUTES = {tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS}
ALLOWED_ATTRIBUTES = {
- "a": ["href", "title"],
- "abbr": ["title"],
- "acronym": ["title"],
- **{tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS},
+ "a": {"href", "title"},
+ "abbr": {"title"},
+ "acronym": {"title"},
+ **ALLOWED_TABLE_ATTRIBUTES,
}
@@ -108,7 +109,8 @@ class EmailNotification(BaseNotification): # pylint: disable=too-few-public-met
}
# Strip any malicious HTML from the description
- description = bleach.clean(
+ # pylint: disable=no-member
+ description = nh3.clean(
self._content.description or "",
tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
@@ -117,12 +119,13 @@ class EmailNotification(BaseNotification): # pylint: disable=too-few-public-met
# Strip malicious HTML from embedded data, allowing only table elements
if self._content.embedded_data is not None:
df = self._content.embedded_data
- html_table = bleach.clean(
+ # pylint: disable=no-member
+ html_table = nh3.clean(
df.to_html(na_rep="", index=True, escape=True),
# pandas will escape the HTML in cells already, so passing
# more allowed tags here will not work
tags=TABLE_TAGS,
- attributes=TABLE_ATTRIBUTES,
+ attributes=ALLOWED_TABLE_ATTRIBUTES,
)
else:
html_table = ""
diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py
index 71559aaa3d..b6c608948d 100644
--- a/superset/utils/async_query_manager.py
+++ b/superset/utils/async_query_manager.py
@@ -192,5 +192,5 @@ class AsyncQueryManager:
logger.debug("********** logging event data to stream %s", scoped_stream_name)
logger.debug(event_data)
- self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit)
- self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose)
+ self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit) # type: ignore
+ self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose) # type: ignore
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 109d6742b1..84a4bde9b3 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -70,8 +70,8 @@ from typing import (
from urllib.parse import unquote_plus
from zipfile import ZipFile
-import bleach
import markdown as md
+import nh3
import numpy as np
import pandas as pd
import sqlalchemy as sa
@@ -664,7 +664,7 @@ def error_msg_from_exception(ex: Exception) -> str:
def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
- safe_markdown_tags = [
+ safe_markdown_tags = {
"h1",
"h2",
"h3",
@@ -690,10 +690,10 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
"dt",
"img",
"a",
- ]
+ }
safe_markdown_attrs = {
- "img": ["src", "alt", "title"],
- "a": ["href", "alt", "title"],
+ "img": {"src", "alt", "title"},
+ "a": {"href", "alt", "title"},
}
safe = md.markdown(
raw or "",
@@ -703,7 +703,8 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
"markdown.extensions.codehilite",
],
)
- safe = bleach.clean(safe, safe_markdown_tags, safe_markdown_attrs)
+ # pylint: disable=no-member
+ safe = nh3.clean(safe, tags=safe_markdown_tags, attributes=safe_markdown_attrs)
if markup_wrap:
safe = Markup(safe)
return safe
diff --git a/tests/unit_tests/notifications/email_tests.py b/tests/unit_tests/notifications/email_tests.py
index 4ce34b99ca..697a9bac40 100644
--- a/tests/unit_tests/notifications/email_tests.py
+++ b/tests/unit_tests/notifications/email_tests.py
@@ -50,5 +50,8 @@ def test_render_description_with_html() -> None:
._get_content()
.body
)
- assert '<p>This is <a href="#">a test</a> alert</p><br>' in email_body
+ assert (
+ '<p>This is <a href="#" rel="noopener noreferrer">a test</a> alert</p><br>'
+ in email_body
+ )
assert '<td><a href="http://www.example.com">333</a></td>' in email_body