You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@superset.apache.org by el...@apache.org on 2023/12/02 01:21:03 UTC

(superset) 08/08: chore: Use nh3 lib instead of bleach (#23862)

This is an automated email from the ASF dual-hosted git repository.

elizabeth pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/superset.git

commit 52cdd57dd990e450fc9062ca65d485a3140d66a5
Author: EugeneTorap <ev...@gmail.com>
AuthorDate: Fri Apr 28 16:36:51 2023 +0300

    chore: Use nh3 lib instead of bleach (#23862)
---
 requirements/base.txt                              |  8 ++-----
 setup.cfg                                          |  2 +-
 setup.py                                           |  2 +-
 .../dashboard/components/gridComponents/Chart.jsx  |  4 ++--
 superset/reports/notifications/email.py            | 27 ++++++++++++----------
 superset/utils/async_query_manager.py              |  4 ++--
 superset/utils/core.py                             | 13 ++++++-----
 tests/unit_tests/notifications/email_tests.py      |  5 +++-
 8 files changed, 34 insertions(+), 31 deletions(-)

diff --git a/requirements/base.txt b/requirements/base.txt
index 1ae306e8f3..a8f7e0c18b 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -23,8 +23,6 @@ bcrypt==4.0.1
     # via paramiko
 billiard==3.6.4.0
     # via celery
-bleach==3.3.1
-    # via apache-superset
 brotli==1.0.9
     # via flask-compress
 cachelib==0.4.1
@@ -174,6 +172,8 @@ marshmallow-sqlalchemy==0.23.1
     # via flask-appbuilder
 msgpack==1.0.2
     # via apache-superset
+nh3==0.2.11
+    # via apache-superset
 numpy==1.23.5
     # via
     #   apache-superset
@@ -181,7 +181,6 @@ numpy==1.23.5
     #   pyarrow
 packaging==21.3
     # via
-    #   bleach
     #   deprecation
 pandas==1.5.3
     # via apache-superset
@@ -252,7 +251,6 @@ simplejson==3.17.3
     # via apache-superset
 six==1.16.0
     # via
-    #   bleach
     #   click-repl
     #   isodate
     #   jsonschema
@@ -297,8 +295,6 @@ vine==5.0.0
     #   kombu
 wcwidth==0.2.5
     # via prompt-toolkit
-webencodings==0.5.1
-    # via bleach
 werkzeug==2.3.3
     # via
     #   apache-superset
diff --git a/setup.cfg b/setup.cfg
index a9470d51bd..970fd4ca82 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,7 +30,7 @@ combine_as_imports = true
 include_trailing_comma = true
 line_length = 88
 known_first_party = superset
-known_third_party =alembic,apispec,backoff,bleach,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resour [...]
+known_third_party =alembic,apispec,backoff,cachelib,celery,click,colorama,cron_descriptor,croniter,cryptography,dateutil,deprecation,flask,flask_appbuilder,flask_babel,flask_caching,flask_compress,flask_jwt_extended,flask_login,flask_migrate,flask_sqlalchemy,flask_talisman,flask_testing,flask_wtf,freezegun,geohash,geopy,graphlib,holidays,humanize,isodate,jinja2,jwt,markdown,markupsafe,marshmallow,marshmallow_enum,msgpack,nh3,numpy,pandas,parameterized,parsedatetime,pgsanity,pkg_resources [...]
 multi_line_output = 3
 order_by_type = false
 
diff --git a/setup.py b/setup.py
index d7d028ba93..66314ed9c4 100644
--- a/setup.py
+++ b/setup.py
@@ -73,7 +73,6 @@ setup(
     },
     install_requires=[
         "backoff>=1.8.0",
-        "bleach>=3.0.2, <4.0.0",
         "cachelib>=0.4.1,<0.5",
         "celery>=5.2.2, <6.0.0",
         "click>=8.0.3",
@@ -100,6 +99,7 @@ setup(
         "isodate",
         "markdown>=3.0",
         "msgpack>=1.0.0, <1.1",
+        "nh3>=0.2.11, <0.3",
         "numpy==1.23.5",
         "pandas>=1.5.3, <1.6",
         "parsedatetime",
diff --git a/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx b/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx
index 89d4fb94ce..cf06a1d7ed 100644
--- a/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx
+++ b/superset-frontend/src/dashboard/components/gridComponents/Chart.jsx
@@ -486,10 +486,10 @@ class Chart extends React.Component {
 
         {/*
           This usage of dangerouslySetInnerHTML is safe since it is being used to render
-          markdown that is sanitized with bleach. See:
+          markdown that is sanitized with nh3. See:
              https://github.com/apache/superset/pull/4390
           and
-             https://github.com/apache/superset/commit/b6fcc22d5a2cb7a5e92599ed5795a0169385a825
+             https://github.com/apache/superset/pull/23862
         */}
         {isExpanded && slice.description_markeddown && (
           <div
diff --git a/superset/reports/notifications/email.py b/superset/reports/notifications/email.py
index 22b1714f99..10a76e7573 100644
--- a/superset/reports/notifications/email.py
+++ b/superset/reports/notifications/email.py
@@ -22,7 +22,7 @@ from dataclasses import dataclass
 from email.utils import make_msgid, parseaddr
 from typing import Any, Dict, Optional
 
-import bleach
+import nh3
 from flask_babel import gettext as __
 
 from superset import app
@@ -35,10 +35,10 @@ from superset.utils.decorators import statsd_gauge
 
 logger = logging.getLogger(__name__)
 
-TABLE_TAGS = ["table", "th", "tr", "td", "thead", "tbody", "tfoot"]
-TABLE_ATTRIBUTES = ["colspan", "rowspan", "halign", "border", "class"]
+TABLE_TAGS = {"table", "th", "tr", "td", "thead", "tbody", "tfoot"}
+TABLE_ATTRIBUTES = {"colspan", "rowspan", "halign", "border", "class"}
 
-ALLOWED_TAGS = [
+ALLOWED_TAGS = {
     "a",
     "abbr",
     "acronym",
@@ -54,13 +54,14 @@ ALLOWED_TAGS = [
     "p",
     "strong",
     "ul",
-] + TABLE_TAGS
+}.union(TABLE_TAGS)
 
+ALLOWED_TABLE_ATTRIBUTES = {tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS}
 ALLOWED_ATTRIBUTES = {
-    "a": ["href", "title"],
-    "abbr": ["title"],
-    "acronym": ["title"],
-    **{tag: TABLE_ATTRIBUTES for tag in TABLE_TAGS},
+    "a": {"href", "title"},
+    "abbr": {"title"},
+    "acronym": {"title"},
+    **ALLOWED_TABLE_ATTRIBUTES,
 }
 
 
@@ -108,7 +109,8 @@ class EmailNotification(BaseNotification):  # pylint: disable=too-few-public-met
             }
 
         # Strip any malicious HTML from the description
-        description = bleach.clean(
+        # pylint: disable=no-member
+        description = nh3.clean(
             self._content.description or "",
             tags=ALLOWED_TAGS,
             attributes=ALLOWED_ATTRIBUTES,
@@ -117,12 +119,13 @@ class EmailNotification(BaseNotification):  # pylint: disable=too-few-public-met
         # Strip malicious HTML from embedded data, allowing only table elements
         if self._content.embedded_data is not None:
             df = self._content.embedded_data
-            html_table = bleach.clean(
+            # pylint: disable=no-member
+            html_table = nh3.clean(
                 df.to_html(na_rep="", index=True, escape=True),
                 # pandas will escape the HTML in cells already, so passing
                 # more allowed tags here will not work
                 tags=TABLE_TAGS,
-                attributes=TABLE_ATTRIBUTES,
+                attributes=ALLOWED_TABLE_ATTRIBUTES,
             )
         else:
             html_table = ""
diff --git a/superset/utils/async_query_manager.py b/superset/utils/async_query_manager.py
index 71559aaa3d..b6c608948d 100644
--- a/superset/utils/async_query_manager.py
+++ b/superset/utils/async_query_manager.py
@@ -192,5 +192,5 @@ class AsyncQueryManager:
         logger.debug("********** logging event data to stream %s", scoped_stream_name)
         logger.debug(event_data)
 
-        self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit)
-        self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose)
+        self._redis.xadd(scoped_stream_name, event_data, "*", self._stream_limit)  # type: ignore
+        self._redis.xadd(full_stream_name, event_data, "*", self._stream_limit_firehose)  # type: ignore
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 109d6742b1..84a4bde9b3 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -70,8 +70,8 @@ from typing import (
 from urllib.parse import unquote_plus
 from zipfile import ZipFile
 
-import bleach
 import markdown as md
+import nh3
 import numpy as np
 import pandas as pd
 import sqlalchemy as sa
@@ -664,7 +664,7 @@ def error_msg_from_exception(ex: Exception) -> str:
 
 
 def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
-    safe_markdown_tags = [
+    safe_markdown_tags = {
         "h1",
         "h2",
         "h3",
@@ -690,10 +690,10 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
         "dt",
         "img",
         "a",
-    ]
+    }
     safe_markdown_attrs = {
-        "img": ["src", "alt", "title"],
-        "a": ["href", "alt", "title"],
+        "img": {"src", "alt", "title"},
+        "a": {"href", "alt", "title"},
     }
     safe = md.markdown(
         raw or "",
@@ -703,7 +703,8 @@ def markdown(raw: str, markup_wrap: Optional[bool] = False) -> str:
             "markdown.extensions.codehilite",
         ],
     )
-    safe = bleach.clean(safe, safe_markdown_tags, safe_markdown_attrs)
+    # pylint: disable=no-member
+    safe = nh3.clean(safe, tags=safe_markdown_tags, attributes=safe_markdown_attrs)
     if markup_wrap:
         safe = Markup(safe)
     return safe
diff --git a/tests/unit_tests/notifications/email_tests.py b/tests/unit_tests/notifications/email_tests.py
index 4ce34b99ca..697a9bac40 100644
--- a/tests/unit_tests/notifications/email_tests.py
+++ b/tests/unit_tests/notifications/email_tests.py
@@ -50,5 +50,8 @@ def test_render_description_with_html() -> None:
         ._get_content()
         .body
     )
-    assert '<p>This is <a href="#">a test</a> alert</p><br>' in email_body
+    assert (
+        '<p>This is <a href="#" rel="noopener noreferrer">a test</a> alert</p><br>'
+        in email_body
+    )
     assert '<td>&lt;a href="http://www.example.com"&gt;333&lt;/a&gt;</td>' in email_body