You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ka...@apache.org on 2021/03/20 00:39:37 UTC

[airflow] branch master updated: Simplify cleaning string passed to origin param (#14738) (#14905)

This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new 178dee9  Simplify cleaning string passed to origin param (#14738) (#14905)
178dee9 is described below

commit 178dee9a5ed0cde3d7a7d4a47daeae85408fcd67
Author: Kaxil Naik <ka...@gmail.com>
AuthorDate: Sat Mar 20 00:39:21 2021 +0000

    Simplify cleaning string passed to origin param (#14738) (#14905)
    
    Looks like the "trying to be smart" approach in https://github.com/apache/airflow/pull/14738
    does not work on old Python versions. The "smart" part was that, if a semicolon exists in the URL,
    only those specific query arguments were removed. While this solves the issue for Py 3.6.13,
    it didn't fix it for 3.6.12 (although it minimized it).
    
    Python 3.6.12:
    
    ```python
    >>> parse_qsl("r=3;a=b")
    [('r', '3'), ('a', 'b')]
    ```
    
    Python 3.6.13:
    
    ```python
    >>> parse_qsl("r=3;a=b")
    [('r', '3;a=b')]
    ```
    
    This commit simplifies it: if the URL contains `;`, it just redirects to
    `/home`.
---
 airflow/www/views.py    | 10 +++-------
 tests/www/test_views.py |  8 ++++----
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/airflow/www/views.py b/airflow/www/views.py
index f31ccdc..633cf7c 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -129,18 +129,14 @@ def get_safe_url(url):
 
     parsed = urlparse(url)
 
-    # If the url is relative & it contains semicolon, redirect it to homepage to avoid
+    # If the url contains semicolon, redirect it to homepage to avoid
     # potential XSS. (Similar to https://github.com/python/cpython/pull/24297/files (bpo-42967))
-    if parsed.netloc == '' and parsed.scheme == '' and ';' in unquote(url):
+    if ';' in unquote(url):
         return url_for('Airflow.index')
 
     query = parse_qsl(parsed.query, keep_blank_values=True)
 
-    # Remove all the query elements containing semicolon
-    # As part of https://github.com/python/cpython/pull/24297/files (bpo-42967)
-    # semicolon was already removed as a separator for query arguments by default
-    sanitized_query = [query_arg for query_arg in query if ';' not in query_arg[1]]
-    url = parsed._replace(query=urlencode(sanitized_query)).geturl()
+    url = parsed._replace(query=urlencode(query)).geturl()
 
     if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs:
         return url
diff --git a/tests/www/test_views.py b/tests/www/test_views.py
index 48aaa01..2e61c03 100644
--- a/tests/www/test_views.py
+++ b/tests/www/test_views.py
@@ -3353,15 +3353,15 @@ class TestHelperFunctions(TestBase):
             ("36539'%3balert(1)%2f%2f166", "/home"),
             (
                 "http://localhost:8080/trigger?dag_id=test&origin=36539%27%3balert(1)%2f%2f166&abc=2",
-                "http://localhost:8080/trigger?dag_id=test&abc=2",
+                "/home",
             ),
             (
                 "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag';alert(33)//",
-                "http://localhost:8080/trigger?dag_id=test_dag",
+                "/home",
             ),
             (
-                "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag",
-                "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%25test_dag",
+                "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%3Dtest_dag",
+                "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%3Dtest_dag",
             ),
         ]
     )