You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by ka...@apache.org on 2021/03/19 22:02:32 UTC

[airflow] 04/04: Webserver: Sanitize string passed to origin param (#14738)

This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch v2-0-test
in repository https://gitbox.apache.org/repos/asf/airflow.git

commit 9854c432b5c88cbfb5e0c9e8462c624ba6eaab02
Author: Kaxil Naik <ka...@gmail.com>
AuthorDate: Fri Mar 12 09:48:59 2021 +0000

    Webserver: Sanitize string passed to origin param (#14738)
    
    Follow-up of #12459 & #10334
    
    Since https://github.com/python/cpython/pull/24297/files (bpo-42967)
    also removed ';' as query argument separator, we remove query arguments
    with semicolons.
    
    (cherry picked from commit 409c249121bd9c8902fc2ba551b21873ab41f953)
---
 airflow/www/views.py    | 12 +++++++++++-
 tests/www/test_views.py | 27 +++++++++------------------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/airflow/www/views.py b/airflow/www/views.py
index 4c272b5..f9f7f5c 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -129,8 +129,18 @@ def get_safe_url(url):
 
     parsed = urlparse(url)
 
+    # If the url is relative & it contains semicolon, redirect it to homepage to avoid
+    # potential XSS. (Similar to https://github.com/python/cpython/pull/24297/files (bpo-42967))
+    if parsed.netloc == '' and parsed.scheme == '' and ';' in unquote(url):
+        return url_for('Airflow.index')
+
     query = parse_qsl(parsed.query, keep_blank_values=True)
-    url = parsed._replace(query=urlencode(query)).geturl()
+
+    # Remove all the query elements containing semicolon
+    # As part of https://github.com/python/cpython/pull/24297/files (bpo-42967)
+    # semicolon was already removed as a separator for query arguments by default
+    sanitized_query = [query_arg for query_arg in query if ';' not in query_arg[1]]
+    url = parsed._replace(query=urlencode(sanitized_query)).geturl()
 
     if parsed.scheme in valid_schemes and parsed.netloc in valid_netlocs:
         return url
diff --git a/tests/www/test_views.py b/tests/www/test_views.py
index c8ced89..f67c478 100644
--- a/tests/www/test_views.py
+++ b/tests/www/test_views.py
@@ -32,7 +32,7 @@ from datetime import datetime as dt, timedelta
 from typing import Any, Dict, Generator, List, NamedTuple
 from unittest import mock
 from unittest.mock import PropertyMock
-from urllib.parse import parse_qsl, quote_plus
+from urllib.parse import quote_plus
 
 import jinja2
 import pytest
@@ -2755,9 +2755,10 @@ class TestTriggerDag(TestBase):
         [
             ("javascript:alert(1)", "/home"),
             ("http://google.com", "/home"),
+            ("36539'%3balert(1)%2f%2f166", "/home"),
             (
                 "%2Ftree%3Fdag_id%3Dexample_bash_operator';alert(33)//",
-                "/tree?dag_id=example_bash_operator%27%3Balert%2833%29%2F%2F",
+                "/home",
             ),
             ("%2Ftree%3Fdag_id%3Dexample_bash_operator", "/tree?dag_id=example_bash_operator"),
             ("%2Fgraph%3Fdag_id%3Dexample_bash_operator", "/graph?dag_id=example_bash_operator"),
@@ -2766,13 +2767,6 @@ class TestTriggerDag(TestBase):
     def test_trigger_dag_form_origin_url(self, test_origin, expected_origin):
         test_dag_id = "example_bash_operator"
 
-        # https://github.com/python/cpython/pull/24297/files
-        # Check if tests are running with a Python version containing the above fix
-        # where ";" is removed as a separator
-        if parse_qsl(";a=b") != [(';a', 'b')] and ";" in test_origin:
-            expected_origin = expected_origin.replace("%3B", "&")
-            expected_origin += "="
-
         resp = self.client.get(f'trigger?dag_id={test_dag_id}&origin={test_origin}')
         self.check_content_in_response(
             '<button type="button" class="btn" onclick="location.href = \'{}\'; return false">'.format(
@@ -3302,10 +3296,14 @@ class TestHelperFunctions(TestBase):
         [
             ("", "/home"),
             ("http://google.com", "/home"),
+            ("36539'%3balert(1)%2f%2f166", "/home"),
+            (
+                "http://localhost:8080/trigger?dag_id=test&origin=36539%27%3balert(1)%2f%2f166&abc=2",
+                "http://localhost:8080/trigger?dag_id=test&abc=2",
+            ),
             (
                 "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag';alert(33)//",
-                "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3F"
-                "dag_id%25test_dag%27%3Balert%2833%29%2F%2F",
+                "http://localhost:8080/trigger?dag_id=test_dag",
             ),
             (
                 "http://localhost:8080/trigger?dag_id=test_dag&origin=%2Ftree%3Fdag_id%test_dag",
@@ -3315,13 +3313,6 @@ class TestHelperFunctions(TestBase):
     )
     @mock.patch("airflow.www.views.url_for")
     def test_get_safe_url(self, test_url, expected_url, mock_url_for):
-        # https://github.com/python/cpython/pull/24297/files
-        # Check if tests are running with a Python version containing the above fix
-        # where ";" is removed as a separator
-        if parse_qsl(";a=b") != [(';a', 'b')] and ";" in test_url:
-            expected_url = expected_url.replace("%3B", "&")
-            expected_url += "="
-
         mock_url_for.return_value = "/home"
         with self.app.test_request_context(base_url="http://localhost:8080"):
             assert get_safe_url(test_url) == expected_url