You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@airflow.apache.org by as...@apache.org on 2020/06/08 21:27:44 UTC

[airflow] branch master updated: Don't use the `|safe` filter in code, it's risky (#9180)

This is an automated email from the ASF dual-hosted git repository.

ash pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/master by this push:
     new 7fd3695  Don't use the `|safe` filter in code, it's risky (#9180)
7fd3695 is described below

commit 7fd3695766095a358803b79454bde26e5e28c1f0
Author: Ash Berlin-Taylor <as...@firemirror.com>
AuthorDate: Mon Jun 8 22:27:02 2020 +0100

    Don't use the `|safe` filter in code, it's risky (#9180)
    
    Most things already use the `Markup` class to correctly escape problem
    areas; this commit just fixes the last instances so that we can assert
    that `|safe` is never used.
---
 .pre-commit-config.yaml                            |  7 +++++++
 airflow/contrib/plugins/metastore_browser/main.py  |  4 ++--
 .../templates/metastore_browser/dbs.html           |  2 +-
 airflow/www/static/css/main.css                    |  4 ++++
 airflow/www/templates/airflow/chart.html           |  2 +-
 airflow/www/templates/airflow/code.html            |  2 +-
 airflow/www/templates/airflow/config.html          |  2 +-
 airflow/www/templates/airflow/dag.html             |  6 +++---
 airflow/www/templates/airflow/dag_code.html        |  2 +-
 airflow/www/templates/airflow/duration_chart.html  |  4 ++--
 airflow/www/templates/airflow/gantt.html           |  4 ++--
 airflow/www/templates/airflow/graph.html           | 14 ++++++-------
 airflow/www/templates/airflow/model_list.html      |  2 +-
 airflow/www/templates/airflow/task.html            |  8 +-------
 airflow/www/templates/airflow/task_instance.html   |  2 +-
 airflow/www/templates/airflow/ti_code.html         |  2 +-
 airflow/www/utils.py                               | 23 +++++++++++-----------
 airflow/www/views.py                               | 22 +++++++++------------
 18 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 4e9d87f..0bec834 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -246,6 +246,13 @@ metastore_browser/templates/.*\\.html$|.*\\.jinja2"
         entry: "pydevd.*settrace\\("
         pass_filenames: true
         files: \.py$
+      - id: dont-use-safe-filter
+        language: pygrep
+        name: Don't use safe in templates
+        description: the Safe filter is error-prone, use Markup() in code instead
+        entry: "\\|\\s*safe"
+        files: \.html$
+        pass_filenames: true
       - id: language-matters
         language: pygrep
         name: Check for language that we do not accept as community
diff --git a/airflow/contrib/plugins/metastore_browser/main.py b/airflow/contrib/plugins/metastore_browser/main.py
index b134cf9..95b176c 100644
--- a/airflow/contrib/plugins/metastore_browser/main.py
+++ b/airflow/contrib/plugins/metastore_browser/main.py
@@ -23,7 +23,7 @@ from datetime import datetime
 from typing import List
 
 import pandas as pd
-from flask import Blueprint, request
+from flask import Blueprint, Markup, request
 from flask_appbuilder import BaseView, expose
 
 from airflow.plugins_manager import AirflowPlugin
@@ -76,7 +76,7 @@ class MetastoreBrowserView(BaseView):
             escape=False,
             na_rep='',)
         return self.render_template(
-            "metastore_browser/dbs.html", table=table)
+            "metastore_browser/dbs.html", table=Markup(table))
 
     @expose('/table/')
     def table(self):
diff --git a/airflow/contrib/plugins/metastore_browser/templates/metastore_browser/dbs.html b/airflow/contrib/plugins/metastore_browser/templates/metastore_browser/dbs.html
index 9555a02..6a6e187 100644
--- a/airflow/contrib/plugins/metastore_browser/templates/metastore_browser/dbs.html
+++ b/airflow/contrib/plugins/metastore_browser/templates/metastore_browser/dbs.html
@@ -23,5 +23,5 @@
     <h4>
         <span>Hive Databases</span>
     </h4>
-    {{ table|safe }}
+    {{ table }}
 {% endblock %}
diff --git a/airflow/www/static/css/main.css b/airflow/www/static/css/main.css
index 062d975..1c00e42 100644
--- a/airflow/www/static/css/main.css
+++ b/airflow/www/static/css/main.css
@@ -291,6 +291,10 @@ label[for="timezone-other"],
   display: block;
 }
 
+.dag-doc {
+  margin-bottom: 15px;
+}
+
 /* stylelint-disable declaration-block-single-line-max-declarations */
 .hll { background-color: #ffc; }
 .c { color: #408080; font-style: italic; } /* Comment */
diff --git a/airflow/www/templates/airflow/chart.html b/airflow/www/templates/airflow/chart.html
index 69ece26..08ebac4 100644
--- a/airflow/www/templates/airflow/chart.html
+++ b/airflow/www/templates/airflow/chart.html
@@ -45,7 +45,7 @@
         <input name="_csrf_token" type="hidden" value="{{ csrf_token() }}">
     </form>
 </div>
-<div style="clear: both;">{{ chart |safe }}</div>
+<div style="clear: both;">{{ chart }}</div>
 <hr/>
 {% endblock %}
 
diff --git a/airflow/www/templates/airflow/code.html b/airflow/www/templates/airflow/code.html
index e51456e..1a94875 100644
--- a/airflow/www/templates/airflow/code.html
+++ b/airflow/www/templates/airflow/code.html
@@ -38,6 +38,6 @@
     {% endif %}
 
     {% if code_html %}
-        {{ code_html|safe }}
+        {{ code_html }}
     {% endif %}
 {% endblock %}
diff --git a/airflow/www/templates/airflow/config.html b/airflow/www/templates/airflow/config.html
index 046baa6..0710ea2 100644
--- a/airflow/www/templates/airflow/config.html
+++ b/airflow/www/templates/airflow/config.html
@@ -36,7 +36,7 @@
     {% endif %}
 
     {% if code_html %}
-        {{ code_html|safe }}
+        {{ code_html }}
     {% endif %}
 
     <hr>
diff --git a/airflow/www/templates/airflow/dag.html b/airflow/www/templates/airflow/dag.html
index cbfa15f..49dda5a 100644
--- a/airflow/www/templates/airflow/dag.html
+++ b/airflow/www/templates/airflow/dag.html
@@ -427,8 +427,8 @@ function updateQueryStringParameter(uri, key, value) {
       });
       subdag_id = sd;
       execution_date = d;
-      $('#task_id').html(t);
-      $('#execution_date').html(d);
+      $('#task_id').text(t);
+      $('#execution_date').text(d);
       $('#myModal').modal({});
       $("#myModal").css("margin-top","0px");
       $('#extra_links').prev('hr').hide();
@@ -527,7 +527,7 @@ function updateQueryStringParameter(uri, key, value) {
     function call_modal_dag(dag) {
       id = dag && dag.id;
       execution_date = dag && dag.execution_date;
-      $('#dag_id').html(dag_id);
+      $('#dag_id').text(dag_id);
       $('#dagModal').modal({});
       $("#dagModal").css("margin-top","0px");
     }
diff --git a/airflow/www/templates/airflow/dag_code.html b/airflow/www/templates/airflow/dag_code.html
index ca578cf..82d1e23 100644
--- a/airflow/www/templates/airflow/dag_code.html
+++ b/airflow/www/templates/airflow/dag_code.html
@@ -27,7 +27,7 @@
     <div class="active">
       <a onclick="toggleWrap()">Toggle wrap</a>
     </div>
-    {{ html_code|safe }}
+    {{ html_code }}
 {% endblock %}
 
 {% block tail %}
diff --git a/airflow/www/templates/airflow/duration_chart.html b/airflow/www/templates/airflow/duration_chart.html
index 9c49f9c..7d2c521 100644
--- a/airflow/www/templates/airflow/duration_chart.html
+++ b/airflow/www/templates/airflow/duration_chart.html
@@ -48,8 +48,8 @@
         <input name="_csrf_token" type="hidden" value="{{ csrf_token() }}">
     </form>
 </div>
-<div id="dur_chart" style="clear: both;">{{ chart |safe }}</div>
-<div id="cum_dur_chart" style="clear: both;">{{ cum_chart | safe}}</div>
+<div id="dur_chart" style="clear: both;">{{ chart }}</div>
+<div id="cum_dur_chart" style="clear: both;">{{ cum_chart }}</div>
 <hr/>
 {% endblock %}
 
diff --git a/airflow/www/templates/airflow/gantt.html b/airflow/www/templates/airflow/gantt.html
index 5de3789..d31a1e6 100644
--- a/airflow/www/templates/airflow/gantt.html
+++ b/airflow/www/templates/airflow/gantt.html
@@ -34,7 +34,7 @@
     Base date: {{ form.base_date(class_="form-control") }}
     Number of runs: {{ form.num_runs(class_="form-control") }}
     Run:<input type="hidden" value="{{ dag.dag_id }}" name="dag_id">
-    {{ form.execution_date(class_="form-control") | safe }}
+    {{ form.execution_date(class_="form-control") }}
     <input type="submit" value="Go" class="btn btn-default" action="" method="get">
     <input type="hidden" name="root" value="{{ root if root else '' }}">
     <input name="_csrf_token" type="hidden" value="{{ csrf_token() }}">
@@ -57,7 +57,7 @@
     var dag_id = '{{ dag.dag_id }}';
     var task_id = '';
     var execution_date = '';
-    data = {{ data |tojson|safe }};
+    data = {{ data |tojson }};
     var gantt = d3.gantt()
       .taskTypes(data.taskNames)
       .height(data.height)
diff --git a/airflow/www/templates/airflow/graph.html b/airflow/www/templates/airflow/graph.html
index 27dc598..d312e4d 100644
--- a/airflow/www/templates/airflow/graph.html
+++ b/airflow/www/templates/airflow/graph.html
@@ -29,7 +29,7 @@
 {% block content %}
 {{ super() }}
 {% if doc_md %}
-<div class="rich_doc" style="margin-bottom: 15px;">{{ doc_md|safe }}</div>
+{{ doc_md }}
 {% endif %}
 <div class="form-inline">
   <form method="get" style="float:left;">
@@ -37,9 +37,9 @@
     Base date: {{ form.base_date(class_="form-control") }}
     Number of runs: {{ form.num_runs(class_="form-control") }}
     Run:
-    {{ form.execution_date(class_="form-control") | safe }}
+    {{ form.execution_date(class_="form-control") }}
     Layout:
-    {{ form.arrange(class_="form-control") | safe }}
+    {{ form.arrange(class_="form-control") }}
     <input type="hidden" name="root" value="{{ root }}">
     <input type="hidden" value="{{ dag.dag_id }}" name="dag_id">
     <input name="_csrf_token" type="hidden" value="{{ csrf_token() }}">
@@ -107,14 +107,14 @@
     var initialStrokeWidth = '3px';
     var highlightStrokeWidth = '5px';
 
-    var nodes = {{ nodes|tojson|safe }};
-    var edges = {{ edges|tojson|safe }};
+    var nodes = {{ nodes|tojson }};
+    var edges = {{ edges|tojson }};
     var execution_date = "{{ execution_date }}";
     var arrange = "{{ arrange }}";
 
     // Below variables are being used in dag.js
-    var tasks = {{ tasks|tojson|safe }};
-    var task_instances = {{ task_instances|tojson|safe }};
+    var tasks = {{ tasks|tojson }};
+    var task_instances = {{ task_instances|tojson }};
     var getTaskInstanceURL = "{{ url_for('Airflow.task_instances') }}" +
       "?dag_id=" + encodeURIComponent(dag_id) + "&execution_date=" +
       encodeURIComponent(execution_date);
diff --git a/airflow/www/templates/airflow/model_list.html b/airflow/www/templates/airflow/model_list.html
index 47b7703..b22e191 100644
--- a/airflow/www/templates/airflow/model_list.html
+++ b/airflow/www/templates/airflow/model_list.html
@@ -79,7 +79,7 @@
                     {% if formatter and formatter(item) %}
                         <td>{{ formatter(item) }}</td>
                     {% elif item[value] != None %}
-                        <td>{{ item[value]|safe }}</td>
+                        <td>{{ item[value] }}</td>
                     {% else %}
                         <td></td>
                     {% endif %}
diff --git a/airflow/www/templates/airflow/task.html b/airflow/www/templates/airflow/task.html
index 70786f9..dbe586f 100644
--- a/airflow/www/templates/airflow/task.html
+++ b/airflow/www/templates/airflow/task.html
@@ -39,14 +39,11 @@
                 </tr>
             {% endfor %}
         </table>
-        {% if html_code is defined %}
-            {{ html_code|safe }}
-        {% endif %}
     </div>
     <div>
         {% for attr, value in special_attrs_rendered.items() %}
             <h5>Attribute: {{ attr }}</h5>
-            {{ value|safe }}
+            {{ value }}
         {% endfor %}
         <h5>Task Instance Attributes</h5>
         <table class="table table-striped table-bordered">
@@ -74,8 +71,5 @@
                 </tr>
             {% endfor %}
         </table>
-        {% if html_code is defined %}
-            {{ html_code|safe }}
-        {% endif %}
     </div>
 {% endblock %}
diff --git a/airflow/www/templates/airflow/task_instance.html b/airflow/www/templates/airflow/task_instance.html
index 28791c8..488aee3 100644
--- a/airflow/www/templates/airflow/task_instance.html
+++ b/airflow/www/templates/airflow/task_instance.html
@@ -31,7 +31,7 @@
               {{ task_id }}
               <input type="hidden" value="{{ task_id }}" name="task_id">
             </span>
-        {{ form.execution_date(class_="form-control") | safe }}
+        {{ form.execution_date(class_="form-control") }}
 
     </div>
 </form>
diff --git a/airflow/www/templates/airflow/ti_code.html b/airflow/www/templates/airflow/ti_code.html
index 76d9423..0d63ddd 100644
--- a/airflow/www/templates/airflow/ti_code.html
+++ b/airflow/www/templates/airflow/ti_code.html
@@ -25,6 +25,6 @@
     <h4>{{ title }}</h4>
     {% for k, v in html_dict.items() %}
         <h5>{{ k }}</h5>
-        {{ v|safe }}
+        {{ v }}
     {% endfor %}
 {% endblock %}
diff --git a/airflow/www/utils.py b/airflow/www/utils.py
index 5835f1e..3c71b0d 100644
--- a/airflow/www/utils.py
+++ b/airflow/www/utils.py
@@ -290,24 +290,25 @@ def pygment_html_render(s, lexer=lexers.TextLexer):
 def render(obj, lexer):
     out = ""
     if isinstance(obj, str):
-        out += pygment_html_render(obj, lexer)
+        out = Markup(pygment_html_render(obj, lexer))
     elif isinstance(obj, (tuple, list)):
         for i, s in enumerate(obj):
-            out += "<div>List item #{}</div>".format(i)
-            out += "<div>" + pygment_html_render(s, lexer) + "</div>"
+            out += Markup("<div>List item #{}</div>").format(i)
+            out += Markup("<div>" + pygment_html_render(s, lexer) + "</div>")
     elif isinstance(obj, dict):
         for k, v in obj.items():
-            out += '<div>Dict item "{}"</div>'.format(k)
-            out += "<div>" + pygment_html_render(v, lexer) + "</div>"
+            out += Markup('<div>Dict item "{}"</div>').format(k)
+            out += Markup("<div>" + pygment_html_render(v, lexer) + "</div>")
     return out
 
 
-def wrapped_markdown(s):
-    return (
-        '<div class="rich_doc">' + markdown.markdown(s) + "</div>"
-        if s is not None
-        else None
-    )
+def wrapped_markdown(s, css_class=None):
+    if s is None:
+        return None
+
+    return Markup(
+        '<div class="rich_doc {css_class}" >' + markdown.markdown(s) + "</div>"
+    ).format(css_class=css_class)
 
 
 def get_attr_renderer():
diff --git a/airflow/www/views.py b/airflow/www/views.py
index bb0a028..f31c0b1 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -32,7 +32,6 @@ from typing import Dict, List, Optional, Tuple
 from urllib.parse import quote, unquote
 
 import lazy_object_proxy
-import markdown
 import nvd3
 import sqlalchemy as sqla
 from flask import (
@@ -555,15 +554,15 @@ class Airflow(AirflowBaseView):
             dag_id = request.args.get('dag_id')
             dag_orm = DagModel.get_dagmodel(dag_id, session=session)
             code = DagCode.get_code_by_fileloc(dag_orm.fileloc)
-            html_code = highlight(
-                code, lexers.PythonLexer(), HtmlFormatter(linenos=True))
+            html_code = Markup(highlight(
+                code, lexers.PythonLexer(), HtmlFormatter(linenos=True)))
 
         except Exception as e:
             all_errors += (
                 "Exception encountered during " +
                 "dag_id retrieval/dag retrieval fallback/code highlighting:\n\n{}\n".format(e)
             )
-            html_code = '<p>Failed to load file.</p><p>Details: {}</p>'.format(
+            html_code = Markup('<p>Failed to load file.</p><p>Details: {}</p>').format(
                 escape(all_errors))
 
         return self.render_template(
@@ -631,11 +630,9 @@ class Airflow(AirflowBaseView):
         for template_field in task.template_fields:
             content = getattr(task, template_field)
             if template_field in wwwutils.get_attr_renderer():
-                html_dict[template_field] = \
-                    wwwutils.get_attr_renderer()[template_field](content)
+                html_dict[template_field] = wwwutils.get_attr_renderer()[template_field](content)
             else:
-                html_dict[template_field] = (
-                    "<pre><code>" + str(content) + "</pre></code>")
+                html_dict[template_field] = Markup("<pre><code>{}</pre></code>").format(str(content))
 
         return self.render_template(
             'airflow/ti_code.html',
@@ -1602,8 +1599,7 @@ class Airflow(AirflowBaseView):
         if not tasks:
             flash("No tasks found", "error")
         session.commit()
-        doc_md = markdown.markdown(dag.doc_md) \
-            if hasattr(dag, 'doc_md') and dag.doc_md else ''
+        doc_md = wwwutils.wrapped_markdown(getattr(dag, 'doc_md', None), css_class='dag-doc')
 
         external_logs = conf.get('elasticsearch', 'frontend')
         return self.render_template(
@@ -1733,8 +1729,8 @@ class Airflow(AirflowBaseView):
             demo_mode=conf.getboolean('webserver', 'demo_mode'),
             root=root,
             form=form,
-            chart=chart.htmlcontent,
-            cum_chart=cum_chart.htmlcontent
+            chart=Markup(chart.htmlcontent),
+            cum_chart=Markup(cum_chart.htmlcontent),
         )
 
     @expose('/tries')
@@ -1798,7 +1794,7 @@ class Airflow(AirflowBaseView):
             demo_mode=conf.getboolean('webserver', 'demo_mode'),
             root=root,
             form=form,
-            chart=chart.htmlcontent,
+            chart=Markup(chart.htmlcontent),
             tab_title='Tries',
         )