You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by pr...@apache.org on 2021/02/08 03:26:22 UTC

[zeppelin] branch master updated: ZEPPELIN-4952: Markdown interpreter can be used to store XSS in notebooks

This is an automated email from the ASF dual-hosted git repository.

prabhjyotsingh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new 30c4231  ZEPPELIN-4952: Markdown interpreter can be used to store XSS in notebooks
30c4231 is described below

commit 30c423116be6d9a59d46a32b13bbea53d7366957
Author: Prabhjyot Singh <pr...@gmail.com>
AuthorDate: Fri Feb 5 16:26:41 2021 +0530

    ZEPPELIN-4952: Markdown interpreter can be used to store XSS in notebooks
    
    ### What is this PR for?
    Markdown interpreter can be used to store XSS in notebooks
    The %md interpreter can be used to store XSS in notebooks. These cells are automatically loaded by the user when opening the notebook, so, no manual user interaction is needed.
    
    Also, it doesn't matter if the cell has already a result or not.
    
    ```
    %md
    foo
    <script>alert(String.fromCharCode(88,83,83))</script>
    bar
    <bold onclick='alert("a");'>qqq</bold>
    ```
    
    ### What type of PR is it?
    [Bug Fix]
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-4952
    
    ### How should this be tested?
    * Use the above paragraph, and there should not be any XSS.
    
    ### Questions:
    * Does the licenses files need update? n/a
    * Is there breaking changes for older versions? n/a
    * Does this needs documentation? n/a
    
    Author: Prabhjyot Singh <pr...@gmail.com>
    
    Closes #4045 from prabhjyotsingh/ZEPPELIN-4952 and squashes the following commits:
    
    4609d4b78 [Prabhjyot Singh] fix rat-check
    fb853c3bb [Prabhjyot Singh] add unit test
    2c15d5c02 [Prabhjyot Singh] refactor it into a function
    b470c8241 [Prabhjyot Singh] bumup flexmark.all.version to 0.62.2
    36745e417 [Prabhjyot Singh] remove unsafe tags from input
    
    Change-Id: I65b40a77fed5c34bc842168259a1b598b21ff1de
---
 markdown/pom.xml                                   |  2 +-
 .../org/apache/zeppelin/markdown/Markdown.java     | 31 ++++++++++--
 .../zeppelin/markdown/UMLBlockQuoteParser.java     |  5 +-
 .../org/apache/zeppelin/markdown/UMLExtension.java |  2 +-
 .../apache/zeppelin/markdown/UMLNodeRenderer.java  |  2 +-
 .../org/apache/zeppelin/markdown/MarkdownTest.java | 56 ++++++++++++++++++++++
 6 files changed, 90 insertions(+), 8 deletions(-)

diff --git a/markdown/pom.xml b/markdown/pom.xml
index 275853f..faf6532 100644
--- a/markdown/pom.xml
+++ b/markdown/pom.xml
@@ -37,7 +37,7 @@
     <interpreter.name>md</interpreter.name>
     <markdown4j.version>2.2-cj-1.0</markdown4j.version>
     <pegdown.version>1.6.0</pegdown.version>
-    <flexmark.all.version>0.50.40</flexmark.all.version>
+    <flexmark.all.version>0.62.2</flexmark.all.version>
   </properties>
 
   <dependencies>
diff --git a/markdown/src/main/java/org/apache/zeppelin/markdown/Markdown.java b/markdown/src/main/java/org/apache/zeppelin/markdown/Markdown.java
index 8e3ae25..f88e5e2 100644
--- a/markdown/src/main/java/org/apache/zeppelin/markdown/Markdown.java
+++ b/markdown/src/main/java/org/apache/zeppelin/markdown/Markdown.java
@@ -17,11 +17,10 @@
 
 package org.apache.zeppelin.markdown;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import java.util.List;
 import java.util.Properties;
-
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import org.apache.zeppelin.interpreter.Interpreter;
 import org.apache.zeppelin.interpreter.InterpreterContext;
 import org.apache.zeppelin.interpreter.InterpreterResult;
@@ -30,6 +29,8 @@ import org.apache.zeppelin.interpreter.InterpreterUtils;
 import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
 import org.apache.zeppelin.scheduler.Scheduler;
 import org.apache.zeppelin.scheduler.SchedulerFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * MarkdownInterpreter interpreter for Zeppelin.
@@ -39,6 +40,8 @@ public class Markdown extends Interpreter {
 
   private MarkdownParser parser;
 
+  private final String[] unsafeTags = new String[]{"script", "object", "iframe", "embed"};
+
   /**
    * Markdown Parser Type.
    */
@@ -103,6 +106,7 @@ public class Markdown extends Interpreter {
     String html;
 
     try {
+      markdownText = sanitizeInput(markdownText);
       html = parser.render(markdownText);
     } catch (RuntimeException e) {
       LOGGER.error("Exception in MarkdownInterpreter while interpret ", e);
@@ -112,6 +116,27 @@ public class Markdown extends Interpreter {
     return new InterpreterResult(Code.SUCCESS, "%html " + html);
   }
 
+  public String sanitizeInput(String input) {
+    if (input != null) {
+      for (String unsafeTag : unsafeTags) {
+        String unsafeRegex = "<" + unsafeTag + ">(.*)</" + unsafeTag + ">";
+        Pattern pattern = Pattern.compile(unsafeRegex);
+        Matcher matcher = pattern.matcher(input);
+        if (matcher.find()) {
+          input = matcher.replaceAll("");
+        }
+      }
+
+      String onclickRegex = "onclick=[\"'](.*)[\"']";
+      Pattern pattern = Pattern.compile(onclickRegex);
+      Matcher matcher = pattern.matcher(input);
+      if (matcher.find()) {
+        input = matcher.replaceAll("");
+      }
+    }
+    return input;
+  }
+
   @Override
   public void cancel(InterpreterContext context) {
   }
diff --git a/markdown/src/main/java/org/apache/zeppelin/markdown/UMLBlockQuoteParser.java b/markdown/src/main/java/org/apache/zeppelin/markdown/UMLBlockQuoteParser.java
index 92a12ba..0fa86f3 100644
--- a/markdown/src/main/java/org/apache/zeppelin/markdown/UMLBlockQuoteParser.java
+++ b/markdown/src/main/java/org/apache/zeppelin/markdown/UMLBlockQuoteParser.java
@@ -129,13 +129,14 @@ public class UMLBlockQuoteParser extends AbstractBlockParser {
    * Generic Factory
    */
   public static class Factory implements CustomBlockParserFactory {
+
     @Override
-    public Set<Class<? extends CustomBlockParserFactory>> getAfterDependents() {
+    public Set<Class<?>> getAfterDependents() {
       return null;
     }
 
     @Override
-    public Set<Class<? extends CustomBlockParserFactory>> getBeforeDependents() {
+    public Set<Class<?>> getBeforeDependents() {
       return null;
     }
 
diff --git a/markdown/src/main/java/org/apache/zeppelin/markdown/UMLExtension.java b/markdown/src/main/java/org/apache/zeppelin/markdown/UMLExtension.java
index 4956a31..c00d49a 100644
--- a/markdown/src/main/java/org/apache/zeppelin/markdown/UMLExtension.java
+++ b/markdown/src/main/java/org/apache/zeppelin/markdown/UMLExtension.java
@@ -20,7 +20,7 @@ package org.apache.zeppelin.markdown;
 import com.vladsch.flexmark.html.HtmlRenderer;
 import com.vladsch.flexmark.parser.Parser;
 import com.vladsch.flexmark.util.data.MutableDataHolder;
-import com.vladsch.flexmark.util.builder.Extension;
+import com.vladsch.flexmark.util.misc.Extension;
 
 
 /**
diff --git a/markdown/src/main/java/org/apache/zeppelin/markdown/UMLNodeRenderer.java b/markdown/src/main/java/org/apache/zeppelin/markdown/UMLNodeRenderer.java
index b2cc8dc..c75a5ac 100644
--- a/markdown/src/main/java/org/apache/zeppelin/markdown/UMLNodeRenderer.java
+++ b/markdown/src/main/java/org/apache/zeppelin/markdown/UMLNodeRenderer.java
@@ -18,12 +18,12 @@
 package org.apache.zeppelin.markdown;
 
 import com.vladsch.flexmark.ext.gitlab.internal.GitLabOptions;
-import com.vladsch.flexmark.html.CustomNodeRenderer;
 import com.vladsch.flexmark.html.HtmlWriter;
 import com.vladsch.flexmark.html.renderer.NodeRenderer;
 import com.vladsch.flexmark.html.renderer.NodeRendererFactory;
 import com.vladsch.flexmark.html.renderer.NodeRendererContext;
 import com.vladsch.flexmark.html.renderer.NodeRenderingHandler;
+import com.vladsch.flexmark.html.renderer.NodeRenderingHandler.CustomNodeRenderer;
 import com.vladsch.flexmark.util.data.DataHolder;
 
 import org.slf4j.Logger;
diff --git a/markdown/src/test/java/org/apache/zeppelin/markdown/MarkdownTest.java b/markdown/src/test/java/org/apache/zeppelin/markdown/MarkdownTest.java
new file mode 100644
index 0000000..d9d3425
--- /dev/null
+++ b/markdown/src/test/java/org/apache/zeppelin/markdown/MarkdownTest.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.zeppelin.markdown;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Properties;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MarkdownTest {
+
+  Markdown md;
+
+  @Before
+  public void setUp() {
+    Properties props = new Properties();
+    props.put(Markdown.MARKDOWN_PARSER_TYPE, Markdown.PARSER_TYPE_MARKDOWN4J);
+    md = new Markdown(props);
+    md.open();
+  }
+
+  @After
+  public void tearDown() {
+    md.close();
+  }
+
+  @Test
+  public void sanitizeInput() {
+    String input = "This is "
+        + "<script>alert(1);</script> "
+        + "<div onclick='alert(2)'>this is div</div> "
+        + "text";
+    String output = md.sanitizeInput(input);
+    assertFalse(output.contains("<script>"));
+    assertFalse(output.contains("onclick"));
+    assertTrue(output.contains("this is div"));
+  }
+}