You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by lr...@apache.org on 2009/05/20 03:01:41 UTC

svn commit: r776511 - in /incubator/shindig/trunk/java/gadgets/src: main/java/org/apache/shindig/gadgets/parse/ main/java/org/apache/shindig/gadgets/render/ test/java/org/apache/shindig/gadgets/render/

Author: lryan
Date: Wed May 20 01:01:40 2009
New Revision: 776511

URL: http://svn.apache.org/viewvc?rev=776511&view=rev
Log:
Allow link tags to bypass sanitization where rel=stylesheet or type=text/css. Modularized the sanitization code to simplify adding other tag specific behavior
Fix serializer to support documents with no elements

Modified:
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderModule.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriter.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriterTest.java

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java?rev=776511&r1=776510&r2=776511&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/DefaultHtmlSerializer.java Wed May 20 01:01:40 2009
@@ -57,6 +57,7 @@
 
   private void serialize(Node n, Appendable output, boolean xmlMode)
       throws IOException {
+    if (n == null) return;
     switch (n.getNodeType()) {
       case Node.CDATA_SECTION_NODE: {
         break;

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderModule.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderModule.java?rev=776511&r1=776510&r2=776511&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderModule.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderModule.java Wed May 20 01:01:40 2009
@@ -39,8 +39,8 @@
         .toInstance(ImmutableSet.of("a", "abbr", "acronym", "area", "b", "bdo", "big", "blockquote",
             "body", "br", "caption", "center", "cite", "code", "col", "colgroup", "dd", "del",
             "dfn", "div", "dl", "dt", "em", "font", "h1", "h2", "h3", "h4", "h5", "h6", "head",
-            "hr", "html", "i", "img", "ins", "legend", "li", "map", "ol", "p", "pre", "q", "s",
-            "samp", "small", "span", "strike", "strong", "style", "sub", "sup", "table",
+            "hr", "html", "i", "img", "ins", "legend", "li", "link", "map", "ol", "p", "pre",
+            "q", "s", "samp", "small", "span", "strike", "strong", "style", "sub", "sup", "table",
             "tbody", "td", "tfoot", "th", "thead", "tr", "tt", "u", "ul"));
 
     bind(setLiteral)
@@ -54,5 +54,4 @@
             "vspace", "width"));
 
   }
-
 }

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriter.java?rev=776511&r1=776510&r2=776511&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriter.java Wed May 20 01:01:40 2009
@@ -42,11 +42,10 @@
 import java.lang.annotation.Target;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Map;
 import java.util.Set;
 
-import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.ImmutableList;
 import com.google.inject.BindingAnnotation;
 import com.google.inject.Inject;
 
@@ -61,14 +60,9 @@
  * rendering, OSML, etc.)
  */
 public class SanitizingGadgetRewriter implements GadgetRewriter {
-  private static final Set<String> URI_ATTRIBUTES = ImmutableSet.of("href", "src");
 
   /** Key stored as element user-data to bypass sanitization */
   private static final String BYPASS_SANITIZATION_KEY = "shindig.bypassSanitization";
-  
-  /** Attributes to forcibly rewrite and require an image mime type */
-  private static final Map<String, ImmutableSet<String>> PROXY_IMAGE_ATTRIBUTES =
-      ImmutableMap.of("img", ImmutableSet.of("src"));
 
   /**
    * Is the Gadget to be rendered sanitized?
@@ -144,10 +138,8 @@
    * Utiliity class to sanitize HTML nodes recursively.
    */
   class NodeSanitizer {
-
-    private final LinkRewriter cssRewriter;
-    private final LinkRewriter imageRewriter;
     private final Uri context;
+    private final List<DomFilter> filters;
 
     NodeSanitizer(Gadget gadget) {
       this.context = gadget.getSpec().getUrl();
@@ -156,10 +148,20 @@
           rewriterFeatureFactory.createRewriteAllFeature(expires == null ? -1 : expires);
 
       String proxyBaseNoGadget = rewriterUris.getProxyBase(gadget.getContext().getContainer());
-      cssRewriter = new SanitizingProxyingLinkRewriter(gadget.getSpec().getUrl(),
+      LinkRewriter cssRewriter = new SanitizingProxyingLinkRewriter(gadget.getSpec().getUrl(),
           rewriterFeature, proxyBaseNoGadget, "text/css");
-      imageRewriter = new SanitizingProxyingLinkRewriter(gadget.getSpec().getUrl(),
+      LinkRewriter imageRewriter = new SanitizingProxyingLinkRewriter(gadget.getSpec().getUrl(),
           rewriterFeature, proxyBaseNoGadget, "image/*");
+
+      // Create the set of filters to process in order.
+      filters = ImmutableList.of(
+        new BasicElementFilter(allowedTags, allowedAttributes),
+        new LinkSchemeCheckFilter(),
+        new StyleFilter(cssSanitizer, cssRewriter),
+        new LinkFilter(cssRewriter),
+        new ImageFilter(imageRewriter),
+        new TargetFilter()
+      );
     }
 
     private void sanitize(Node node) {
@@ -178,18 +180,30 @@
             for (Node child : toList(node.getChildNodes())) {
               sanitize(child);
             }
-          } else if (allowedTags.contains(element.getTagName().toLowerCase())) {
-            // TODO - Add special case for stylesheet LINK nodes.
-            // Special case handling for style nodes
-            if (element.getTagName().equalsIgnoreCase("style")) {
-              cssSanitizer.sanitize(element, context, cssRewriter);
+          } else {
+            boolean removed = false;
+            for (DomFilter filter : filters) {
+              DomFilter.Result tagResult = filter.filterTag(element, context);
+              if (tagResult == DomFilter.Result.REMOVE) {
+                element.getParentNode().removeChild(element);
+                removed = true;
+                break;
+              } else if (tagResult == DomFilter.Result.CONTINUE) {
+                for (Attr attr : toList(node.getAttributes())) {
+                  DomFilter.Result attrResult = filter.filterAttribute(attr, context);
+                  if (attrResult == DomFilter.Result.PASS) {
+                    break; // No need to process more attributes
+                  } else if (attrResult == DomFilter.Result.REMOVE) {
+                    element.removeAttributeNode(attr);
+                  }
+                }
+              }
             }
-            filterAttributes(element);
-            for (Node child : toList(node.getChildNodes())) {
-              sanitize(child);
+            if (!removed) {
+              for (Node child : toList(node.getChildNodes())) {
+                sanitize(child);
+              }
             }
-          } else {
-            node.getParentNode().removeChild(node);
           }
           break;
         case Node.COMMENT_NODE:
@@ -200,33 +214,6 @@
           break;
       }
     }
-
-    private void filterAttributes(Element element) {
-      Set<String> rewriteImageAttrs = PROXY_IMAGE_ATTRIBUTES.get(element.getNodeName().toLowerCase());
-      for (Attr attribute : toList(element.getAttributes())) {
-        String name = attribute.getNodeName().toLowerCase();
-        if (allowedAttributes.contains(name)) {
-          if (URI_ATTRIBUTES.contains(name)) {
-            try {
-              Uri uri = Uri.parse(attribute.getNodeValue());
-              String scheme = uri.getScheme();
-              if (!isAllowedScheme(scheme)) {
-                element.removeAttributeNode(attribute);
-              } else if (rewriteImageAttrs != null && rewriteImageAttrs.contains(name)) {
-                // Force rewrite the src of the image through the proxy. This is necessary
-                // because IE will run arbitrary script in files referenced from src
-                attribute.setValue(imageRewriter.rewrite(attribute.getNodeValue(), context));
-              }
-            } catch (IllegalArgumentException e) {
-              // Not a valid URI.
-              element.removeAttributeNode(attribute);
-            }
-          }
-        } else {
-          element.removeAttributeNode(attribute);
-        }
-      }
-    }
   }
 
 
@@ -241,6 +228,17 @@
     return list;
   }
 
+  /** Convert a NamedNodeMap to a list for easy and safe operations */
+  private static List<Node> toList(NodeList nodes) {
+    List<Node> list = new ArrayList<Node>(nodes.getLength());
+
+    for (int i = 0, j = nodes.getLength(); i < j; ++i) {
+      list.add(nodes.item(i));
+    }
+
+    return list;
+  }
+
   private static Bypass canBypassSanitization(Element element) {
     Bypass bypass = (Bypass) element.getUserData(BYPASS_SANITIZATION_KEY);
     if (bypass == null) {
@@ -249,19 +247,187 @@
     return bypass;
   }
 
-  /** Convert a NamedNodeMap to a list for easy and safe operations */
-  private static List<Node> toList(NodeList nodes) {
-    List<Node> list = new ArrayList<Node>(nodes.getLength());
+  /**
+   * Filter DOM elements and attributes to check their validity and
+   * restrict their allowed content
+   */
+  static interface DomFilter {
+    enum Result {
+      PASS,  // Check passed, do not process further checks on this filter
+      CONTINUE, // Check passed, process further checks on this filter
+      REMOVE // Check failed, remove item
+    };
+
+    /**
+     * Filter the element and possibly transform it.
+     */
+    Result filterTag(Element elem, Uri context);
+
+    /**
+     * Filter the attribute and possibly transform it
+\     */
+    Result filterAttribute(Attr attr, Uri context);
+  }
 
-    for (int i = 0, j = nodes.getLength(); i < j; ++i) {
-      list.add(nodes.item(i));
+  /**
+   * Restrict the set of allowed tags and attributes
+   */
+  static class BasicElementFilter implements DomFilter {
+    final Set<String> allowedTags;
+    final Set<String> allowedAttrs;
+
+    BasicElementFilter(Set<String> allowedTags, Set<String> allowedAttrs) {
+      this.allowedTags = allowedTags;
+      this.allowedAttrs = allowedAttrs;
     }
 
-    return list;
+    public Result filterTag(Element elem, Uri context) {
+      return allowedTags.contains(elem.getNodeName().toLowerCase()) ?
+          Result.CONTINUE : Result.REMOVE;
+    }
+
+    public Result filterAttribute(Attr attr, Uri context) {
+      return allowedAttrs.contains(attr.getName().toLowerCase()) ?
+          Result.CONTINUE : Result.REMOVE;
+    }
   }
 
-  private static boolean isAllowedScheme(String scheme) {
-    return scheme == null || scheme.equals("http") || scheme.equals("https");
+  /**
+   * Enfore that all uri's in the document have either http or https as
+   * their scheme
+   */
+  static class LinkSchemeCheckFilter implements DomFilter {
+    protected Set<String> uriAttributes;
+
+    LinkSchemeCheckFilter() {
+      uriAttributes = ImmutableSet.of("href", "src");
+    }
+
+    public Result filterTag(Element elem, Uri context) {
+      return Result.CONTINUE;
+    }
+
+    public Result filterAttribute(Attr attr, Uri context) {
+      if (uriAttributes.contains(attr.getName().toLowerCase())) {
+        try {
+          Uri uri = Uri.parse(attr.getValue());
+          String scheme = uri.getScheme();
+          if (scheme != null && !scheme.equals("http") && !scheme.equals("https")) {
+            return Result.REMOVE;
+          }
+        } catch (IllegalArgumentException iae) {
+          return Result.REMOVE;
+        }
+      }
+      return Result.CONTINUE;
+    }
+  }
+
+  /**
+   * Enfore that all images in the document are rewritten through the proxy.
+   * Prevents issues in IE where the image content contains script
+   */
+  static class ImageFilter implements DomFilter {
+    protected final LinkRewriter imageRewriter;
+
+    ImageFilter(LinkRewriter imageRewriter) {
+      this.imageRewriter = imageRewriter;
+    }
+
+    public Result filterTag(Element elem, Uri context) {
+      if ("img".equalsIgnoreCase(elem.getNodeName())) {
+        return Result.CONTINUE;
+      }
+      return Result.PASS;
+    }
+
+    public Result filterAttribute(Attr attr, Uri context) {
+      if ("src".equalsIgnoreCase(attr.getName())) {
+        attr.setValue(imageRewriter.rewrite(attr.getValue(), context));
+      }
+      return Result.PASS;
+    }
+  }
+
+  /**
+   * Pass the contents of style tags through the CSS sanitizer
+   */
+  static class StyleFilter implements DomFilter {
+    final CajaCssSanitizer sanitizer;
+    final LinkRewriter rewriter;
+
+    StyleFilter(CajaCssSanitizer sanitizer, LinkRewriter rewriter) {
+      this.sanitizer = sanitizer;
+      this.rewriter = rewriter;
+    }
+
+    public Result filterTag(Element elem, Uri context) {
+      if ("style".equalsIgnoreCase(elem.getNodeName())) {
+        sanitizer.sanitize(elem, context, rewriter);
+      }
+      return Result.PASS;
+    }
+
+    public Result filterAttribute(Attr attr, Uri context) {
+      return Result.PASS;
+    }
+  }
+
+  /**
+   * Restrict link tags to stylesheet content only and force the link to
+   * be rewritten through the proxy and sanitized
+   */
+  static class LinkFilter implements DomFilter {
+    final LinkRewriter rewriter;
+
+    LinkFilter(LinkRewriter rewriter) {
+      this.rewriter = rewriter;
+    }
+
+    public Result filterTag(Element elem, Uri context) {
+      if (!elem.getNodeName().equalsIgnoreCase("link")) {
+        return Result.CONTINUE;
+      }
+      boolean hasType = false;
+      for (Attr attr : toList(elem.getAttributes())) {
+        if ("rel".equalsIgnoreCase(attr.getName())) {
+          hasType |= "stylesheet".equalsIgnoreCase(attr.getValue());
+        } else if ("type".equalsIgnoreCase(attr.getName())) {
+          hasType |= "text/css".equalsIgnoreCase(attr.getValue());
+        } else if ("href".equalsIgnoreCase(attr.getName())) {
+          attr.setValue(rewriter.rewrite(attr.getValue(), context));
+        }
+      }
+      return hasType ? Result.PASS : Result.REMOVE;
+    }
+
+    public Result filterAttribute(Attr attr, Uri context) {
+      return Result.PASS;
+    }
+  }
+
+  /**
+   * Restrict the value of the target attribute on anchors etc. to
+   * _blank or _self or remove the node
+   */
+  static class TargetFilter implements DomFilter {
+
+    TargetFilter() {
+    }
+
+    public Result filterTag(Element elem, Uri context) {
+      return Result.CONTINUE;
+    }
+
+    public Result filterAttribute(Attr attr, Uri context) {
+      if ("target".equalsIgnoreCase(attr.getName())) {
+        String value = attr.getValue().toLowerCase();
+        if (!("_blank".equals(value) || "_self".equals(value))) {
+          return Result.REMOVE;
+        }
+      }
+      return Result.CONTINUE;
+    }
   }
 
   /**

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriterTest.java?rev=776511&r1=776510&r2=776511&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriterTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizingGadgetRewriterTest.java Wed May 20 01:01:40 2009
@@ -124,6 +124,30 @@
   }
 
   @Test
+  public void enforceStyleLinkRewritten() throws Exception {
+    String markup =
+        "<link rel=\"stylesheet\" "
+            + "href=\"http://www.test.com/dir/proxy?"
+            + "url=http%3A%2F%2Fwww.evil.com%2Fx.css&gadget=www.example.org%2Fgadget.xml&"
+            + "fp=45508rewriteMime=text/css\"/>";
+    String sanitized = 
+        "<link href=\"http://www.test.com/dir/proxy?"
+            + "url=http%3A%2F%2Fwww.evil.com%2Fx.css&gadget=www.example.org%2Fgadget.xml&"
+            + "fp=45508&sanitize=1&rewriteMime=text/css\" rel=\"stylesheet\">";
+    String rewritten = rewrite(gadget, markup, set("link"), set("rel", "href"));
+    assertEquals(sanitized, rewritten);
+  }
+
+  @Test
+  public void enforceNonStyleLinkStripped() throws Exception {
+    String markup =
+        "<link rel=\"script\" "
+            + "href=\"www.exmaple.org/evil.js\"/>";
+    String rewritten = rewrite(gadget, markup, set("link"), set("rel", "href", "type"));
+    assertEquals("", rewritten);
+  }
+
+  @Test
   public void enforceCssImportLinkRewritten() throws Exception {
     String markup =
         "<style type=\"text/css\">@import url('www.evil.com/x.js');</style>";
@@ -171,6 +195,25 @@
   }
 
   @Test
+  public void enforceTargetTopRestricted() throws Exception {
+    String markup = "<a href=\"http://www.example.com\" target=\"_top\">x</a>";
+    String sanitized = "<a href=\"http://www.example.com\">x</a>";
+    assertEquals(sanitized, rewrite(gadget, markup, set("a"), set("href", "target")));
+  }
+
+  @Test
+  public void enforceTargetSelfAllowed() throws Exception {
+    String markup = "<a href=\"http://www.example.com\" target=\"_self\">x</a>";
+    assertEquals(markup, rewrite(gadget, markup, set("a"), set("href", "target")));
+  }
+
+  @Test
+  public void enforceTargetBlankAllowed() throws Exception {
+    String markup = "<a href=\"http://www.example.com\" target=\"_BlAnK\">x</a>";
+    assertEquals(markup, rewrite(gadget, markup, set("a"), set("href", "target")));
+  }
+
+  @Test
   public void sanitizationBypassAllowed() throws Exception {
     String markup = "<p foo=\"bar\"><b>Parag</b><!--raph--></p>";
     // Create a rewriter that would strip everything