You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@shindig.apache.org by et...@apache.org on 2009/02/03 01:35:53 UTC

svn commit: r740163 - in /incubator/shindig/trunk/java/gadgets/src: main/java/org/apache/shindig/gadgets/ main/java/org/apache/shindig/gadgets/parse/ main/java/org/apache/shindig/gadgets/render/ test/java/org/apache/shindig/gadgets/render/

Author: etnu
Date: Tue Feb  3 00:35:53 2009
New Revision: 740163

URL: http://svn.apache.org/viewvc?rev=740163&view=rev
Log:
Added support for sanitized content rendering.

To view sanitized output, you need to do two things:

1. Bind NekoHtmlParser instead of NekoSimplifiedHtmlParser. The code will work without this, but it is not guaranteed to be safe.
2. Pass the 'sanitize' argument with a value of 1 in a gadget context. This can be done in an iframe by passing &sanitize=1 in the query string.

This does not currently sanitize CSS correctly, and it is not really any safer than 'standard' gadget rendering until that is done. Louis is working on making this work as noted in his recent parser commit messages.


Added:
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
Modified:
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/DefaultGuiceModule.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
    incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderingContentRewriter.java
    incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/DefaultGuiceModule.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/DefaultGuiceModule.java?rev=740163&r1=740162&r2=740163&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/DefaultGuiceModule.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/DefaultGuiceModule.java Tue Feb  3 00:35:53 2009
@@ -18,24 +18,27 @@
  */
 package org.apache.shindig.gadgets;
 
-import com.google.common.collect.Lists;
-import com.google.inject.AbstractModule;
-import com.google.inject.Inject;
-import com.google.inject.Provider;
-import com.google.inject.TypeLiteral;
-
 import org.apache.shindig.gadgets.http.HttpResponse;
 import org.apache.shindig.gadgets.parse.ParseModule;
 import org.apache.shindig.gadgets.preload.HttpPreloader;
 import org.apache.shindig.gadgets.preload.PipelinedDataPreloader;
 import org.apache.shindig.gadgets.preload.Preloader;
 import org.apache.shindig.gadgets.render.RenderingContentRewriter;
+import org.apache.shindig.gadgets.render.SanitizedRenderingContentRewriter;
 import org.apache.shindig.gadgets.rewrite.CSSContentRewriter;
 import org.apache.shindig.gadgets.rewrite.ContentRewriter;
 import org.apache.shindig.gadgets.rewrite.HTMLContentRewriter;
 import org.apache.shindig.gadgets.servlet.CajaContentRewriter;
 
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Lists;
+import com.google.inject.AbstractModule;
+import com.google.inject.Inject;
+import com.google.inject.Provider;
+import com.google.inject.TypeLiteral;
+
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.Executor;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
@@ -58,6 +61,29 @@
     bind(new TypeLiteral<List<ContentRewriter>>(){}).toProvider(ContentRewritersProvider.class);
     bind(new TypeLiteral<List<Preloader>>(){}).toProvider(PreloaderProvider.class);
 
+    TypeLiteral<Set<String>> setLiteral = new TypeLiteral<Set<String>>(){};
+
+    // NOTE: Sanitization only works when using the "full" Neko HTML parser. It is not recommended
+    // that you attempt to use sanitization without it.
+    bind(setLiteral)
+        .annotatedWith(SanitizedRenderingContentRewriter.AllowedTags.class)
+        .toInstance(ImmutableSet.of("a", "abbr", "acronym", "area", "b", "bdo", "big", "blockquote",
+            "body", "br", "caption", "center", "cite", "code", "col", "colgroup", "dd", "del",
+            "dfn", "div", "dl", "dt", "em", "font", "h1", "h2", "h3", "h4", "h5", "h6", "head",
+            "hr", "html", "i", "img", "ins", "legend", "li", "map", "ol", "p", "pre", "q", "s",
+            "samp", "small", "span", "strike", "strong", "style", "sub", "sup", "table",
+            "tbody", "td", "tfoot", "th", "thead", "tr", "tt", "u", "ul"));
+
+    bind(setLiteral)
+        .annotatedWith(SanitizedRenderingContentRewriter.AllowedAttributes.class)
+        .toInstance(ImmutableSet.of("abbr", "align", "alt", "axis", "bgcolor", "border",
+            "cellpadding", "cellspacing", "char", "charoff", "cite", "class", "clear", "color",
+            "cols", "colspan", "compact", "coords", "datetime", "dir", "face", "headers", "height",
+            "href", "hreflang", "hspace", "id", "ismap", "lang", "longdesc", "name", "nohref",
+            "noshade", "nowrap", "rel", "rev", "rowspan", "rules", "scope", "shape", "size", "span",
+            "src", "start", "style", "summary", "title", "type", "usemap", "valign", "value",
+            "vspace", "width"));
+
     // We perform static injection on HttpResponse for cache TTLs.
     requestStaticInjection(HttpResponse.class);
   }
@@ -69,11 +95,13 @@
     public ContentRewritersProvider(HTMLContentRewriter optimizingRewriter,
                                     CSSContentRewriter cssRewriter,
                                     CajaContentRewriter cajaRewriter,
+                                    SanitizedRenderingContentRewriter sanitizedRewriter,
                                     RenderingContentRewriter renderingRewriter) {
       rewriters = Lists.newArrayList();
       rewriters.add(optimizingRewriter);
       rewriters.add(cssRewriter);
       rewriters.add(cajaRewriter);
+      rewriters.add(sanitizedRewriter);
       rewriters.add(renderingRewriter);
     }
 

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java?rev=740163&r1=740162&r2=740163&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/parse/GadgetHtmlParser.java Tue Feb  3 00:35:53 2009
@@ -57,7 +57,7 @@
 
   public final Document parseDom(String source) throws GadgetException {
     Document document = null;
-    String key = null;  
+    String key = null;
     // Avoid checksum overhead if we arent caching
     boolean shouldCache = shouldCache();
     if (shouldCache) {

Modified: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderingContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderingContentRewriter.java?rev=740163&r1=740162&r2=740163&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderingContentRewriter.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/RenderingContentRewriter.java Tue Feb  3 00:35:53 2009
@@ -122,6 +122,11 @@
   }
 
   public RewriterResults rewrite(Gadget gadget, MutableContent mutableContent) {
+    // Don't touch sanitized gadgets.
+    if ("1".equals(gadget.getContext().getParameter("sanitize"))) {
+      return RewriterResults.notCacheable();
+    }
+
     try {
       Document document = mutableContent.getDocument();
 

Added: incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java?rev=740163&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriter.java Tue Feb  3 00:35:53 2009
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.render;
+
+import org.apache.shindig.common.uri.Uri;
+import org.apache.shindig.gadgets.Gadget;
+import org.apache.shindig.gadgets.http.HttpRequest;
+import org.apache.shindig.gadgets.http.HttpResponse;
+import org.apache.shindig.gadgets.rewrite.ContentRewriter;
+import org.apache.shindig.gadgets.rewrite.MutableContent;
+import org.apache.shindig.gadgets.rewrite.RewriterResults;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.inject.BindingAnnotation;
+import com.google.inject.Inject;
+
+import org.w3c.dom.Attr;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A content rewriter that will sanitize output for simple 'badge' like display.
+ *
+ * This is intentionally not as robust as Caja. It is a simple element whitelist. It can not be used
+ * for sanitizing either javascript or CSS. CSS is desired in the long run, but it can't be proven
+ * safe in the short term.
+ *
+ * Generally used in conjunction with a gadget that gets its dynamic behavior externally (proxied
+ * rendering, OSML, etc.)
+ */
+public class SanitizedRenderingContentRewriter implements ContentRewriter {
+  private static final Set<String> URI_ATTRIBUTES = ImmutableSet.of("href", "src");
+
+  private final Set<String> allowedTags;
+  private final Set<String> allowedAttributes;
+
+  @Inject
+  public SanitizedRenderingContentRewriter(@AllowedTags Set<String> allowedTags,
+                                           @AllowedAttributes Set<String> allowedAttributes) {
+    this.allowedTags = allowedTags;
+    this.allowedAttributes = allowedAttributes;
+  }
+
+  public RewriterResults rewrite(HttpRequest request, HttpResponse resp, MutableContent content) {
+    return null;
+  }
+
+  public RewriterResults rewrite(Gadget gadget, MutableContent content) {
+    if ("1".equals(gadget.getContext().getParameter("sanitize"))) {
+      sanitize(content.getDocument().getDocumentElement());
+      content.documentChanged();
+    }
+
+    return RewriterResults.notCacheable();
+  }
+
+  private void sanitize(Node node) {
+    switch (node.getNodeType()) {
+      case Node.CDATA_SECTION_NODE:
+      case Node.TEXT_NODE:
+      case Node.ENTITY_REFERENCE_NODE:
+        break;
+      case Node.ELEMENT_NODE:
+      case Node.DOCUMENT_NODE:
+        Element element = (Element) node;
+        if (allowedTags.contains(element.getTagName().toLowerCase())) {
+          filterAttributes(element);
+          for (Node child : toList(node.getChildNodes())) {
+            sanitize(child);
+          }
+        } else {
+          node.getParentNode().removeChild(node);
+        }
+        break;
+      case Node.COMMENT_NODE:
+      default:
+        // Must remove all comments to avoid conditional comment evaluation.
+        // There might be other, unknown types as well. Don't trust them.
+        node.getParentNode().removeChild(node);
+        break;
+    }
+  }
+
+  private void filterAttributes(Element element) {
+
+    for (Attr attribute : toList(element.getAttributes())) {
+      String name = attribute.getNodeName();
+      if (allowedAttributes.contains(name)) {
+        if (URI_ATTRIBUTES.contains(name)) {
+          try {
+            Uri uri = Uri.parse(attribute.getNodeValue());
+            String scheme = uri.getScheme();
+            if (!isAllowedScheme(scheme)) {
+              element.removeAttributeNode(attribute);
+            }
+          } catch (IllegalArgumentException e) {
+            // Not a valid URI.
+            element.removeAttributeNode(attribute);
+          }
+        }
+      } else {
+        element.removeAttributeNode(attribute);
+      }
+    }
+  }
+
+  /** Convert a NamedNodeMap to a list for easy and safe operations */
+  private static List<Attr> toList(NamedNodeMap nodes) {
+    List<Attr> list = new ArrayList<Attr>(nodes.getLength());
+
+    for (int i = 0, j = nodes.getLength(); i < j; ++i) {
+      list.add((Attr) nodes.item(i));
+    }
+
+    return list;
+  }
+
+  /** Convert a NodeList to a list for easy and safe operations */
+  private static List<Node> toList(NodeList nodes) {
+    List<Node> list = new ArrayList<Node>(nodes.getLength());
+
+    for (int i = 0, j = nodes.getLength(); i < j; ++i) {
+      list.add(nodes.item(i));
+    }
+
+    return list;
+  }
+
+  private static boolean isAllowedScheme(String scheme) {
+    return scheme == null || scheme.equals("http") || scheme.equals("https");
+  }
+
+  @Retention(RetentionPolicy.RUNTIME)
+  @Target(ElementType.PARAMETER)
+  @BindingAnnotation
+  public @interface AllowedTags { }
+
+  @Retention(RetentionPolicy.RUNTIME)
+  @Target(ElementType.PARAMETER)
+  @BindingAnnotation
+  public @interface AllowedAttributes { }
+}

Modified: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java?rev=740163&r1=740162&r2=740163&view=diff
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java (original)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/RenderingContentRewriterTest.java Tue Feb  3 00:35:53 2009
@@ -65,7 +65,6 @@
 
 import java.util.Arrays;
 import java.util.Collection;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -79,14 +78,6 @@
 public class RenderingContentRewriterTest {
   private static final Uri SPEC_URL = Uri.parse("http://example.org/gadget.xml");
   private static final String BODY_CONTENT = "Some body content";
-  private final FakeMessageBundleFactory messageBundleFactory = new FakeMessageBundleFactory();
-  private final FakeContainerConfig config = new FakeContainerConfig();
-  private final UrlGenerator urlGenerator = new FakeUrlGenerator();
-
-  private FakeGadgetFeatureRegistry featureRegistry;
-  private RenderingContentRewriter rewriter;
-  private GadgetHtmlParser parser;
-
   static final Pattern DOCUMENT_SPLIT_PATTERN = Pattern.compile(
       "(.*)<head>(.*?)<\\/head>(?:.*)<body(.*?)>(.*?)<\\/body>(?:.*)", Pattern.DOTALL |
       Pattern.CASE_INSENSITIVE);
@@ -96,6 +87,15 @@
   static final int BODY_ATTRIBUTES_GROUP = 3;
   static final int BODY_GROUP = 4;
 
+  private final FakeMessageBundleFactory messageBundleFactory = new FakeMessageBundleFactory();
+  private final FakeContainerConfig config = new FakeContainerConfig();
+  private final UrlGenerator urlGenerator = new FakeUrlGenerator();
+  private final MapGadgetContext context = new MapGadgetContext();
+
+  private FakeGadgetFeatureRegistry featureRegistry;
+  private RenderingContentRewriter rewriter;
+  private GadgetHtmlParser parser;
+
   @Before
   public void setUp() throws Exception {
     featureRegistry = new FakeGadgetFeatureRegistry();
@@ -108,7 +108,7 @@
   private Gadget makeGadgetWithSpec(String gadgetXml) throws GadgetException {
     GadgetSpec spec = new GadgetSpec(SPEC_URL, gadgetXml);
     return new Gadget()
-        .setContext(new GadgetContext())
+        .setContext(context)
         .setPreloads(new NullPreloads())
         .setSpec(spec);
   }
@@ -682,6 +682,34 @@
     assertFalse("Base element injected incorrectly.", rewritten.contains("<base"));
   }
 
+  @Test
+  public void doesNotRewriteWhenSanitizeEquals1() throws Exception {
+    Gadget gadget = makeDefaultGadget();
+
+    context.params.put("sanitize", "1");
+
+    assertEquals(BODY_CONTENT, rewrite(gadget, BODY_CONTENT));
+  }
+
+  @Test
+  public void doesRewriteWhenSanitizeEquals0() throws Exception {
+    Gadget gadget = makeDefaultGadget();
+
+    context.params.put("sanitize", "0");
+
+    assertFalse("Didn't rewrite when sanitize was '0'.",
+        BODY_CONTENT.equals(rewrite(gadget, BODY_CONTENT)));
+  }
+
+  private static class MapGadgetContext extends GadgetContext {
+    protected final Map<String, String> params = Maps.newHashMap();
+
+    @Override
+    public String getParameter(String name) {
+      return params.get(name);
+    }
+  }
+
   private static class FakeContainerConfig extends AbstractContainerConfig {
     protected final Map<String, Object> data = Maps.newHashMap();
 
@@ -689,7 +717,6 @@
     public Object getProperty(String container, String name) {
       return data.get(name);
     }
-
   }
 
   /**

Added: incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java
URL: http://svn.apache.org/viewvc/incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java?rev=740163&view=auto
==============================================================================
--- incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java (added)
+++ incubator/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/render/SanitizedRenderingContentRewriterTest.java Tue Feb  3 00:35:53 2009
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.render;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.shindig.common.PropertiesModule;
+import org.apache.shindig.gadgets.Gadget;
+import org.apache.shindig.gadgets.GadgetContext;
+import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
+import org.apache.shindig.gadgets.parse.nekohtml.NekoHtmlParser;
+import org.apache.shindig.gadgets.rewrite.ContentRewriter;
+import org.apache.shindig.gadgets.rewrite.MutableContent;
+
+import com.google.common.collect.ImmutableSet;
+import com.google.common.collect.Sets;
+import com.google.inject.AbstractModule;
+import com.google.inject.Guice;
+import com.google.inject.Injector;
+import com.google.inject.Provider;
+
+import org.junit.Before;
+import org.junit.Test;
+import org.w3c.dom.DOMImplementation;
+import org.w3c.dom.bootstrap.DOMImplementationRegistry;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class SanitizedRenderingContentRewriterTest {
+  private static final Set<String> DEFAULT_TAGS = ImmutableSet.of("html", "head", "body");
+  private static final Pattern BODY_REGEX = Pattern.compile(".*<body>(.*)</body>.*");
+
+  private final GadgetContext sanitaryGadgetContext = new GadgetContext() {
+    @Override
+    public String getParameter(String name) {
+      return "sanitize".equals(name) ? "1" : null;
+    }
+  };
+
+  private final GadgetContext unsanitaryGadgetContext = new GadgetContext();
+
+  private GadgetHtmlParser parser;
+
+  @Before
+  public void setUp() throws Exception {
+    Injector injector = Guice.createInjector(new TestParseModule(), new PropertiesModule());
+    parser = injector.getInstance(GadgetHtmlParser.class);
+  }
+
+  private String rewrite(Gadget gadget, String content, Set<String> tags, Set<String> attributes) {
+    ContentRewriter rewriter = createRewriter(tags, attributes);
+
+    MutableContent mc = new MutableContent(parser, content);
+    assertEquals(0, rewriter.rewrite(gadget, mc).getCacheTtl());
+
+    Matcher matcher = BODY_REGEX.matcher(mc.getContent());
+    if (matcher.matches()) {
+      return matcher.group(1);
+    }
+    return mc.getContent();
+  }
+
+  private static Set<String> set(String... items) {
+    return Sets.newHashSet(items);
+  }
+
+  private ContentRewriter createRewriter(Set<String> tags, Set<String> attributes) {
+    Set<String> newTags = new HashSet<String>(tags);
+    newTags.addAll(DEFAULT_TAGS);
+    return new SanitizedRenderingContentRewriter(newTags, attributes);
+  }
+
+  @Test
+  public void enforceTagWhiteList() {
+    String markup =
+        "<p><style type=\"text/css\">styles</style>text <b>bold text</b></p>" +
+        "<b>Bold text</b><i>Italic text<b>Bold text</b></i>";
+
+    String sanitized = "<p>text <b>bold text</b></p><b>Bold text</b>";
+
+
+
+    Gadget gadget = new Gadget().setContext(sanitaryGadgetContext);
+
+    assertEquals(sanitized, rewrite(gadget, markup, set("p", "b"), set()));
+  }
+
+  @Test
+  public void enforceAttributeWhiteList() {
+    String markup = "<p foo=\"bar\" bar=\"baz\">Paragraph</p>";
+    String sanitized = "<p bar=\"baz\">Paragraph</p>";
+
+    Gadget gadget = new Gadget().setContext(sanitaryGadgetContext);
+
+    assertEquals(sanitized, rewrite(gadget, markup, set("p"), set("bar")));
+  }
+
+  @Test
+  public void restrictHrefAndSrcAttributes() {
+    String markup =
+        "<element " +
+        "href=\"http://example.org/valid-href\" " +
+        "src=\"http://example.org/valid-src\"/> " +
+        "<element " +
+        "href=\"https://example.org/valid-href\" " +
+        "src=\"https://example.org/valid-src\"/> " +
+        "<element " +
+        "href=\"http-evil://example.org/valid-href\" " +
+        "src=\"http-evil://example.org/valid-src\"/> " +
+        "<element " +
+        "href=\"javascript:evil()\" " +
+        "src=\"javascript:evil()\" /> " +
+        "<element " +
+        "href=\"//example.org/valid-href\" " +
+        "src=\"//example.org/valid-src\"/>";
+
+    // TODO: This test is only valid when using a parser that converts empty tags to
+    // balanced tags. The default (Neko) parser does this, with special case logic for handling
+    // empty tags like br or link.
+    String sanitized =
+      "<element " +
+      "href=\"http://example.org/valid-href\" " +
+      "src=\"http://example.org/valid-src\"></element> " +
+      "<element " +
+      "href=\"https://example.org/valid-href\" " +
+      "src=\"https://example.org/valid-src\"></element> " +
+      "<element></element> " +
+      "<element></element> " +
+      "<element " +
+      "href=\"//example.org/valid-href\" " +
+      "src=\"//example.org/valid-src\"></element>";
+
+    Gadget gadget = new Gadget().setContext(sanitaryGadgetContext);
+
+    assertEquals(sanitized, rewrite(gadget, markup, set("element"), set("href", "src")));
+  }
+
+  @Test
+  public void allCommentsStripped() {
+    String markup = "<b>Hello, world</b><!--<b>evil</b>-->";
+
+    Gadget gadget = new Gadget().setContext(sanitaryGadgetContext);
+
+    assertEquals("<b>Hello, world</b>", rewrite(gadget, markup, set("b"), set()));
+  }
+
+  @Test
+  public void doesNothingWhenNotSanitized() {
+    String markup = "<script src=\"http://evil.org/evil\"></script> <b>hello</b>";
+
+    Gadget gadget = new Gadget().setContext(unsanitaryGadgetContext);
+
+    assertEquals(markup, rewrite(gadget, markup, set("b"), set()));
+  }
+
+  private static class TestParseModule extends AbstractModule {
+
+    @Override
+    protected void configure() {
+      bind(GadgetHtmlParser.class).to(NekoHtmlParser.class);
+      bind(DOMImplementation.class).toProvider(DOMImplementationProvider.class);
+    }
+
+    /**
+     * Provider of new HTMLDocument implementations. Used to hide XML parser weirdness
+     */
+    public static class DOMImplementationProvider implements Provider<DOMImplementation> {
+
+      DOMImplementation domImpl;
+
+      public DOMImplementationProvider() {
+        try {
+          DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
+          // Require the traversal API
+          domImpl = registry.getDOMImplementation("XML 1.0 Traversal 2.0");
+        } catch (Exception e) {
+          // Try another
+        }
+        // This is ugly but effective
+        try {
+          if (domImpl == null) {
+            domImpl = (DOMImplementation)
+                Class.forName("org.apache.xerces.internal.dom.DOMImplementationImpl").
+                    getMethod("getDOMImplementation").invoke(null);
+          }
+        } catch (Exception ex) {
+          //try another
+        }
+        try {
+          if (domImpl == null) {
+          domImpl = (DOMImplementation)
+            Class.forName("com.sun.org.apache.xerces.internal.dom.DOMImplementationImpl").
+                getMethod("getDOMImplementation").invoke(null);
+          }
+        } catch (Exception ex) {
+          throw new RuntimeException("Could not find HTML DOM implementation", ex);
+        }
+      }
+
+      public DOMImplementation get() {
+        return domImpl;
+      }
+    }
+  }
+}