You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by li...@apache.org on 2010/09/01 08:57:19 UTC

svn commit: r991446 - in /shindig/trunk/java/gadgets/src: main/java/org/apache/shindig/gadgets/rewrite/ main/java/org/apache/shindig/gadgets/uri/ test/java/org/apache/shindig/gadgets/rewrite/ test/java/org/apache/shindig/gadgets/uri/

Author: lindner
Date: Wed Sep  1 06:57:19 2010
New Revision: 991446

URL: http://svn.apache.org/viewvc?rev=991446&view=rev
Log:
Patch from Gagandeep Singh | Removing charset information from meta http-equiv content type

Added:
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java
    shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java
Modified:
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java
    shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java

Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java?rev=991446&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java Wed Sep  1 06:57:19 2010
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.rewrite;
+
+import com.google.common.collect.ImmutableList;
+import com.google.inject.Inject;
+
+/**
+ * Removes charset information from <meta http-equiv="Content-Type">
+ */
+public class ContentTypeCharsetRemoverRewriter extends DomWalker.Rewriter {
+  @Inject
+  public ContentTypeCharsetRemoverRewriter() {
+    super(ImmutableList.<DomWalker.Visitor>of(new ContentTypeCharsetRemoverVisitor()));
+  }
+}

Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java?rev=991446&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java Wed Sep  1 06:57:19 2010
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.rewrite;
+
+import org.apache.shindig.gadgets.Gadget;
+import org.apache.shindig.gadgets.uri.UriUtils;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+
+import java.util.List;
+
+/**
+ * Removes charset information from <meta http-equiv="Content-Type">
+ */
+public class ContentTypeCharsetRemoverVisitor implements DomWalker.Visitor {
+  public final static String CONTENT = "content";
+  public final static String CONTENT_TYPE = "content-type";
+  public final static String HTTP_EQUIV = "http-equiv";
+  public final static String META = "meta";
+
+  // @Override
+  public VisitStatus visit(Gadget gadget, Node node) throws RewritingException {
+    if (node.getNodeType() == Node.ELEMENT_NODE &&
+        META.equalsIgnoreCase(node.getNodeName())) {
+
+      Element elem = (Element) node;
+      String httpEquip = elem.getAttribute(HTTP_EQUIV);
+      String content = elem.getAttribute(CONTENT);
+      if (httpEquip != null && content != null &&
+          CONTENT_TYPE.equalsIgnoreCase(httpEquip)) {
+        elem.setAttribute(CONTENT, UriUtils.getContentTypeWithoutCharset(content));
+        return VisitStatus.MODIFY;
+      }
+    }
+    return VisitStatus.BYPASS;
+  }
+
+  // @Override
+  public boolean revisit(Gadget gadget, List<Node> nodes) {
+    // Edits in place.
+    return false;
+  }
+}

Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java?rev=991446&r1=991445&r2=991446&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java Wed Sep  1 06:57:19 2010
@@ -26,16 +26,17 @@ import org.apache.shindig.gadgets.http.H
 import org.apache.shindig.gadgets.http.HttpResponseBuilder;
 
 import java.io.IOException;
-import java.util.logging.Logger;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.logging.Logger;
 
 /**
  * Utility functions related to URI and Http servlet response management.
  */
 public class UriUtils {
+  public static final String CHARSET = "charset";
   private static final Logger LOG = Logger.getLogger(UriUtils.class.getName());
 
   private UriUtils() {}
@@ -248,4 +249,27 @@ public class UriUtils {
       }
     }
   }
+
+  /**
+   * Parses the value of content-type header and returns the content type header
+   * without the 'charset' attribute.
+   * @param content The content type header value.
+   * @return Content type header value without charset.
+   */
+  public static String getContentTypeWithoutCharset(String content) {
+    String contentTypeWithoutCharset = content;
+    String[] parts = StringUtils.split(content, ';');
+    if (parts.length >= 2) {
+      contentTypeWithoutCharset = parts[0];
+      for (int i = 1; i < parts.length; i++) {
+        String parameterAndValue = parts[i].trim().toLowerCase();
+        String[] splits = StringUtils.split(parameterAndValue, '=');
+        if (splits.length > 0 && !splits[0].trim().equals(CHARSET)) {
+          contentTypeWithoutCharset += ";" + parts[i];
+        }
+      }
+    }
+
+    return contentTypeWithoutCharset;
+  }
 }

Added: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java?rev=991446&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java (added)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java Wed Sep  1 06:57:19 2010
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.rewrite;
+
+import org.apache.shindig.common.uri.Uri;
+import org.apache.shindig.gadgets.Gadget;
+import org.apache.shindig.gadgets.http.HttpRequest;
+import org.apache.shindig.gadgets.parse.ParseModule;
+import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for ContentTypeCharsetRemoverRewriter.
+ */
+public class ContentTypeCharsetRemoverRewriterTest extends DomWalkerTestBase {
+  private CajaHtmlParser htmlParser;
+
+  @Before
+  public void setUp() {
+    super.setUp();
+    ParseModule.DOMImplementationProvider domImpl =
+        new ParseModule.DOMImplementationProvider();
+    htmlParser = new CajaHtmlParser(domImpl.get());
+  }
+
+  @Test
+  public void testContentTypeCharsetRemoved() throws Exception {
+    String html = "<html><head>"
+                  + "<META Content=\"hello world\" "
+                  + "Http-equiv=\"Content-Title\">"
+                  + "<META Content=\"text/html ; charset = \'GBK\'\" "
+                  + "Http-equiv=\"Content-TYPE\">"
+                  + "<META Content=\"gzip\" "
+                  + "Http-EQuIv=\"Content-Encoding\">"
+                  + "</head><body><a href=\"hello\">Hello</a>"
+                  + "</body></html>";
+    String expected = "<html><head>"
+                      + "<meta content=\"hello world\" "
+                      + "http-equiv=\"Content-Title\">"
+                      + "<meta content=\"text/html \" "
+                      + "http-equiv=\"Content-TYPE\">"
+                      + "<meta content=\"gzip\" "
+                      + "http-equiv=\"Content-Encoding\">"
+                      + "</head><body><a href=\"hello\">Hello</a>\n"
+                      + "</body></html>";
+
+    ContentTypeCharsetRemoverRewriter rewriter =
+        new ContentTypeCharsetRemoverRewriter();
+    Gadget gadget = DomWalker.makeGadget(new HttpRequest(
+        Uri.parse("http://1.com/")));
+    MutableContent mc = new MutableContent(htmlParser, html);
+    rewriter.rewrite(gadget, mc);
+
+    assertEquals(expected, mc.getContent());
+  }
+
+  @Test
+  public void testNoMetaNode() throws Exception {
+    String html = "<html><head><title>hello</title>"
+                  + "</head><body><a href=\"hello\">Hello</a>"
+                  + "</body></html>";
+    String expected = "<html><head><title>hello</title>"
+                      + "</head><body><a href=\"hello\">Hello</a>"
+                      + "</body></html>";
+
+    ContentTypeCharsetRemoverRewriter rewriter =
+        new ContentTypeCharsetRemoverRewriter();
+    Gadget gadget = DomWalker.makeGadget(new HttpRequest(
+        Uri.parse("http://1.com/")));
+    MutableContent mc = new MutableContent(htmlParser, html);
+    rewriter.rewrite(gadget, mc);
+
+    assertEquals(expected, mc.getContent());
+  }
+
+  @Test
+  public void testMalformedCharset() throws Exception {
+    String html = "<html><head>"
+                  + "<META Content=\"text/html ; pharset=\'hello\'; hello=world\" "
+                  + "Http-equiv=\"Content-TYPE\">"
+                  + "</head><body><a href=\"hello\">Hello</a>"
+                  + "</body></html>";
+    String expected = "<html><head>"
+                      + "<meta content=\"text/html ; pharset=\'hello\'; hello=world\" "
+                      + "http-equiv=\"Content-TYPE\">"
+                      + "</head><body><a href=\"hello\">Hello</a>\n"
+                      + "</body></html>";
+
+    ContentTypeCharsetRemoverRewriter rewriter =
+        new ContentTypeCharsetRemoverRewriter();
+    Gadget gadget = DomWalker.makeGadget(new HttpRequest(
+        Uri.parse("http://1.com/")));
+    MutableContent mc = new MutableContent(htmlParser, html);
+    rewriter.rewrite(gadget, mc);
+
+    assertEquals(expected, mc.getContent());
+
+    html = "<html><head>"
+           + "<META Content=\"text/html ; charsett=\'hello\'; hello=world\" "
+           + "Http-equiv=\"Content-TYPE\">"
+           + "</head><body><a href=\"hello\">Hello</a>"
+           + "</body></html>";
+    expected = "<html><head>"
+               + "<meta content=\"text/html ; charsett='hello'; hello=world\" "
+               + "http-equiv=\"Content-TYPE\">"
+               + "</head><body><a href=\"hello\">Hello</a>\n"
+               + "</body></html>";
+
+    mc = new MutableContent(htmlParser, html);
+    rewriter.rewrite(gadget, mc);
+
+    assertEquals(expected, mc.getContent());
+  }
+}

Modified: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java?rev=991446&r1=991445&r2=991446&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java (original)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java Wed Sep  1 06:57:19 2010
@@ -18,8 +18,6 @@
  */
 package org.apache.shindig.gadgets.uri;
 
-import static org.junit.Assert.assertEquals;
-
 import com.google.inject.internal.ImmutableList;
 import com.google.inject.internal.ImmutableMap;
 import org.apache.shindig.common.uri.Uri;
@@ -30,6 +28,8 @@ import org.junit.Test;
 
 import java.util.*;
 
+import static org.junit.Assert.assertEquals;
+
 /**
  * Tests for UriUtils.
  */
@@ -252,4 +252,20 @@ public class UriUtilsTest {
 
     assertEquals(data, req.getPostBodyAsString());
   }
+
+  @Test
+  public void testGetContentTypeWithoutCharset() {
+    assertEquals("text/html",
+                 UriUtils.getContentTypeWithoutCharset("text/html"));
+    assertEquals("text/html;",
+                 UriUtils.getContentTypeWithoutCharset("text/html;"));
+    assertEquals("text/html",
+                 UriUtils.getContentTypeWithoutCharset("text/html; charset=hello"));
+    assertEquals("text/html; hello=world",
+                 UriUtils.getContentTypeWithoutCharset("text/html; charset=hello; hello=world"));
+    assertEquals("text/html; pharset=hello; hello=world",
+                 UriUtils.getContentTypeWithoutCharset("text/html; pharset=hello; hello=world"));
+    assertEquals("text/html; charsett=utf; hello=world",
+                 UriUtils.getContentTypeWithoutCharset("text/html; charsett=utf; ; hello=world"));
+  }
 }