You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by li...@apache.org on 2010/09/01 08:57:19 UTC
svn commit: r991446 - in /shindig/trunk/java/gadgets/src:
main/java/org/apache/shindig/gadgets/rewrite/
main/java/org/apache/shindig/gadgets/uri/
test/java/org/apache/shindig/gadgets/rewrite/
test/java/org/apache/shindig/gadgets/uri/
Author: lindner
Date: Wed Sep 1 06:57:19 2010
New Revision: 991446
URL: http://svn.apache.org/viewvc?rev=991446&view=rev
Log:
Patch from Gagandeep Singh | Removing charset information from meta http-equiv content type
Added:
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java
shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java
Modified:
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java
shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java
Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java?rev=991446&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriter.java Wed Sep 1 06:57:19 2010
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.rewrite;
+
+import com.google.common.collect.ImmutableList;
+import com.google.inject.Inject;
+
+/**
+ * Rewriter that removes charset information from <meta http-equiv="Content-Type"> by delegating to a single ContentTypeCharsetRemoverVisitor.
+ */
+public class ContentTypeCharsetRemoverRewriter extends DomWalker.Rewriter {
+ @Inject
+ public ContentTypeCharsetRemoverRewriter() {
+ super(ImmutableList.<DomWalker.Visitor>of(new ContentTypeCharsetRemoverVisitor()));
+ }
+}
Added: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java?rev=991446&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java (added)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverVisitor.java Wed Sep 1 06:57:19 2010
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.rewrite;
+
+import org.apache.shindig.gadgets.Gadget;
+import org.apache.shindig.gadgets.uri.UriUtils;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+
+import java.util.List;
+
+/**
+ * Visitor that strips the charset parameter from the content attribute of <meta http-equiv="Content-Type"> elements.
+ */
+public class ContentTypeCharsetRemoverVisitor implements DomWalker.Visitor {
+ public final static String CONTENT = "content";
+ public final static String CONTENT_TYPE = "content-type";
+ public final static String HTTP_EQUIV = "http-equiv";
+ public final static String META = "meta";
+
+ // @Override
+ public VisitStatus visit(Gadget gadget, Node node) throws RewritingException {
+ if (node.getNodeType() == Node.ELEMENT_NODE &&
+ META.equalsIgnoreCase(node.getNodeName())) { // tag name is matched case-insensitively
+
+ Element elem = (Element) node;
+ String httpEquip = elem.getAttribute(HTTP_EQUIV);
+ String content = elem.getAttribute(CONTENT);
+ if (httpEquip != null && content != null &&
+ CONTENT_TYPE.equalsIgnoreCase(httpEquip)) { // http-equiv value is matched case-insensitively
+ elem.setAttribute(CONTENT, UriUtils.getContentTypeWithoutCharset(content));
+ return VisitStatus.MODIFY; // reported for every Content-Type meta, even when the content held no charset
+ }
+ }
+ return VisitStatus.BYPASS;
+ }
+
+ // @Override
+ public boolean revisit(Gadget gadget, List<Node> nodes) {
+ // Edits are made in place during visit(), so this always returns false.
+ return false;
+ }
+}
Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java?rev=991446&r1=991445&r2=991446&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/uri/UriUtils.java Wed Sep 1 06:57:19 2010
@@ -26,16 +26,17 @@ import org.apache.shindig.gadgets.http.H
import org.apache.shindig.gadgets.http.HttpResponseBuilder;
import java.io.IOException;
-import java.util.logging.Logger;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.logging.Logger;
/**
* Utility functions related to URI and Http servlet response management.
*/
public class UriUtils {
+ public static final String CHARSET = "charset";
private static final Logger LOG = Logger.getLogger(UriUtils.class.getName());
private UriUtils() {}
@@ -248,4 +249,27 @@ public class UriUtils {
}
}
}
+
+ /**
+ * Splits the value of a content-type header on ';' and returns it with every part
+ * whose trimmed, lower-cased parameter name equals 'charset' removed; kept parts are re-joined verbatim with ';'.
+ * @param content The content type header value.
+ * @return Content type header value without charset; the input is returned unchanged when the split yields fewer than two parts.
+ */
+ public static String getContentTypeWithoutCharset(String content) {
+ String contentTypeWithoutCharset = content;
+ String[] parts = StringUtils.split(content, ';');
+ if (parts.length >= 2) {
+ contentTypeWithoutCharset = parts[0];
+ for (int i = 1; i < parts.length; i++) {
+ String parameterAndValue = parts[i].trim().toLowerCase(); // NOTE(review): lower-cases with the default locale - confirm that is intended
+ String[] splits = StringUtils.split(parameterAndValue, '=');
+ if (splits.length > 0 && !splits[0].trim().equals(CHARSET)) { // skips parts with no parameter name and parts named 'charset'
+ contentTypeWithoutCharset += ";" + parts[i];
+ }
+ }
+ }
+
+ return contentTypeWithoutCharset;
+ }
}
Added: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java?rev=991446&view=auto
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java (added)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/ContentTypeCharsetRemoverRewriterTest.java Wed Sep 1 06:57:19 2010
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.shindig.gadgets.rewrite;
+
+import org.apache.shindig.common.uri.Uri;
+import org.apache.shindig.gadgets.Gadget;
+import org.apache.shindig.gadgets.http.HttpRequest;
+import org.apache.shindig.gadgets.parse.ParseModule;
+import org.apache.shindig.gadgets.parse.caja.CajaHtmlParser;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for ContentTypeCharsetRemoverRewriter: charset removal, markup without meta nodes, and misspelled charset parameter names.
+ */
+public class ContentTypeCharsetRemoverRewriterTest extends DomWalkerTestBase {
+ private CajaHtmlParser htmlParser;
+
+ @Before
+ public void setUp() {
+ super.setUp();
+ ParseModule.DOMImplementationProvider domImpl =
+ new ParseModule.DOMImplementationProvider();
+ htmlParser = new CajaHtmlParser(domImpl.get());
+ }
+
+ @Test
+ public void testContentTypeCharsetRemoved() throws Exception {
+ String html = "<html><head>"
+ + "<META Content=\"hello world\" "
+ + "Http-equiv=\"Content-Title\">"
+ + "<META Content=\"text/html ; charset = \'GBK\'\" "
+ + "Http-equiv=\"Content-TYPE\">"
+ + "<META Content=\"gzip\" "
+ + "Http-EQuIv=\"Content-Encoding\">"
+ + "</head><body><a href=\"hello\">Hello</a>"
+ + "</body></html>";
+ String expected = "<html><head>"
+ + "<meta content=\"hello world\" "
+ + "http-equiv=\"Content-Title\">"
+ + "<meta content=\"text/html \" "
+ + "http-equiv=\"Content-TYPE\">"
+ + "<meta content=\"gzip\" "
+ + "http-equiv=\"Content-Encoding\">"
+ + "</head><body><a href=\"hello\">Hello</a>\n"
+ + "</body></html>";
+
+ ContentTypeCharsetRemoverRewriter rewriter =
+ new ContentTypeCharsetRemoverRewriter();
+ Gadget gadget = DomWalker.makeGadget(new HttpRequest(
+ Uri.parse("http://1.com/")));
+ MutableContent mc = new MutableContent(htmlParser, html);
+ rewriter.rewrite(gadget, mc);
+
+ assertEquals(expected, mc.getContent());
+ }
+
+ @Test
+ public void testNoMetaNode() throws Exception {
+ String html = "<html><head><title>hello</title>"
+ + "</head><body><a href=\"hello\">Hello</a>"
+ + "</body></html>";
+ String expected = "<html><head><title>hello</title>"
+ + "</head><body><a href=\"hello\">Hello</a>"
+ + "</body></html>";
+
+ ContentTypeCharsetRemoverRewriter rewriter =
+ new ContentTypeCharsetRemoverRewriter();
+ Gadget gadget = DomWalker.makeGadget(new HttpRequest(
+ Uri.parse("http://1.com/")));
+ MutableContent mc = new MutableContent(htmlParser, html);
+ rewriter.rewrite(gadget, mc);
+
+ assertEquals(expected, mc.getContent());
+ }
+
+ @Test
+ public void testMalformedCharset() throws Exception { // "pharset" and "charsett" are not "charset", so both content values are expected to survive
+ String html = "<html><head>"
+ + "<META Content=\"text/html ; pharset=\'hello\'; hello=world\" "
+ + "Http-equiv=\"Content-TYPE\">"
+ + "</head><body><a href=\"hello\">Hello</a>"
+ + "</body></html>";
+ String expected = "<html><head>"
+ + "<meta content=\"text/html ; pharset=\'hello\'; hello=world\" "
+ + "http-equiv=\"Content-TYPE\">"
+ + "</head><body><a href=\"hello\">Hello</a>\n"
+ + "</body></html>";
+
+ ContentTypeCharsetRemoverRewriter rewriter =
+ new ContentTypeCharsetRemoverRewriter();
+ Gadget gadget = DomWalker.makeGadget(new HttpRequest(
+ Uri.parse("http://1.com/")));
+ MutableContent mc = new MutableContent(htmlParser, html);
+ rewriter.rewrite(gadget, mc);
+
+ assertEquals(expected, mc.getContent());
+
+ html = "<html><head>"
+ + "<META Content=\"text/html ; charsett=\'hello\'; hello=world\" "
+ + "Http-equiv=\"Content-TYPE\">"
+ + "</head><body><a href=\"hello\">Hello</a>"
+ + "</body></html>";
+ expected = "<html><head>"
+ + "<meta content=\"text/html ; charsett='hello'; hello=world\" "
+ + "http-equiv=\"Content-TYPE\">"
+ + "</head><body><a href=\"hello\">Hello</a>\n"
+ + "</body></html>";
+
+ mc = new MutableContent(htmlParser, html);
+ rewriter.rewrite(gadget, mc);
+
+ assertEquals(expected, mc.getContent());
+ }
+}
Modified: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java?rev=991446&r1=991445&r2=991446&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java (original)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/uri/UriUtilsTest.java Wed Sep 1 06:57:19 2010
@@ -18,8 +18,6 @@
*/
package org.apache.shindig.gadgets.uri;
-import static org.junit.Assert.assertEquals;
-
import com.google.inject.internal.ImmutableList;
import com.google.inject.internal.ImmutableMap;
import org.apache.shindig.common.uri.Uri;
@@ -30,6 +28,8 @@ import org.junit.Test;
import java.util.*;
+import static org.junit.Assert.assertEquals;
+
/**
* Tests for UriUtils.
*/
@@ -252,4 +252,20 @@ public class UriUtilsTest {
assertEquals(data, req.getPostBodyAsString());
}
+
+ @Test
+ public void testGetContentTypeWithoutCharset() {
+ assertEquals("text/html",
+ UriUtils.getContentTypeWithoutCharset("text/html"));
+ assertEquals("text/html;", // a lone trailing ';' is expected to be preserved as-is
+ UriUtils.getContentTypeWithoutCharset("text/html;"));
+ assertEquals("text/html",
+ UriUtils.getContentTypeWithoutCharset("text/html; charset=hello"));
+ assertEquals("text/html; hello=world",
+ UriUtils.getContentTypeWithoutCharset("text/html; charset=hello; hello=world"));
+ assertEquals("text/html; pharset=hello; hello=world",
+ UriUtils.getContentTypeWithoutCharset("text/html; pharset=hello; hello=world"));
+ assertEquals("text/html; charsett=utf; hello=world", // the blank part between "; ;" is expected to be dropped
+ UriUtils.getContentTypeWithoutCharset("text/html; charsett=utf; ; hello=world"));
+ }
}