You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by jo...@apache.org on 2010/07/27 22:58:47 UTC
svn commit: r979867 - in /shindig/trunk/java/gadgets/src:
main/java/org/apache/shindig/gadgets/http/
main/java/org/apache/shindig/gadgets/rewrite/
test/java/org/apache/shindig/gadgets/http/
test/java/org/apache/shindig/gadgets/rewrite/
Author: johnh
Date: Tue Jul 27 20:58:46 2010
New Revision: 979867
URL: http://svn.apache.org/viewvc?rev=979867&view=rev
Log:
Ensure Content-Type and encoding consistency in HttpResponse.
Specifically, when creating an HttpResponse from an HttpResponseBuilder, which in turn has had its setContent or getDocument/documentChanged() methods called, the resulting HttpResponse's Content-Type header converts to UTF-8. This is because setContent and getDocument/documentChanged() implicitly convert all Strings to UTF-8.
In sum, this protects against input content of a non-UTF8 charset being converted to UTF8 with inconsistent headers, messing up browser interpretation of same.
Modified:
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java
shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java
Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java Tue Jul 27 20:58:46 2010
@@ -163,10 +163,12 @@ public final class HttpResponse implemen
HttpResponse(HttpResponseBuilder builder) {
httpStatusCode = builder.getHttpStatusCode();
Multimap<String, String> headerCopy = HttpResponse.newHeaderMultimap();
- headerCopy.putAll(builder.getHeaders());
// Always safe, HttpResponseBuilder won't modify the body.
responseBytes = builder.getResponse();
+
+ // Copy headers after builder.getResponse(), since that can modify Content-Type.
+ headerCopy.putAll(builder.getHeaders());
Map<String, String> metadataCopy = Maps.newHashMap(builder.getMetadata());
metadata = Collections.unmodifiableMap(metadataCopy);
@@ -221,6 +223,13 @@ public final class HttpResponse implemen
public String getEncoding() {
return encoding.name();
}
+
+ /**
+ * @return The Charset of the response body's encoding, if we were able to determine it.
+ */
+ public Charset getEncodingCharset() {
+ return encoding;
+ }
/**
* @return the content length
@@ -246,8 +255,8 @@ public final class HttpResponse implemen
public String getResponseAsString() {
if (responseString == null) {
responseString = encoding.decode(ByteBuffer.wrap(responseBytes)).toString();
-
- // Strip BOM if present
+
+ // Strip BOM if present.
if (responseString.length() > 0 && responseString.codePointAt(0) == 0xFEFF) {
responseString = responseString.substring(1);
}
Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java Tue Jul 27 20:58:46 2010
@@ -94,8 +94,7 @@ public class HttpResponseBuilder extends
* @param body The response string. Converted to UTF-8 bytes and copied when set.
*/
public HttpResponseBuilder setResponseString(String body) {
- setContentBytes(CharsetUtil.getUtf8Bytes(body));
- setEncoding(CharsetUtil.UTF8);
+ setContentBytes(CharsetUtil.getUtf8Bytes(body), CharsetUtil.UTF8);
return this;
}
@@ -108,10 +107,18 @@ public class HttpResponseBuilder extends
String[] parts = StringUtils.split(contentType, ';');
for (String part : parts) {
if (!part.contains("charset=")) {
- newContentTypeBuilder.append(part).append("; ");
+ if (newContentTypeBuilder.length() > 0) {
+ newContentTypeBuilder.append("; ");
+ }
+ newContentTypeBuilder.append(part);
}
}
- newContentTypeBuilder.append("charset=").append(charset.name());
+ if (charset != null) {
+ if (newContentTypeBuilder.length() > 0) {
+ newContentTypeBuilder.append("; ");
+ }
+ newContentTypeBuilder.append("charset=").append(charset.name());
+ }
values.clear();
String newContentType = newContentTypeBuilder.toString();
values.add(newContentType);
@@ -268,6 +275,22 @@ public class HttpResponseBuilder extends
return httpStatusCode;
}
+ /**
+ * Ensures that, when setting content bytes, the bytes' encoding is reflected
+ * in the current Content-Type header.
+ * Note, this method does NOT override existing Content-Type values if newEncoding is null.
+ * This allows charset to be set by header only, along with a byte array -- a very typical,
+ * and important, pattern when creating an HttpResponse in an HttpFetcher.
+ */
+ @Override
+ protected void setContentBytesState(byte[] newBytes, Charset newEncoding) {
+ super.setContentBytesState(newBytes, newEncoding);
+
+ // Set the new encoding of the raw bytes, in order to ensure that
+ // Content-Type headers are in sync w/ the content's encoding.
+ if (newEncoding != null) setEncoding(newEncoding);
+ }
+
private static GadgetHtmlParser unsupportedParser() {
return new GadgetHtmlParser(null) {
@Override
Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java Tue Jul 27 20:58:46 2010
@@ -17,6 +17,9 @@
*/
package org.apache.shindig.gadgets.rewrite;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
import org.apache.commons.io.IOUtils;
import org.apache.shindig.common.util.CharsetUtil;
import org.apache.shindig.gadgets.GadgetException;
@@ -28,13 +31,11 @@ import org.w3c.dom.Document;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Map;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-
/**
* Object that maintains a String representation of arbitrary contents
* and a consistent view of those contents as an HTML parse tree.
@@ -42,9 +43,16 @@ import com.google.common.collect.Maps;
public class MutableContent {
private static final Map<String, Object> EMPTY_MAP = ImmutableMap.of();
+ // String representation of contentBytes taking into account the correct
+ // encoding of the content.
private String content;
private byte[] contentBytes;
+
+ // Encoding of the content bytes. UTF-8 by default.
+ private Charset contentEncoding;
+
private HttpResponse contentSource;
+
private Document document;
private int numChanges;
private final GadgetHtmlParser contentParser;
@@ -66,6 +74,7 @@ public class MutableContent {
this.contentParser = contentParser;
this.content = content;
this.numChanges = 0;
+ this.contentEncoding = CharsetUtil.UTF8;
}
/**
@@ -75,6 +84,7 @@ public class MutableContent {
public MutableContent(GadgetHtmlParser contentParser, HttpResponse contentSource) {
this.contentParser = contentParser;
this.contentSource = contentSource;
+ this.contentEncoding = contentSource != null ? contentSource.getEncodingCharset() : null;
}
/**
@@ -98,11 +108,8 @@ public class MutableContent {
} else if (document != null) {
content = HtmlSerialization.serialize(document);
} else if (contentBytes != null) {
- try {
- content = new String(contentBytes, "UTF8");
- } catch (UnsupportedEncodingException e) {
- // Never happens.
- }
+ Charset useEncoding = contentEncoding != null ? contentEncoding : CharsetUtil.UTF8;
+ content = useEncoding.decode(ByteBuffer.wrap(contentBytes)).toString();
}
}
return content;
@@ -136,37 +143,67 @@ public class MutableContent {
if (contentBytes == null) {
if (contentSource != null) {
try {
- contentBytes = IOUtils.toByteArray(contentSource.getResponse());
+ setContentBytesState(IOUtils.toByteArray(contentSource.getResponse()),
+ contentSource.getEncodingCharset());
contentSource = null;
} catch (IOException e) {
// Doesn't occur; responseBytes wrapped as a ByteArrayInputStream.
}
} else if (content != null) {
- contentBytes = CharsetUtil.getUtf8Bytes(content);
+ // If retrieving a String here, we've already converted to UTF8.
+ // Be sure to reflect this when setting bytes.
+ // In the case of HttpResponseBuilder, this re-sets charset in Content-Type
+ // to UTF-8 rather than whatever it was before. We do this to standardize
+ // on UTF-8 for all String handling.
+ setContentBytesState(CharsetUtil.getUtf8Bytes(content), CharsetUtil.UTF8);
} else if (document != null) {
- contentBytes = CharsetUtil.getUtf8Bytes(HtmlSerialization.serialize(document));
+ setContentBytesState(
+ CharsetUtil.getUtf8Bytes(HtmlSerialization.serialize(document)), CharsetUtil.UTF8);
}
}
return contentBytes;
}
/**
- * Sets the object's contentBytes as the given raw input.
+ * Sets the object's contentBytes as the given raw input. If ever interpreted
+ * as a String, the data will be decoded as the encoding specified.
* Note, this operation may clear the document if the content has changed.
* Also note, it's mandated that the new bytes array will NOT be modified
* by the caller of this API. The array is not copied, for performance reasons.
* If the caller may modify a byte array, it MUST pass in a new copy.
* @param newBytes New content.
*/
- public void setContentBytes(byte[] newBytes) {
+ public void setContentBytes(byte[] newBytes, Charset newEncoding) {
if (contentBytes == null || !Arrays.equals(contentBytes, newBytes)) {
- contentBytes = newBytes;
+ setContentBytesState(newBytes, newEncoding);
document = null;
contentSource = null;
content = null;
incrementNumChanges();
}
}
+
+ /**
+ * Sets content to new byte array, with unspecified charset. It is
+ * recommended to use the {@code setContentBytes(byte[], Charset)} API instead,
+ * where possible.
+ * @param newBytes New content.
+ */
+ public final void setContentBytes(byte[] newBytes) {
+ setContentBytes(newBytes, null);
+ }
+
+ /**
+ * Sets internal state having to do with content bytes, from the provided
+ * byte array and charset.
+ * This MUST be the only place in which MutableContent's notion of encoding is mutated.
+ * @param newBytes New content.
+ * @param newEncoding Encoding for the bytes, or null for unspecified.
+ */
+ protected void setContentBytesState(byte[] newBytes, Charset newEncoding) {
+ contentBytes = newBytes;
+ contentEncoding = newEncoding;
+ }
/**
* Notification that the content of the document has changed. Causes the content
Modified: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java (original)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java Tue Jul 27 20:58:46 2010
@@ -50,7 +50,6 @@ public class HttpResponseTest extends As
private static final String BIG5_STRING = "\u4F60\u597D";
-
private static int roundToSeconds(long ts) {
return (int)(ts / 1000);
}
@@ -124,6 +123,15 @@ public class HttpResponseTest extends As
.setResponse(BIG5_DATA)
.create();
assertEquals(BIG5_STRING, response.getResponseAsString());
+ assertEquals("text/plain; charset=BIG5", response.getHeader("Content-Type"));
+
+ HttpResponseBuilder subResponseBuilder = new HttpResponseBuilder(response);
+ subResponseBuilder.setContent(response.getResponseAsString());
+ HttpResponse subResponse = subResponseBuilder.create();
+ // Same string.
+ assertEquals("text/plain; charset=UTF-8", subResponse.getHeader("Content-Type"));
+ assertEquals(BIG5_STRING, subResponse.getResponseAsString());
+ // New encoding.
}
@Test
Modified: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java (original)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java Tue Jul 27 20:58:46 2010
@@ -20,6 +20,7 @@ package org.apache.shindig.gadgets.rewri
import org.apache.commons.io.IOUtils;
import org.apache.shindig.common.PropertiesModule;
+import org.apache.shindig.common.util.CharsetUtil;
import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
import org.apache.shindig.gadgets.parse.ParseModule;
@@ -94,7 +95,7 @@ public class MutableContentTest {
@Test
public void modifyBytesReflectedInContentAndTree() throws Exception {
assertEquals(0, mhc.getNumChanges());
- mhc.setContentBytes("NEW CONTENT".getBytes("UTF8"));
+ mhc.setContentBytes("NEW CONTENT".getBytes("UTF8"), CharsetUtil.UTF8);
assertEquals(1, mhc.getNumChanges());
Document document = mhc.getDocument();
assertEquals(1, document.getChildNodes().getLength());