You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@shindig.apache.org by jo...@apache.org on 2010/07/27 22:58:47 UTC

svn commit: r979867 - in /shindig/trunk/java/gadgets/src: main/java/org/apache/shindig/gadgets/http/ main/java/org/apache/shindig/gadgets/rewrite/ test/java/org/apache/shindig/gadgets/http/ test/java/org/apache/shindig/gadgets/rewrite/

Author: johnh
Date: Tue Jul 27 20:58:46 2010
New Revision: 979867

URL: http://svn.apache.org/viewvc?rev=979867&view=rev
Log:
Ensure Content-Type and encoding consistency in HttpResponse.

Specifically, when creating an HttpResponse from an HttpResponseBuilder, which in turn has had its setContent or getDocument/documentChanged() methods called, the resulting HttpResponse's Content-Type header converts to UTF-8. This is because setContent and getDocument/documentChanged() implicitly convert all Strings to UTF-8.

In sum, this protects against input content of a non-UTF8 charset being converted to UTF8 with inconsistent headers, messing up browser interpretation of same.


Modified:
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java
    shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
    shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
    shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java

Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponse.java Tue Jul 27 20:58:46 2010
@@ -163,10 +163,12 @@ public final class HttpResponse implemen
   HttpResponse(HttpResponseBuilder builder) {
     httpStatusCode = builder.getHttpStatusCode();
     Multimap<String, String> headerCopy = HttpResponse.newHeaderMultimap();
-    headerCopy.putAll(builder.getHeaders());
 
     // Always safe, HttpResponseBuilder won't modify the body.
     responseBytes = builder.getResponse();
+    
+    // Copy headers after builder.getResponse(), since that can modify Content-Type.
+    headerCopy.putAll(builder.getHeaders());
 
     Map<String, String> metadataCopy = Maps.newHashMap(builder.getMetadata());
     metadata = Collections.unmodifiableMap(metadataCopy);
@@ -221,6 +223,13 @@ public final class HttpResponse implemen
   public String getEncoding() {
     return encoding.name();
   }
+  
+  /**
+   * @return The Charset of the response body's encoding, if we were able to determine it.
+   */
+  public Charset getEncodingCharset() {
+    return encoding;
+  }
 
   /**
    * @return the content length
@@ -246,8 +255,8 @@ public final class HttpResponse implemen
   public String getResponseAsString() {
     if (responseString == null) {
       responseString = encoding.decode(ByteBuffer.wrap(responseBytes)).toString();
-
-      // Strip BOM if present
+      
+      // Strip BOM if present.
       if (responseString.length() > 0 && responseString.codePointAt(0) == 0xFEFF) {
         responseString = responseString.substring(1);
       }

Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/http/HttpResponseBuilder.java Tue Jul 27 20:58:46 2010
@@ -94,8 +94,7 @@ public class HttpResponseBuilder extends
    * @param body The response string.  Converted to UTF-8 bytes and copied when set.
    */
   public HttpResponseBuilder setResponseString(String body) {
-    setContentBytes(CharsetUtil.getUtf8Bytes(body));
-    setEncoding(CharsetUtil.UTF8);
+    setContentBytes(CharsetUtil.getUtf8Bytes(body), CharsetUtil.UTF8);
     return this;
   }
 
@@ -108,10 +107,18 @@ public class HttpResponseBuilder extends
       String[] parts = StringUtils.split(contentType, ';');
       for (String part : parts) {
         if (!part.contains("charset=")) {
-          newContentTypeBuilder.append(part).append("; ");
+          if (newContentTypeBuilder.length() > 0) {
+            newContentTypeBuilder.append("; ");
+          }
+          newContentTypeBuilder.append(part);
         }
       }
-      newContentTypeBuilder.append("charset=").append(charset.name());
+      if (charset != null) {
+        if (newContentTypeBuilder.length() > 0) {
+          newContentTypeBuilder.append("; ");
+        }
+        newContentTypeBuilder.append("charset=").append(charset.name());
+      }
       values.clear();
       String newContentType = newContentTypeBuilder.toString();
       values.add(newContentType);
@@ -268,6 +275,22 @@ public class HttpResponseBuilder extends
     return httpStatusCode;
   }
   
+  /**
+   * Ensures that, when setting content bytes, the bytes' encoding is reflected
+   * in the current Content-Type header.
+   * Note, this method does NOT override existing Content-Type values if newEncoding is null.
+   * This allows charset to be set by header only, along with a byte array -- a very typical,
+   * and important, pattern when creating an HttpResponse in an HttpFetcher.
+   */
+  @Override
+  protected void setContentBytesState(byte[] newBytes, Charset newEncoding) {
+    super.setContentBytesState(newBytes, newEncoding);
+    
+    // Set the new encoding of the raw bytes, in order to ensure that
+    // Content-Type headers are in sync w/ the content's encoding.
+    if (newEncoding != null) setEncoding(newEncoding);
+  }
+  
   private static GadgetHtmlParser unsupportedParser() {
     return new GadgetHtmlParser(null) {
       @Override

Modified: shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java (original)
+++ shindig/trunk/java/gadgets/src/main/java/org/apache/shindig/gadgets/rewrite/MutableContent.java Tue Jul 27 20:58:46 2010
@@ -17,6 +17,9 @@
  */
 package org.apache.shindig.gadgets.rewrite;
 
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
 import org.apache.commons.io.IOUtils;
 import org.apache.shindig.common.util.CharsetUtil;
 import org.apache.shindig.gadgets.GadgetException;
@@ -28,13 +31,11 @@ import org.w3c.dom.Document;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.Arrays;
 import java.util.Map;
 
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Maps;
-
 /**
  * Object that maintains a String representation of arbitrary contents
  * and a consistent view of those contents as an HTML parse tree.
@@ -42,9 +43,16 @@ import com.google.common.collect.Maps;
 public class MutableContent {
   private static final Map<String, Object> EMPTY_MAP = ImmutableMap.of();
 
+  // String representation of contentBytes taking into account the correct
+  // encoding of the content.
   private String content;
   private byte[] contentBytes;
+
+  // Encoding of the content bytes. UTF-8 by default.
+  private Charset contentEncoding;
+
   private HttpResponse contentSource;
+
   private Document document;
   private int numChanges;
   private final GadgetHtmlParser contentParser;
@@ -66,6 +74,7 @@ public class MutableContent {
     this.contentParser = contentParser;
     this.content = content;
     this.numChanges = 0;
+    this.contentEncoding = CharsetUtil.UTF8;
   }
 
   /**
@@ -75,6 +84,7 @@ public class MutableContent {
   public MutableContent(GadgetHtmlParser contentParser, HttpResponse contentSource) {
     this.contentParser = contentParser;
     this.contentSource = contentSource;
+    this.contentEncoding = contentSource != null ? contentSource.getEncodingCharset() : null;
   }
 
   /**
@@ -98,11 +108,8 @@ public class MutableContent {
       } else if (document != null) {
         content = HtmlSerialization.serialize(document);
       } else if (contentBytes != null) {
-        try {
-          content = new String(contentBytes, "UTF8");
-        } catch (UnsupportedEncodingException e) {
-          // Never happens.
-        }
+        Charset useEncoding = contentEncoding != null ? contentEncoding : CharsetUtil.UTF8;
+        content = useEncoding.decode(ByteBuffer.wrap(contentBytes)).toString();
       }
     }
     return content;
@@ -136,37 +143,67 @@ public class MutableContent {
     if (contentBytes == null) {
       if (contentSource != null) {
         try {
-          contentBytes = IOUtils.toByteArray(contentSource.getResponse());
+          setContentBytesState(IOUtils.toByteArray(contentSource.getResponse()),
+              contentSource.getEncodingCharset());
           contentSource = null;
         } catch (IOException e) {
           // Doesn't occur; responseBytes wrapped as a ByteArrayInputStream.
         }
       } else if (content != null) {
-        contentBytes = CharsetUtil.getUtf8Bytes(content);
+        // If retrieving a String here, we've already converted to UTF8.
+        // Be sure to reflect this when setting bytes.
+        // In the case of HttpResponseBuilder, this re-sets charset in Content-Type
+        // to UTF-8 rather than whatever it was before. We do this to standardize
+        // on UTF-8 for all String handling.
+        setContentBytesState(CharsetUtil.getUtf8Bytes(content), CharsetUtil.UTF8);
       } else if (document != null) {
-        contentBytes = CharsetUtil.getUtf8Bytes(HtmlSerialization.serialize(document));
+        setContentBytesState(
+            CharsetUtil.getUtf8Bytes(HtmlSerialization.serialize(document)), CharsetUtil.UTF8);
       }
     }
     return contentBytes;
   }
 
   /**
-   * Sets the object's contentBytes as the given raw input.
+   * Sets the object's contentBytes as the given raw input. If ever interpreted
+   * as a String, the data will be decoded as the encoding specified.
    * Note, this operation may clear the document if the content has changed.
    * Also note, it's mandated that the new bytes array will NOT be modified
    * by the caller of this API. The array is not copied, for performance reasons.
    * If the caller may modify a byte array, it MUST pass in a new copy.
    * @param newBytes New content.
    */
-  public void setContentBytes(byte[] newBytes) {
+  public void setContentBytes(byte[] newBytes, Charset newEncoding) {
     if (contentBytes == null || !Arrays.equals(contentBytes, newBytes)) {
-      contentBytes = newBytes;
+      setContentBytesState(newBytes, newEncoding);
       document = null;
       contentSource = null;
       content = null;
       incrementNumChanges();
     }
   }
+  
+  /**
+   * Sets content to new byte array, with unspecified charset. It is
+   * recommended to use the {@code setContentBytes(byte[], Charset)} API instead,
+   * where possible.
+   * @param newBytes New content.
+   */
+  public final void setContentBytes(byte[] newBytes) {
+    setContentBytes(newBytes, null);
+  }
+  
+  /**
+   * Sets internal state having to do with content bytes, from the provided
+   * byte array and charset.
+   * This MUST be the only place in which MutableContent's notion of encoding is mutated.
+   * @param newBytes New content.
+   * @param newEncoding Encoding for the bytes, or null for unspecified.
+   */
+  protected void setContentBytesState(byte[] newBytes, Charset newEncoding) {
+    contentBytes = newBytes;
+    contentEncoding = newEncoding;
+  }
 
   /**
    * Notification that the content of the document has changed. Causes the content

Modified: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java (original)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/http/HttpResponseTest.java Tue Jul 27 20:58:46 2010
@@ -50,7 +50,6 @@ public class HttpResponseTest extends As
 
   private static final String BIG5_STRING = "\u4F60\u597D";
 
-
   private static int roundToSeconds(long ts) {
     return (int)(ts / 1000);
   }
@@ -124,6 +123,15 @@ public class HttpResponseTest extends As
         .setResponse(BIG5_DATA)
         .create();
     assertEquals(BIG5_STRING, response.getResponseAsString());
+    assertEquals("text/plain; charset=BIG5", response.getHeader("Content-Type"));
+    
+    HttpResponseBuilder subResponseBuilder = new HttpResponseBuilder(response);
+    subResponseBuilder.setContent(response.getResponseAsString());
+    HttpResponse subResponse = subResponseBuilder.create();
+    // Same string.
+    assertEquals("text/plain; charset=UTF-8", subResponse.getHeader("Content-Type"));
+    assertEquals(BIG5_STRING, subResponse.getResponseAsString());
+    // New encoding.
   }
 
   @Test

Modified: shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java
URL: http://svn.apache.org/viewvc/shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java?rev=979867&r1=979866&r2=979867&view=diff
==============================================================================
--- shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java (original)
+++ shindig/trunk/java/gadgets/src/test/java/org/apache/shindig/gadgets/rewrite/MutableContentTest.java Tue Jul 27 20:58:46 2010
@@ -20,6 +20,7 @@ package org.apache.shindig.gadgets.rewri
 
 import org.apache.commons.io.IOUtils;
 import org.apache.shindig.common.PropertiesModule;
+import org.apache.shindig.common.util.CharsetUtil;
 import org.apache.shindig.gadgets.parse.GadgetHtmlParser;
 import org.apache.shindig.gadgets.parse.ParseModule;
 
@@ -94,7 +95,7 @@ public class MutableContentTest {
   @Test
   public void modifyBytesReflectedInContentAndTree() throws Exception {
     assertEquals(0, mhc.getNumChanges());
-    mhc.setContentBytes("NEW CONTENT".getBytes("UTF8"));
+    mhc.setContentBytes("NEW CONTENT".getBytes("UTF8"), CharsetUtil.UTF8);
     assertEquals(1, mhc.getNumChanges());
     Document document = mhc.getDocument();
     assertEquals(1, document.getChildNodes().getLength());