You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/03/09 23:32:10 UTC

svn commit: r1080038 - in /lucene/dev/trunk/solr: CHANGES.txt src/java/org/apache/solr/response/PHPSerializedResponseWriter.java

Author: uschindler
Date: Wed Mar  9 22:32:09 2011
New Revision: 1080038

URL: http://svn.apache.org/viewvc?rev=1080038&view=rev
Log:
SOLR-2414: All ResponseWriters now use only ServletOutputStreams and wrap their own Writer around it when serializing. This fixes the bug in PHPSerializedResponseWriter that produced wrong string length if the servlet container had a broken UTF-8 encoding that was in fact CESU-8 (see SOLR-1091). The hack was removed by this followup-patch.

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1080038&r1=1080037&r2=1080038&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Mar  9 22:32:09 2011
@@ -848,6 +848,14 @@ Other Changes
   hardcoded on text/xml as Content-Type, you have to change them.
   (uschindler, rmuir)
 
+* SOLR-2414: All ResponseWriters now use only ServletOutputStreams
+  and wrap their own Writer around it when serializing. This fixes
+  the bug in PHPSerializedResponseWriter that produced wrong string
+  length if the servlet container had a broken UTF-8 encoding that was
+  in fact CESU-8 (see SOLR-1091). The system property to enable the
+  CESU-8 byte counting in PHPSerializesResponseWriters for broken
+  servlet containers was therefore removed and is now ignored if set.
+  Output is always UTF-8.  (uschindler, yonik, rmuir)
 
 Build
 ----------------------

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java?rev=1080038&r1=1080037&r2=1080038&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/response/PHPSerializedResponseWriter.java Wed Mar  9 22:32:09 2011
@@ -37,31 +37,15 @@ import org.apache.solr.common.SolrDocume
 /**
  * A description of the PHP serialization format can be found here:
  * http://www.hurring.com/scott/code/perl/serialize/
- *
- * <p>
- * In order to support PHP Serialized strings with a proper byte count, This ResponseWriter
- * must know if the Writers passed to it will result in an output of CESU-8 (UTF-8 w/o support
- * for large code points outside of the BMP)
- * <p>
- * Solr versions before 3.1 assume that all Jetty servlet containers (detected using the "jetty.home"
- * system property) use CESU-8 instead of UTF-8 (verified to the current release of 6.1.26).
- * Solr 3.1 contains a patched version of Jetty that uses real UTF-8 (SOLR-2381)
- * <p>
- * In installations where Solr auto-detects incorrectly, the Solr Administrator should set the
- * "solr.phps.cesu8" system property to either "true" or "false" accordingly.
  */
 public class PHPSerializedResponseWriter implements QueryResponseWriter {
   static String CONTENT_TYPE_PHP_UTF8="text/x-php-serialized;charset=UTF-8";
 
-  // Is this servlet container's UTF-8 encoding actually CESU-8 (i.e. lacks support for
-  // large characters outside the BMP).
-  boolean CESU8 = false;
   public void init(NamedList n) {
-    CESU8 = "true".equals(System.getProperty("solr.phps.cesu8"));
   }
   
  public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
-    PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp, CESU8);
+    PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp);
     try {
       w.writeResponse();
     } finally {
@@ -75,13 +59,11 @@ public class PHPSerializedResponseWriter
 }
 
 class PHPSerializedWriter extends JSONWriter {
-  final private boolean CESU8;
   final BytesRef utf8;
 
-  public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp, boolean CESU8) {
+  public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) {
     super(writer, req, rsp);
-    this.CESU8 = CESU8;
-    this.utf8 = CESU8 ? null : new BytesRef();
+    this.utf8 = new BytesRef();
     // never indent serialized PHP data
     doIndent = false;
   }
@@ -391,23 +373,8 @@ class PHPSerializedWriter extends JSONWr
   public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
     // serialized PHP strings don't need to be escaped at all, however the 
     // string size reported needs be the number of bytes rather than chars.
-    int nBytes;
-    if (CESU8) {
-      nBytes = 0;
-      for (int i=0; i<val.length(); i++) {
-        char ch = val.charAt(i);
-        if (ch<='\u007f') {
-          nBytes += 1;
-        } else if (ch<='\u07ff') {
-          nBytes += 2;
-        } else {
-          nBytes += 3;
-        }
-      }
-    } else {
-      UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
-      nBytes = utf8.length;
-    }
+    UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
+    int nBytes = utf8.length;
 
     writer.write("s:");
     writer.write(Integer.toString(nBytes));