You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ry...@apache.org on 2007/05/08 01:35:56 UTC
svn commit: r536019 - in /lucene/solr/trunk: CHANGES.txt src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java

Author: ryan
Date: Mon May  7 16:35:55 2007
New Revision: 536019

URL: http://svn.apache.org/viewvc?view=rev&rev=536019
Log:
SOLR-214 - use the charset encoded in the contentType to decode the posted text.  Even though they are supposed to, some containers do not obey this specification.

Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?view=diff&rev=536019&r1=536018&r2=536019
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon May  7 16:35:55 2007
@@ -271,6 +271,13 @@
 13. Changed the SOLR-104 RequestDispatcher so that /select?qt=xxx can not 
     access handlers that start with "/".  This makes path based authentication
     possible for path based request handlers.  (ryan)
+
+14. SOLR-214: Some servlet containers (including Tomcat and Resin) do not
+    obey the specified charset.  Rather than letting the container handle 
+    it solr now uses the charset from the header contentType to decode posted
+    content.  Using the contentType: "text/xml; charset=utf-8" will force
+    utf-8 encoding.  If you do not specify a contentType, it will use the 
+    platform default.  (Koji Sekiguchi via ryan)
  
 Other Changes
  1. Updated to Lucene 2.1

Modified: lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java?view=diff&rev=536019&r1=536018&r2=536019
==============================================================================
--- lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java (original)
+++ lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java Mon May  7 16:35:55 2007
@@ -20,7 +20,6 @@
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 import java.net.URL;
 import java.net.URLDecoder;
@@ -225,16 +224,27 @@
   public SolrParams parseParamsAndFillStreams( 
       final HttpServletRequest req, ArrayList<ContentStream> streams ) throws Exception
   {
-    streams.add( new ContentStream() {
+    // The javadocs for HttpServletRequest are clear that req.getReader() should take
+    // care of any character encoding issues.  BUT, there are problems while running on
+    // some servlet containers: including Tomcat 5 and resin.
+    //
+    // Rather than return req.getReader(), this uses the default ContentStreamBase method
+    // that checks for charset definitions in the ContentType.
+    
+    streams.add( new ContentStreamBase() {
+      @Override
       public String getContentType() {
         return req.getContentType();
       }
+      @Override
       public String getName() {
-        return null; // Is there any meaningfull name?
+        return null; // Is there any meaningful name?
       }
+      @Override
       public String getSourceInfo() {
-        return null; // Is there any meaningfull name?
+        return null; // Is there any meaningful source?
       }
+      @Override
       public Long getSize() { 
         String v = req.getHeader( "Content-Length" );
         if( v != null ) {
@@ -244,9 +254,6 @@
       }
       public InputStream getStream() throws IOException {
         return req.getInputStream();
-      }
-      public Reader getReader() throws IOException {
-        return req.getReader();
       }
     });
     return SolrRequestParsers.parseQueryString( req.getQueryString() );