You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ry...@apache.org on 2007/05/08 01:35:56 UTC
svn commit: r536019 - in /lucene/solr/trunk: CHANGES.txt
src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
Author: ryan
Date: Mon May 7 16:35:55 2007
New Revision: 536019
URL: http://svn.apache.org/viewvc?view=rev&rev=536019
Log:
SOLR-214 - use the charset encoded in the contentType to decode the posted text. Even though they are supposed to, some containers do not obey this specification.
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?view=diff&rev=536019&r1=536018&r2=536019
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon May 7 16:35:55 2007
@@ -271,6 +271,13 @@
13. Changed the SOLR-104 RequestDispatcher so that /select?qt=xxx can not
access handlers that start with "/". This makes path based authentication
possible for path based request handlers. (ryan)
+
+14. SOLR-214: Some servlet containers (including Tomcat and Resin) do not
+ obey the specified charset. Rather than letting the container handle
+ it, Solr now uses the charset from the header contentType to decode posted
+ content. Using the contentType: "text/xml; charset=utf-8" will force
+ utf-8 encoding. If you do not specify a contentType, it will use the
+ platform default. (Koji Sekiguchi via ryan)
Other Changes
1. Updated to Lucene 2.1
Modified: lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java?view=diff&rev=536019&r1=536018&r2=536019
==============================================================================
--- lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java (original)
+++ lucene/solr/trunk/src/webapp/src/org/apache/solr/servlet/SolrRequestParsers.java Mon May 7 16:35:55 2007
@@ -20,7 +20,6 @@
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.io.Reader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLDecoder;
@@ -225,16 +224,27 @@
public SolrParams parseParamsAndFillStreams(
final HttpServletRequest req, ArrayList<ContentStream> streams ) throws Exception
{
- streams.add( new ContentStream() {
+ // The javadocs for HttpServletRequest are clear that req.getReader() should take
+ // care of any character encoding issues. BUT, there are problems while running on
+ // some servlet containers: including Tomcat 5 and resin.
+ //
+ // Rather than return req.getReader(), this uses the default ContentStreamBase method
+ // that checks for charset definitions in the ContentType.
+
+ streams.add( new ContentStreamBase() {
+ @Override
public String getContentType() {
return req.getContentType();
}
+ @Override
public String getName() {
- return null; // Is there any meaningfull name?
+ return null; // Is there any meaningful name?
}
+ @Override
public String getSourceInfo() {
- return null; // Is there any meaningfull name?
+ return null; // Is there any meaningful source?
}
+ @Override
public Long getSize() {
String v = req.getHeader( "Content-Length" );
if( v != null ) {
@@ -244,9 +254,6 @@
}
public InputStream getStream() throws IOException {
return req.getInputStream();
- }
- public Reader getReader() throws IOException {
- return req.getReader();
}
});
return SolrRequestParsers.parseQueryString( req.getQueryString() );