You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/02/03 15:49:06 UTC

svn commit: r1066819 - in /lucene/dev/trunk/solr: CHANGES.txt src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java src/java/org/apache/solr/handler/XMLLoader.java

Author: uschindler
Date: Thu Feb  3 14:49:06 2011
New Revision: 1066819

URL: http://svn.apache.org/viewvc?rev=1066819&view=rev
Log:
SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and DocumentAnalysisRequestHandler to respect charset from XML file and only use HTTP header's "Content-Type" as a "hint"

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/handler/XMLLoader.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1066819&r1=1066818&r2=1066819&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Thu Feb  3 14:49:06 2011
@@ -658,6 +658,10 @@ Bug Fixes
 * SOLR-2156: SnapPuller fails to clean Old Index Directories on Full Copy
   (Jayendra Patil via yonik)
 
+* SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and
+  DocumentAnalysisRequestHandler to respect charset from XML file and only
+  use HTTP header's "Content-Type" as a "hint". (Uwe Schindler)
+
 Other Changes
 ----------------------
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java?rev=1066819&r1=1066818&r2=1066819&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java Thu Feb  3 14:49:06 2011
@@ -27,6 +27,7 @@ import org.apache.solr.common.params.Ana
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.request.SolrQueryRequest;
@@ -41,7 +42,7 @@ import javax.xml.stream.XMLStreamConstan
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 import java.io.IOException;
-import java.io.Reader;
+import java.io.InputStream;
 import java.util.*;
 
 /**
@@ -157,10 +158,14 @@ public class DocumentAnalysisRequestHand
     request.setShowMatch(showMatch);
 
     ContentStream stream = extractSingleContentStream(req);
-    Reader reader = stream.getReader();
-    XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
-
+    InputStream is = null;
+    XMLStreamReader parser = null;
+    
     try {
+      is = stream.getStream();
+      final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+      parser = (charset == null) ?
+        inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
 
       while (true) {
         int event = parser.next();
@@ -182,8 +187,8 @@ public class DocumentAnalysisRequestHand
       }
 
     } finally {
-      parser.close();
-      IOUtils.closeQuietly(reader);
+      if (parser != null) parser.close();
+      IOUtils.closeQuietly(is);
     }
   }
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/XMLLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/XMLLoader.java?rev=1066819&r1=1066818&r2=1066819&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/XMLLoader.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/XMLLoader.java Thu Feb  3 14:49:06 2011
@@ -24,6 +24,7 @@ import org.apache.solr.update.DeleteUpda
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -36,8 +37,8 @@ import javax.xml.stream.FactoryConfigura
 import javax.xml.stream.XMLStreamConstants;
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.transform.TransformerConfigurationException;
-import java.io.Reader;
-import java.io.StringReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
 import java.io.IOException;
 
 
@@ -57,22 +58,28 @@ class XMLLoader extends ContentStreamLoa
   @Override
   public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws Exception {
     errHeader = "XMLLoader: " + stream.getSourceInfo();
-    Reader reader = null;
+    InputStream is = null;
+    XMLStreamReader parser = null;
     try {
-      reader = stream.getReader();
+      is = stream.getStream();
+      final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
       if (XmlUpdateRequestHandler.log.isTraceEnabled()) {
-        String body = IOUtils.toString(reader);
-        XmlUpdateRequestHandler.log.trace("body", body);
-        reader = new StringReader(body);
+        final byte[] body = IOUtils.toByteArray(is);
+        // TODO: The charset may be wrong, as the real charset is later
+        // determined by the XML parser, the content-type is only used as a hint!
+        XmlUpdateRequestHandler.log.trace("body", new String(body, (charset == null) ?
+          ContentStreamBase.DEFAULT_CHARSET : charset));
+        IOUtils.closeQuietly(is);
+        is = new ByteArrayInputStream(body);
       }
-
-      XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
+      parser = (charset == null) ?
+        inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
       this.processUpdate(req, processor, parser);
-    }
-    catch (XMLStreamException e) {
+    } catch (XMLStreamException e) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
     } finally {
-      IOUtils.closeQuietly(reader);
+      if (parser != null) parser.close();
+      IOUtils.closeQuietly(is);
     }
   }