You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/02/03 16:01:27 UTC

svn commit: r1066823 - in /lucene/dev/branches/branch_3x: ./ lucene/ solr/ solr/src/java/org/apache/solr/handler/

Author: uschindler
Date: Thu Feb  3 15:01:26 2011
New Revision: 1066823

URL: http://svn.apache.org/viewvc?rev=1066823&view=rev
Log:
SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and DocumentAnalysisRequestHandler to respect charset from XML file and only use HTTP header's "Content-Type" as a "hint"

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java
    lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java

Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Thu Feb  3 15:01:26 2011
@@ -502,6 +502,9 @@ Bug Fixes
 * SOLR-2156: SnapPuller fails to clean Old Index Directories on Full Copy
   (Jayendra Patil via yonik)
 
+* SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and
+  DocumentAnalysisRequestHandler to respect charset from XML file and only
+  use HTTP header's "Content-Type" as a "hint". (Uwe Schindler)
 
 Other Changes
 ----------------------

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java Thu Feb  3 15:01:26 2011
@@ -29,6 +29,7 @@ import org.apache.solr.common.SolrExcept
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.request.SolrQueryRequest;
@@ -44,6 +45,7 @@ import javax.xml.stream.XMLStreamConstan
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Collection;
@@ -89,14 +91,19 @@ public class AnalysisRequestHandler exte
     Iterable<ContentStream> streams = req.getContentStreams();
     if (streams != null) {
       for (ContentStream stream : req.getContentStreams()) {
-        Reader reader = stream.getReader();
+        InputStream is = null;
+        XMLStreamReader parser = null;
         try {
-          XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
+          is = stream.getStream();
+          final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+          parser = (charset == null) ?
+            inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
+
           NamedList<Object> result = processContent(parser, req.getSchema());
           rsp.add("response", result);
-        }
-        finally {
-          IOUtils.closeQuietly(reader);
+        } finally {
+          if (parser != null) parser.close();
+          IOUtils.closeQuietly(is);
         }
       }
     }

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java Thu Feb  3 15:01:26 2011
@@ -28,6 +28,7 @@ import org.apache.solr.common.params.Ana
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.request.SolrQueryRequest;
@@ -42,7 +43,7 @@ import javax.xml.stream.XMLStreamConstan
 import javax.xml.stream.XMLStreamException;
 import javax.xml.stream.XMLStreamReader;
 import java.io.IOException;
-import java.io.Reader;
+import java.io.InputStream;
 import java.util.*;
 
 /**
@@ -158,10 +159,14 @@ public class DocumentAnalysisRequestHand
     request.setShowMatch(showMatch);
 
     ContentStream stream = extractSingleContentStream(req);
-    Reader reader = stream.getReader();
-    XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
-
+    InputStream is = null;
+    XMLStreamReader parser = null;
+    
     try {
+      is = stream.getStream();
+      final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+      parser = (charset == null) ?
+        inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
 
       while (true) {
         int event = parser.next();
@@ -183,8 +188,8 @@ public class DocumentAnalysisRequestHand
       }
 
     } finally {
-      parser.close();
-      IOUtils.closeQuietly(reader);
+      if (parser != null) parser.close();
+      IOUtils.closeQuietly(is);
     }
   }
 

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java Thu Feb  3 15:01:26 2011
@@ -24,6 +24,7 @@ import org.apache.solr.update.DeleteUpda
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
 import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -36,8 +37,8 @@ import javax.xml.stream.FactoryConfigura
 import javax.xml.stream.XMLStreamConstants;
 import javax.xml.stream.XMLInputFactory;
 import javax.xml.transform.TransformerConfigurationException;
-import java.io.Reader;
-import java.io.StringReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
 import java.io.IOException;
 
 
@@ -57,22 +58,28 @@ class XMLLoader extends ContentStreamLoa
   @Override
   public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws Exception {
     errHeader = "XMLLoader: " + stream.getSourceInfo();
-    Reader reader = null;
+    InputStream is = null;
+    XMLStreamReader parser = null;
     try {
-      reader = stream.getReader();
+      is = stream.getStream();
+      final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
       if (XmlUpdateRequestHandler.log.isTraceEnabled()) {
-        String body = IOUtils.toString(reader);
-        XmlUpdateRequestHandler.log.trace("body", body);
-        reader = new StringReader(body);
+        final byte[] body = IOUtils.toByteArray(is);
+        // TODO: The charset may be wrong: the real charset is determined later
+        // by the XML parser; the content-type is only used as a hint!
+        XmlUpdateRequestHandler.log.trace("body", new String(body, (charset == null) ?
+          ContentStreamBase.DEFAULT_CHARSET : charset));
+        IOUtils.closeQuietly(is);
+        is = new ByteArrayInputStream(body);
       }
-
-      XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
+      parser = (charset == null) ?
+        inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
       this.processUpdate(processor, parser);
-    }
-    catch (XMLStreamException e) {
+    } catch (XMLStreamException e) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
     } finally {
-      IOUtils.closeQuietly(reader);
+      if (parser != null) parser.close();
+      IOUtils.closeQuietly(is);
     }
   }
 

Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java Thu Feb  3 15:01:26 2011
@@ -108,11 +108,13 @@ public class XmlUpdateRequestHandler ext
    * success or failure from an XML formated Update (from the Reader)
    *
    * @since solr 1.2
-   * @deprecated Direct updates fro ma Reader, as well as the response 
+   * @deprecated Direct updates from a Reader, as well as the response 
    *             format produced by this method, have been deprecated 
    *             and will be removed in future versions.  Any code using
    *             this method should be changed to use {@link #handleRequest} 
    *             method with a ContentStream. 
+   *             This method is also broken regarding XML charset detection,
+   *             as XML files need to be parsed as InputStream and not as Reader.
    */
   @Deprecated
   public void doLegacyUpdate(Reader input, Writer output) {