You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/02/03 16:01:27 UTC
svn commit: r1066823 - in /lucene/dev/branches/branch_3x: ./ lucene/ solr/
solr/src/java/org/apache/solr/handler/
Author: uschindler
Date: Thu Feb 3 15:01:26 2011
New Revision: 1066823
URL: http://svn.apache.org/viewvc?rev=1066823&view=rev
Log:
SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and DocumentAnalysisRequestHandler to respect charset from XML file and only use HTTP header's "Content-Type" as a "hint"
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java
lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Thu Feb 3 15:01:26 2011
@@ -502,6 +502,9 @@ Bug Fixes
* SOLR-2156: SnapPuller fails to clean Old Index Directories on Full Copy
(Jayendra Patil via yonik)
+* SOLR-96: Fix XML parsing in XMLUpdateRequestHandler and
+ DocumentAnalysisRequestHandler to respect charset from XML file and only
+ use HTTP header's "Content-Type" as a "hint". (Uwe Schindler)
Other Changes
----------------------
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java Thu Feb 3 15:01:26 2011
@@ -29,6 +29,7 @@ import org.apache.solr.common.SolrExcept
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
@@ -44,6 +45,7 @@ import javax.xml.stream.XMLStreamConstan
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
+import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collection;
@@ -89,14 +91,19 @@ public class AnalysisRequestHandler exte
Iterable<ContentStream> streams = req.getContentStreams();
if (streams != null) {
for (ContentStream stream : req.getContentStreams()) {
- Reader reader = stream.getReader();
+ InputStream is = null;
+ XMLStreamReader parser = null;
try {
- XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
+ is = stream.getStream();
+ final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+ parser = (charset == null) ?
+ inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
+
NamedList<Object> result = processContent(parser, req.getSchema());
rsp.add("response", result);
- }
- finally {
- IOUtils.closeQuietly(reader);
+ } finally {
+ if (parser != null) parser.close();
+ IOUtils.closeQuietly(is);
}
}
}
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java Thu Feb 3 15:01:26 2011
@@ -28,6 +28,7 @@ import org.apache.solr.common.params.Ana
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
@@ -42,7 +43,7 @@ import javax.xml.stream.XMLStreamConstan
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import java.io.IOException;
-import java.io.Reader;
+import java.io.InputStream;
import java.util.*;
/**
@@ -158,10 +159,14 @@ public class DocumentAnalysisRequestHand
request.setShowMatch(showMatch);
ContentStream stream = extractSingleContentStream(req);
- Reader reader = stream.getReader();
- XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
-
+ InputStream is = null;
+ XMLStreamReader parser = null;
+
try {
+ is = stream.getStream();
+ final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
+ parser = (charset == null) ?
+ inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
while (true) {
int event = parser.next();
@@ -183,8 +188,8 @@ public class DocumentAnalysisRequestHand
}
} finally {
- parser.close();
- IOUtils.closeQuietly(reader);
+ if (parser != null) parser.close();
+ IOUtils.closeQuietly(is);
}
}
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XMLLoader.java Thu Feb 3 15:01:26 2011
@@ -24,6 +24,7 @@ import org.apache.solr.update.DeleteUpda
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.common.util.ContentStream;
+import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@@ -36,8 +37,8 @@ import javax.xml.stream.FactoryConfigura
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLInputFactory;
import javax.xml.transform.TransformerConfigurationException;
-import java.io.Reader;
-import java.io.StringReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
import java.io.IOException;
@@ -57,22 +58,28 @@ class XMLLoader extends ContentStreamLoa
@Override
public void load(SolrQueryRequest req, SolrQueryResponse rsp, ContentStream stream) throws Exception {
errHeader = "XMLLoader: " + stream.getSourceInfo();
- Reader reader = null;
+ InputStream is = null;
+ XMLStreamReader parser = null;
try {
- reader = stream.getReader();
+ is = stream.getStream();
+ final String charset = ContentStreamBase.getCharsetFromContentType(stream.getContentType());
if (XmlUpdateRequestHandler.log.isTraceEnabled()) {
- String body = IOUtils.toString(reader);
- XmlUpdateRequestHandler.log.trace("body", body);
- reader = new StringReader(body);
+ final byte[] body = IOUtils.toByteArray(is);
+ // TODO: The charset may be wrong: the real charset is determined later
+ // by the XML parser, and the content-type is only used as a hint!
+ XmlUpdateRequestHandler.log.trace("body", new String(body, (charset == null) ?
+ ContentStreamBase.DEFAULT_CHARSET : charset));
+ IOUtils.closeQuietly(is);
+ is = new ByteArrayInputStream(body);
}
-
- XMLStreamReader parser = inputFactory.createXMLStreamReader(reader);
+ parser = (charset == null) ?
+ inputFactory.createXMLStreamReader(is) : inputFactory.createXMLStreamReader(is, charset);
this.processUpdate(processor, parser);
- }
- catch (XMLStreamException e) {
+ } catch (XMLStreamException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
} finally {
- IOUtils.closeQuietly(reader);
+ if (parser != null) parser.close();
+ IOUtils.closeQuietly(is);
}
}
Modified: lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java?rev=1066823&r1=1066822&r2=1066823&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java (original)
+++ lucene/dev/branches/branch_3x/solr/src/java/org/apache/solr/handler/XmlUpdateRequestHandler.java Thu Feb 3 15:01:26 2011
@@ -108,11 +108,13 @@ public class XmlUpdateRequestHandler ext
* success or failure from an XML formatted Update (from the Reader)
*
* @since solr 1.2
- * @deprecated Direct updates fro ma Reader, as well as the response
+ * @deprecated Direct updates from a Reader, as well as the response
* format produced by this method, have been deprecated
* and will be removed in future versions. Any code using
* this method should be changed to use {@link #handleRequest}
* method with a ContentStream.
+ * This method is also broken regarding XML charset detection,
+ * as XML files need to be parsed as InputStream and not as Reader.
*/
@Deprecated
public void doLegacyUpdate(Reader input, Writer output) {