You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by eh...@apache.org on 2011/10/27 17:24:14 UTC
svn commit: r1189803 - in /lucene/dev/trunk/solr:
contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
solrj/src/java/org/apache/solr/common/util/ContentStream.java
Author: ehatcher
Date: Thu Oct 27 15:24:14 2011
New Revision: 1189803
URL: http://svn.apache.org/viewvc?rev=1189803&view=rev
Log:
SOLR-2854: Fix ExtractingRequestHandler to call getStream before getting stream attributes.
Modified:
lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java
Modified: lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java?rev=1189803&r1=1189802&r2=1189803&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java Thu Oct 27 15:24:14 2011
@@ -143,10 +143,6 @@ public class ExtractingDocumentLoader ex
}
if (parser != null) {
Metadata metadata = new Metadata();
- metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
- metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
- metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
- metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
// If you specify the resource name (the filename, roughly) with this parameter,
// then Tika can make use of it in guessing the appropriate MIME type:
@@ -155,12 +151,16 @@ public class ExtractingDocumentLoader ex
metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
}
- SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
InputStream inputStream = null;
try {
inputStream = stream.getStream();
+ metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
+ metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
+ metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
+ metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
+ SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
ContentHandler parsingHandler = handler;
StringWriter writer = null;
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java?rev=1189803&r1=1189802&r2=1189803&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java Thu Oct 27 15:24:14 2011
@@ -50,6 +50,10 @@ public interface ContentStream {
*
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
* is guaranteed to work. The runtime behavior for additional calls is undefined.
+ *
+ * Note: you must call <code>getStream()</code> or <code>getReader()</code> before
+ * the attributes (name, contentType, etc.) are guaranteed to be set. Streams may be
+ * loaded lazily, i.e. only when this method is called.
*/
InputStream getStream() throws IOException;
@@ -68,6 +72,10 @@ public interface ContentStream {
*
* Only the first call to <code>getStream()</code> or <code>getReader()</code>
* is guaranteed to work. The runtime behavior for additional calls is undefined.
+ *
+ * Note: you must call <code>getStream()</code> or <code>getReader()</code> before
+ * the attributes (name, contentType, etc.) are guaranteed to be set. Streams may be
+ * loaded lazily, i.e. only when this method is called.
*/
Reader getReader() throws IOException;
}