You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by eh...@apache.org on 2011/10/27 17:24:14 UTC

svn commit: r1189803 - in /lucene/dev/trunk/solr: contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java solrj/src/java/org/apache/solr/common/util/ContentStream.java

Author: ehatcher
Date: Thu Oct 27 15:24:14 2011
New Revision: 1189803

URL: http://svn.apache.org/viewvc?rev=1189803&view=rev
Log:
SOLR-2854: Fix ExtractingRequestHandler to call getStream before getting stream attributes.

Modified:
    lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
    lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java

Modified: lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java?rev=1189803&r1=1189802&r2=1189803&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java Thu Oct 27 15:24:14 2011
@@ -143,10 +143,6 @@ public class ExtractingDocumentLoader ex
     }
     if (parser != null) {
       Metadata metadata = new Metadata();
-      metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
-      metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
-      metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
-      metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
 
       // If you specify the resource name (the filename, roughly) with this parameter,
       // then Tika can make use of it in guessing the appropriate MIME type:
@@ -155,12 +151,16 @@ public class ExtractingDocumentLoader ex
         metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
       }
 
-      SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
       InputStream inputStream = null;
       try {
         inputStream = stream.getStream();
+        metadata.add(ExtractingMetadataConstants.STREAM_NAME, stream.getName());
+        metadata.add(ExtractingMetadataConstants.STREAM_SOURCE_INFO, stream.getSourceInfo());
+        metadata.add(ExtractingMetadataConstants.STREAM_SIZE, String.valueOf(stream.getSize()));
+        metadata.add(ExtractingMetadataConstants.STREAM_CONTENT_TYPE, stream.getContentType());
         String xpathExpr = params.get(ExtractingParams.XPATH_EXPRESSION);
         boolean extractOnly = params.getBool(ExtractingParams.EXTRACT_ONLY, false);
+        SolrContentHandler handler = factory.createSolrContentHandler(metadata, params, schema);
         ContentHandler parsingHandler = handler;
 
         StringWriter writer = null;

Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java?rev=1189803&r1=1189802&r2=1189803&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/util/ContentStream.java Thu Oct 27 15:24:14 2011
@@ -50,6 +50,10 @@ public interface ContentStream {
    *  
    * Only the first call to <code>getStream()</code> or <code>getReader()</code>
    * is guaranteed to work.  The runtime behavior for additional calls is undefined.
+   *
+   * Note: you must call <code>getStream()</code> or <code>getReader()</code> before
+   * the attributes (name, contentType, etc.) are guaranteed to be set.  Streams may be
+   * lazily loaded, with loading deferred until one of these methods is called.
    */
   InputStream getStream() throws IOException;
 
@@ -68,6 +72,10 @@ public interface ContentStream {
    *  
    * Only the first call to <code>getStream()</code> or <code>getReader()</code>
    * is guaranteed to work.  The runtime behavior for additional calls is undefined.
+   *
+   * Note: you must call <code>getStream()</code> or <code>getReader()</code> before
+   * the attributes (name, contentType, etc.) are guaranteed to be set.  Streams may be
+   * lazily loaded, with loading deferred until one of these methods is called.
    */
   Reader getReader() throws IOException;
 }