You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2010/05/10 16:36:54 UTC
svn commit: r942753 - in /lucene/dev/trunk/solr: ./ contrib/extraction/
contrib/extraction/lib/
contrib/extraction/src/main/java/org/apache/solr/handler/extraction/
src/java/org/apache/solr/core/
Author: gsingers
Date: Mon May 10 14:36:54 2010
New Revision: 942753
URL: http://svn.apache.org/viewvc?rev=942753&view=rev
Log:
SOLR-1902: fix Tika extraction issue
Added:
lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-0.8-SNAPSHOT.jar (with props)
lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-0.8-SNAPSHOT.jar (with props)
Removed:
lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-0.7.jar
lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-0.7.jar
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt
lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=942753&r1=942752&r2=942753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon May 10 14:36:54 2010
@@ -285,6 +285,8 @@ Bug Fixes
* SOLR-1706: fixed WordDelimiterFilter for certain combinations of options
where it would output incorrect tokens. (Robert Muir, Chris Male)
+* SOLR-1902: Exposed SolrResourceLoader's class loader for use by Tika
+
Other Changes
----------------------
Modified: lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt?rev=942753&r1=942752&r2=942753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/contrib/extraction/CHANGES.txt Mon May 10 14:36:54 2010
@@ -17,21 +17,23 @@ You will need Solr up and running. Then
to your Solr Home lib directory. See http://wiki.apache.org/solr/ExtractingRequestHandler for more details on hooking it in
and configuring.
+ Tika Dependency
+ ---------------
+
+Current Version: Tika 0.8-SNAPSHOT (rev 942725)
+
$Id:$
================== Release 1.5-dev ==================
-* SOLR-1567: Upgrade to Tika 0.5, which upgrades many of the underlying libraries (PDFBox, for example) too (gsingers)
* SOLR-1756: The date.format setting causes ClassCastException when enabled and the config code that
parses this setting does not properly use the same iterator instance. (Christoph Brill, Mark Miller)
-* SOLR-1738: Upgrade to Tika 0.6 (gsingers)
-
* SOLR-1813: Add ICU4j to libs and add tests for Arabic extraction (Robert Muir via gsingers)
-* SOLR-1819: Upgraded to Tika 0.7 (gsingers)
+* SOLR-1902: Upgraded to Tika 0.8-SNAPSHOT to incorporate passing in Solr's custom ClassLoader (gsingers)
================== Release 1.4.0 ==================
Added: lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-0.8-SNAPSHOT.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-0.8-SNAPSHOT.jar?rev=942753&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/dev/trunk/solr/contrib/extraction/lib/tika-core-0.8-SNAPSHOT.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-0.8-SNAPSHOT.jar
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-0.8-SNAPSHOT.jar?rev=942753&view=auto
==============================================================================
Binary file - no diff available.
Propchange: lucene/dev/trunk/solr/contrib/extraction/lib/tika-parsers-0.8-SNAPSHOT.jar
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Modified: lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java?rev=942753&r1=942752&r2=942753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingDocumentLoader.java Mon May 10 14:36:54 2010
@@ -37,6 +37,7 @@ import org.apache.tika.sax.xpath.Matcher
import org.apache.tika.sax.xpath.MatchingContentHandler;
import org.apache.tika.sax.xpath.XPathParser;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.mime.MediaType;
import org.apache.xml.serialize.OutputFormat;
import org.apache.xml.serialize.BaseMarkupSerializer;
import org.apache.xml.serialize.XMLSerializer;
@@ -134,7 +135,8 @@ public class ExtractingDocumentLoader ex
String streamType = req.getParams().get(ExtractingParams.STREAM_TYPE, null);
if (streamType != null) {
//Cache? Parsers are lightweight to construct and thread-safe, so I'm told
- parser = config.getParser(streamType.trim().toLowerCase());
+ MediaType mt = MediaType.parse(streamType.trim().toLowerCase());
+ parser = config.getParser(mt);
} else {
parser = autoDetectParser;
}
Modified: lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java?rev=942753&r1=942752&r2=942753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/main/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java Mon May 10 14:36:54 2010
@@ -29,10 +29,12 @@ import org.apache.solr.handler.ContentSt
import org.apache.solr.handler.ContentStreamLoader;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.mime.MimeTypeException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
+import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
@@ -77,8 +79,6 @@ public class ExtractingRequestHandler ex
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, e);
}
- } else {
- config = TikaConfig.getDefaultConfig();
}
NamedList configDateFormats = (NamedList) initArgs.get(DATE_FORMATS);
if (configDateFormats != null && configDateFormats.size() > 0) {
@@ -90,12 +90,23 @@ public class ExtractingRequestHandler ex
dateFormats.add(format);
}
}
- } else {
- config = TikaConfig.getDefaultConfig();
+ }
+ if (config == null) {
+ try {
+ config = getDefaultConfig(core.getResourceLoader().getClassLoader());
+ } catch (MimeTypeException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ } catch (IOException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ }
}
factory = createFactory();
}
+ private TikaConfig getDefaultConfig(ClassLoader classLoader) throws MimeTypeException, IOException {
+ return new TikaConfig(classLoader);
+ }
+
protected SolrContentHandlerFactory createFactory() {
return new SolrContentHandlerFactory(dateFormats);
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java?rev=942753&r1=942752&r2=942753&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/core/SolrResourceLoader.java Mon May 10 14:36:54 2010
@@ -214,6 +214,16 @@ public class SolrResourceLoader implemen
return coreProperties;
}
+ /**
+ * EXPERT
+ * <p/>
+ * The underlying class loader. Most applications will not need to use this.
+ * @return The {@link ClassLoader}
+ */
+ public ClassLoader getClassLoader() {
+ return classLoader;
+ }
+
/** Opens a schema resource by its name.
* Override this method to customize loading schema resources.
*@return the stream for the named schema