You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by ol...@apache.org on 2008/11/13 11:18:57 UTC
svn commit: r713705 - in /incubator/droids/trunk:
droids-core/src/main/java/org/apache/droids/exception/DroidsException.java
droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
Author: olegk
Date: Thu Nov 13 03:18:56 2008
New Revision: 713705
URL: http://svn.apache.org/viewvc?rev=713705&view=rev
Log:
Fixed Tika parser breakage due to the latest API changes
Modified:
incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java?rev=713705&r1=713704&r2=713705&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java Thu Nov 13 03:18:56 2008
@@ -42,6 +42,16 @@
/**
* For more information {@link Exception}
*
+ * @param message
+ * @param cause
+ */
+ public DroidsException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ /**
+ * For more information {@link Exception}
+ *
* @param cause
*/
public DroidsException(Throwable cause) {
Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=713705&r1=713704&r2=713705&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Thu Nov 13 03:18:56 2008
@@ -16,17 +16,22 @@
*/
package org.apache.droids.tika;
+import java.io.IOException;
import java.io.InputStream;
import org.apache.droids.ParseData;
+import org.apache.droids.api.ContentEntity;
import org.apache.droids.api.Link;
import org.apache.droids.api.Parse;
import org.apache.droids.api.Parser;
+import org.apache.droids.exception.DroidsException;
import org.apache.droids.helper.Loggable;
import org.apache.droids.parse.ParseImpl;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.SAXException;
public class TikaHtmlParser extends Loggable implements Parser {
@@ -47,29 +52,32 @@
}
- public Parse getParse(InputStream openStream, Link link) {
+ public Parse getParse(ContentEntity entity, Link link) throws IOException, DroidsException {
// Init Tika objects
parser = new AutoDetectParser();
metadata = new Metadata();
- //Init handlers
- //TODO: Autodetect encoding
- EchoHandler data = new EchoHandler("UTF-8");
+ String charset = entity.getCharset();
+ if (charset == null) {
+ charset = "UTF-8";
+ }
+ EchoHandler data = new EchoHandler(charset);
extractor.setBase(link);
TeeContentHandler parallelHandler = new TeeContentHandler(data, extractor);
+ InputStream instream = entity.obtainContent();
try {
- parser.parse(openStream, parallelHandler, metadata);
+ parser.parse(instream, parallelHandler, metadata);
ParseData parseData = new ParseData(extractor.getLinks());
return new ParseImpl(data.toString(), parseData);
- } catch (Exception e) {
- log.error("Parse error." + e);
- // TODO Auto-generated catch block
- e.printStackTrace();
+ } catch (SAXException ex) {
+ throw new DroidsException("Failure parsing document " + link.getId(), ex);
+ } catch (TikaException ex) {
+ throw new DroidsException("Failure parsing document " + link.getId(), ex);
+ } finally {
+ instream.close();
}
-
- return null;
}
}