You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by ol...@apache.org on 2008/11/13 11:18:57 UTC

svn commit: r713705 - in /incubator/droids/trunk: droids-core/src/main/java/org/apache/droids/exception/DroidsException.java droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java

Author: olegk
Date: Thu Nov 13 03:18:56 2008
New Revision: 713705

URL: http://svn.apache.org/viewvc?rev=713705&view=rev
Log:
Fixed Tika parser breakage due to the latest API changes

Modified:
    incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java
    incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java

Modified: incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java?rev=713705&r1=713704&r2=713705&view=diff
==============================================================================
--- incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java (original)
+++ incubator/droids/trunk/droids-core/src/main/java/org/apache/droids/exception/DroidsException.java Thu Nov 13 03:18:56 2008
@@ -42,6 +42,16 @@
   /**
    * For more information {@link Exception}
    * 
+   * @param message
+   * @param cause
+   */
+  public DroidsException(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+  /**
+   * For more information {@link Exception}
+   * 
    * @param cause
    */
   public DroidsException(Throwable cause) {

Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=713705&r1=713704&r2=713705&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Thu Nov 13 03:18:56 2008
@@ -16,17 +16,22 @@
  */
 package org.apache.droids.tika;
 
+import java.io.IOException;
 import java.io.InputStream;
 
 import org.apache.droids.ParseData;
+import org.apache.droids.api.ContentEntity;
 import org.apache.droids.api.Link;
 import org.apache.droids.api.Parse;
 import org.apache.droids.api.Parser;
+import org.apache.droids.exception.DroidsException;
 import org.apache.droids.helper.Loggable;
 import org.apache.droids.parse.ParseImpl;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.SAXException;
 
 public class TikaHtmlParser extends Loggable implements Parser {
 
@@ -47,29 +52,32 @@
   }
 
 
-  public Parse getParse(InputStream openStream, Link link) {
+  public Parse getParse(ContentEntity entity, Link link) throws IOException, DroidsException {
     // Init Tika objects
     parser = new AutoDetectParser();
     metadata = new Metadata();
     
-    //Init handlers
-    //TODO: Autodetect encoding
-    EchoHandler data = new EchoHandler("UTF-8"); 
+    String charset = entity.getCharset();
+    if (charset == null) {
+      charset = "UTF-8";
+    }
+    EchoHandler data = new EchoHandler(charset); 
     extractor.setBase(link);
     
     TeeContentHandler parallelHandler = new TeeContentHandler(data, extractor);
 
+    InputStream instream = entity.obtainContent();
     try {
-      parser.parse(openStream, parallelHandler, metadata);
+      parser.parse(instream, parallelHandler, metadata);
       ParseData parseData = new ParseData(extractor.getLinks());
       
       return new ParseImpl(data.toString(), parseData);
-    } catch (Exception e) {
-      log.error("Parse error." + e);
-      // TODO Auto-generated catch block
-      e.printStackTrace();
+    } catch (SAXException ex) {
+      throw new DroidsException("Failure parsing document " + link.getId(), ex);
+    } catch (TikaException ex) {
+      throw new DroidsException("Failure parsing document " + link.getId(), ex);
+    } finally {
+      instream.close();
     } 
-    
-    return null;
   }
 }