You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by rf...@apache.org on 2011/11/23 00:58:17 UTC

svn commit: r1205253 - in /incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika: TikaDocumentParser.java TikaHtmlParser.java api/ api/TikaParse.java api/TikaParser.java parse/ parse/TikaParseImpl.java

Author: rfrovarp
Date: Wed Nov 23 00:58:16 2011
New Revision: 1205253

URL: http://svn.apache.org/viewvc?rev=1205253&view=rev
Log:
Starting to expand the Tika module.
It can do all sorts of cool stuff.

Added:
    incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/
    incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
    incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java
    incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/
    incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
Modified:
    incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
    incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java

Modified: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=1205253&r1=1205252&r2=1205253&view=diff
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java (original)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java Wed Nov 23 00:58:16 2011
@@ -23,10 +23,10 @@ import java.io.InputStream;
 
 import org.apache.droids.api.ContentEntity;
 import org.apache.droids.api.Link;
-import org.apache.droids.api.Parse;
-import org.apache.droids.api.Parser;
 import org.apache.droids.exception.DroidsException;
-import org.apache.droids.parse.ParseImpl;
+import org.apache.droids.tika.api.TikaParse;
+import org.apache.droids.tika.api.TikaParser;
+import org.apache.droids.tika.parse.TikaParseImpl;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
@@ -34,10 +34,10 @@ import org.apache.tika.parser.ParseConte
 import org.apache.tika.sax.BodyContentHandler;
 import org.xml.sax.SAXException;
 
-public class TikaDocumentParser implements Parser {
+public class TikaDocumentParser implements TikaParser {
 
   @Override
-  public Parse parse(ContentEntity entity, Link link) throws DroidsException,
+  public TikaParse parse(ContentEntity entity, Link link) throws DroidsException,
       IOException {
     org.apache.tika.parser.Parser parser = new AutoDetectParser();
     Metadata metadata = new Metadata();
@@ -46,7 +46,7 @@ public class TikaDocumentParser implemen
     InputStream instream = entity.obtainContent();
     try {
       parser.parse(instream, handler, metadata, new ParseContext());
-      ParseImpl parse = new ParseImpl(handler.toString(),null);
+      TikaParseImpl parse = new TikaParseImpl(handler.toString(),null);
       
       return parse;
 

Modified: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1205253&r1=1205252&r2=1205253&view=diff
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Wed Nov 23 00:58:16 2011
@@ -23,11 +23,11 @@ import java.util.Map;
 
 import org.apache.droids.api.ContentEntity;
 import org.apache.droids.api.Link;
-import org.apache.droids.api.Parse;
-import org.apache.droids.api.Parser;
 import org.apache.droids.exception.DroidsException;
-import org.apache.droids.parse.ParseImpl;
 import org.apache.droids.parse.html.LinkExtractor;
+import org.apache.droids.tika.api.TikaParse;
+import org.apache.droids.tika.api.TikaParser;
+import org.apache.droids.tika.parse.TikaParseImpl;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
@@ -35,7 +35,7 @@ import org.apache.tika.parser.ParseConte
 import org.apache.tika.sax.TeeContentHandler;
 import org.xml.sax.SAXException;
 
-public class TikaHtmlParser implements Parser {
+public class TikaHtmlParser implements TikaParser {
 
   private Map<String, String> elements= null;
 
@@ -51,7 +51,7 @@ public class TikaHtmlParser implements P
   }
 
   @Override
-  public Parse parse(ContentEntity entity, Link link) throws IOException, DroidsException {
+  public TikaParse parse(ContentEntity entity, Link link) throws IOException, DroidsException {
     // Init Tika objects
     org.apache.tika.parser.Parser parser = new AutoDetectParser();
     Metadata metadata = new Metadata();
@@ -69,7 +69,7 @@ public class TikaHtmlParser implements P
     try {
       parser.parse(instream, parallelHandler, metadata, new ParseContext());
       
-      return new ParseImpl(data.toString(), extractor.getLinks());
+      return new TikaParseImpl(data.toString(), extractor.getLinks());
     } catch (SAXException ex) {
       throw new DroidsException("Failure parsing document " + link.getId(), ex);
     } catch (TikaException ex) {

Added: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java?rev=1205253&view=auto
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java (added)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java Wed Nov 23 00:58:16 2011
@@ -0,0 +1,7 @@
+package org.apache.droids.tika.api;
+
+import org.apache.droids.api.Parse;
+
+public interface TikaParse extends Parse { // Tika-module subtype of Parse; declares no members of its own in this revision.
+
+}

Added: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java?rev=1205253&view=auto
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java (added)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java Wed Nov 23 00:58:16 2011
@@ -0,0 +1,7 @@
+package org.apache.droids.tika.api;
+
+import org.apache.droids.api.Parser;
+
+public interface TikaParser extends Parser { // Tika-module subtype of Parser; declares no members of its own in this revision.
+
+}

Added: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java?rev=1205253&view=auto
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java (added)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java Wed Nov 23 00:58:16 2011
@@ -0,0 +1,18 @@
+package org.apache.droids.tika.parse;
+
+import java.util.Collection;
+
+import org.apache.droids.api.Link;
+import org.apache.droids.parse.ParseImpl;
+import org.apache.droids.tika.api.TikaParse;
+
+public class TikaParseImpl extends ParseImpl implements TikaParse { // ParseImpl subclass that additionally carries the TikaParse type; adds no fields or methods.
+
+  public TikaParseImpl(String text, Collection<Link> outlinks) { // Forwards both arguments unchanged to ParseImpl; TikaDocumentParser in this commit passes null for outlinks.
+    super(text,outlinks);
+  }
+
+  public TikaParseImpl(String text, Object data, Collection<Link> outlinks) { // Same as above, with an extra data object passed through to ParseImpl.
+    super(text,data,outlinks);
+  }
+}