You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by rf...@apache.org on 2011/11/23 00:58:17 UTC
svn commit: r1205253 - in
/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika:
TikaDocumentParser.java TikaHtmlParser.java api/ api/TikaParse.java
api/TikaParser.java parse/ parse/TikaParseImpl.java
Author: rfrovarp
Date: Wed Nov 23 00:58:16 2011
New Revision: 1205253
URL: http://svn.apache.org/viewvc?rev=1205253&view=rev
Log:
Starting to expand the Tika module.
It can do all sorts of cool stuff.
Added:
incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/
incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java
incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/
incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
Modified:
incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
Modified: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java?rev=1205253&r1=1205252&r2=1205253&view=diff
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java (original)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaDocumentParser.java Wed Nov 23 00:58:16 2011
@@ -23,10 +23,10 @@ import java.io.InputStream;
import org.apache.droids.api.ContentEntity;
import org.apache.droids.api.Link;
-import org.apache.droids.api.Parse;
-import org.apache.droids.api.Parser;
import org.apache.droids.exception.DroidsException;
-import org.apache.droids.parse.ParseImpl;
+import org.apache.droids.tika.api.TikaParse;
+import org.apache.droids.tika.api.TikaParser;
+import org.apache.droids.tika.parse.TikaParseImpl;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
@@ -34,10 +34,10 @@ import org.apache.tika.parser.ParseConte
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;
-public class TikaDocumentParser implements Parser {
+public class TikaDocumentParser implements TikaParser {
@Override
- public Parse parse(ContentEntity entity, Link link) throws DroidsException,
+ public TikaParse parse(ContentEntity entity, Link link) throws DroidsException,
IOException {
org.apache.tika.parser.Parser parser = new AutoDetectParser();
Metadata metadata = new Metadata();
@@ -46,7 +46,7 @@ public class TikaDocumentParser implemen
InputStream instream = entity.obtainContent();
try {
parser.parse(instream, handler, metadata, new ParseContext());
- ParseImpl parse = new ParseImpl(handler.toString(),null);
+ TikaParseImpl parse = new TikaParseImpl(handler.toString(),null);
return parse;
Modified: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1205253&r1=1205252&r2=1205253&view=diff
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Wed Nov 23 00:58:16 2011
@@ -23,11 +23,11 @@ import java.util.Map;
import org.apache.droids.api.ContentEntity;
import org.apache.droids.api.Link;
-import org.apache.droids.api.Parse;
-import org.apache.droids.api.Parser;
import org.apache.droids.exception.DroidsException;
-import org.apache.droids.parse.ParseImpl;
import org.apache.droids.parse.html.LinkExtractor;
+import org.apache.droids.tika.api.TikaParse;
+import org.apache.droids.tika.api.TikaParser;
+import org.apache.droids.tika.parse.TikaParseImpl;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
@@ -35,7 +35,7 @@ import org.apache.tika.parser.ParseConte
import org.apache.tika.sax.TeeContentHandler;
import org.xml.sax.SAXException;
-public class TikaHtmlParser implements Parser {
+public class TikaHtmlParser implements TikaParser {
private Map<String, String> elements= null;
@@ -51,7 +51,7 @@ public class TikaHtmlParser implements P
}
@Override
- public Parse parse(ContentEntity entity, Link link) throws IOException, DroidsException {
+ public TikaParse parse(ContentEntity entity, Link link) throws IOException, DroidsException {
// Init Tika objects
org.apache.tika.parser.Parser parser = new AutoDetectParser();
Metadata metadata = new Metadata();
@@ -69,7 +69,7 @@ public class TikaHtmlParser implements P
try {
parser.parse(instream, parallelHandler, metadata, new ParseContext());
- return new ParseImpl(data.toString(), extractor.getLinks());
+ return new TikaParseImpl(data.toString(), extractor.getLinks());
} catch (SAXException ex) {
throw new DroidsException("Failure parsing document " + link.getId(), ex);
} catch (TikaException ex) {
Added: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java?rev=1205253&view=auto
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java (added)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java Wed Nov 23 00:58:16 2011
@@ -0,0 +1,7 @@
+package org.apache.droids.tika.api;
+
+import org.apache.droids.api.Parse;
+
+public interface TikaParse extends Parse {
+
+}
Added: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java?rev=1205253&view=auto
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java (added)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParser.java Wed Nov 23 00:58:16 2011
@@ -0,0 +1,7 @@
+package org.apache.droids.tika.api;
+
+import org.apache.droids.api.Parser;
+
+public interface TikaParser extends Parser {
+
+}
Added: incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
URL: http://svn.apache.org/viewvc/incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java?rev=1205253&view=auto
==============================================================================
--- incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java (added)
+++ incubator/droids/branches/0.1.x/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java Wed Nov 23 00:58:16 2011
@@ -0,0 +1,18 @@
+package org.apache.droids.tika.parse;
+
+import java.util.Collection;
+
+import org.apache.droids.api.Link;
+import org.apache.droids.parse.ParseImpl;
+import org.apache.droids.tika.api.TikaParse;
+
+public class TikaParseImpl extends ParseImpl implements TikaParse {
+
+ public TikaParseImpl(String text, Collection<Link> outlinks) {
+ super(text,outlinks);
+ }
+
+ public TikaParseImpl(String text, Object data, Collection<Link> outlinks) {
+ super(text,data,outlinks);
+ }
+}