You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to droids-commits@incubator.apache.org by rf...@apache.org on 2011/11/23 04:52:43 UTC
svn commit: r1205286 - in /incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika: TikaHtmlParser.java api/TikaParse.java parse/TikaParseImpl.java
Author: rfrovarp
Date: Wed Nov 23 04:52:43 2011
New Revision: 1205286
URL: http://svn.apache.org/viewvc?rev=1205286&view=rev
Log:
Expose all of the parsed goodness.
Modified:
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1205286&r1=1205285&r2=1205286&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Wed Nov 23 04:52:43 2011
@@ -99,7 +99,7 @@ public class TikaHtmlParser implements T
}
}
- return new TikaParseImpl(dataBuffer.toString(), extractedTasks);
+ return new TikaParseImpl(dataBuffer.toString(), extractedTasks, bodyBuffer.toString(), mainContentBuffer.toString(), metadata);
} catch (SAXException ex) {
throw new DroidsException("Failure parsing document " + link.getId(), ex);
} catch (TikaException ex) {
Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java?rev=1205286&r1=1205285&r2=1205286&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java Wed Nov 23 04:52:43 2011
@@ -1,7 +1,15 @@
package org.apache.droids.tika.api;
import org.apache.droids.api.Parse;
+import org.apache.tika.metadata.Metadata;
public interface TikaParse extends Parse {
+ public String getMainContent();
+
+ public Metadata getMetadata();
+
+ public String getXml();
+
+ public String getPlainText();
}
Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java?rev=1205286&r1=1205285&r2=1205286&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java Wed Nov 23 04:52:43 2011
@@ -1,13 +1,19 @@
package org.apache.droids.tika.parse;
+import java.util.ArrayList;
import java.util.Collection;
import org.apache.droids.api.Link;
import org.apache.droids.parse.ParseImpl;
import org.apache.droids.tika.api.TikaParse;
+import org.apache.tika.metadata.Metadata;
public class TikaParseImpl extends ParseImpl implements TikaParse {
+ private String plainText;
+ private String mainContent;
+ private Metadata metadata;
+
public TikaParseImpl(String text, Collection<Link> outlinks) {
super(text,outlinks);
}
@@ -16,4 +22,32 @@ public class TikaParseImpl extends Parse
super(text,data,outlinks);
}
+ public TikaParseImpl(String xmlContent, ArrayList<Link> extractedTasks,
+ String plainText, String mainContent, Metadata metadata) {
+ this(xmlContent, extractedTasks);
+ this.plainText = plainText;
+ this.mainContent = mainContent;
+ this.metadata = metadata;
+ }
+
+ @Override
+ public String getMainContent() {
+ return mainContent;
+ }
+
+ @Override
+ public Metadata getMetadata() {
+ return metadata;
+ }
+
+ @Override
+ public String getXml() {
+ return super.text;
+ }
+
+ @Override
+ public String getPlainText() {
+ return plainText;
+ }
+
}