You are viewing a plain text version of this content. The canonical link for it is here.
Posted to droids-commits@incubator.apache.org by rf...@apache.org on 2011/11/23 04:52:43 UTC

svn commit: r1205286 - in /incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika: TikaHtmlParser.java api/TikaParse.java parse/TikaParseImpl.java

Author: rfrovarp
Date: Wed Nov 23 04:52:43 2011
New Revision: 1205286

URL: http://svn.apache.org/viewvc?rev=1205286&view=rev
Log:
Expose all of the parsed goodness.

Modified:
    incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
    incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
    incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java

Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java?rev=1205286&r1=1205285&r2=1205286&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/TikaHtmlParser.java Wed Nov 23 04:52:43 2011
@@ -99,7 +99,7 @@ public class TikaHtmlParser implements T
         }
       }
       
-      return new TikaParseImpl(dataBuffer.toString(), extractedTasks);
+      return new TikaParseImpl(dataBuffer.toString(), extractedTasks, bodyBuffer.toString(), mainContentBuffer.toString(), metadata);
     } catch (SAXException ex) {
       throw new DroidsException("Failure parsing document " + link.getId(), ex);
     } catch (TikaException ex) {

Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java?rev=1205286&r1=1205285&r2=1205286&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/api/TikaParse.java Wed Nov 23 04:52:43 2011
@@ -1,7 +1,15 @@
 package org.apache.droids.tika.api;
 
 import org.apache.droids.api.Parse;
+import org.apache.tika.metadata.Metadata;
 
 public interface TikaParse extends Parse {
 
+  public String getMainContent();
+  
+  public Metadata getMetadata();
+  
+  public String getXml();
+  
+  public String getPlainText();
 }

Modified: incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java
URL: http://svn.apache.org/viewvc/incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java?rev=1205286&r1=1205285&r2=1205286&view=diff
==============================================================================
--- incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java (original)
+++ incubator/droids/trunk/droids-tika/src/main/java/org/apache/droids/tika/parse/TikaParseImpl.java Wed Nov 23 04:52:43 2011
@@ -1,13 +1,19 @@
 package org.apache.droids.tika.parse;
 
+import java.util.ArrayList;
 import java.util.Collection;
 
 import org.apache.droids.api.Link;
 import org.apache.droids.parse.ParseImpl;
 import org.apache.droids.tika.api.TikaParse;
+import org.apache.tika.metadata.Metadata;
 
 public class TikaParseImpl extends ParseImpl implements TikaParse {
 
+  private String plainText;
+  private String mainContent;
+  private Metadata metadata;
+  
   public TikaParseImpl(String text, Collection<Link> outlinks) {
     super(text,outlinks);
   }
@@ -16,4 +22,32 @@ public class TikaParseImpl extends Parse
     super(text,data,outlinks);
   }
 
+  public TikaParseImpl(String xmlContent, ArrayList<Link> extractedTasks,
+      String plainText, String mainContent, Metadata metadata) {
+    this(xmlContent, extractedTasks);
+    this.plainText = plainText;
+    this.mainContent = mainContent;
+    this.metadata = metadata;
+  }
+
+  @Override
+  public String getMainContent() {
+    return mainContent;
+  }
+
+  @Override
+  public Metadata getMetadata() {
+    return metadata;
+  }
+
+  @Override
+  public String getXml() {
+    return super.text;
+  }
+
+  @Override
+  public String getPlainText() {
+    return plainText;
+  }
+
 }