You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2011/09/17 16:15:05 UTC

svn commit: r1171988 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/rtf/TextExtractor.java test/java/org/apache/tika/TestParsers.java

Author: mikemccand
Date: Sat Sep 17 14:15:05 2011
New Revision: 1171988

URL: http://svn.apache.org/viewvc?rev=1171988&view=rev
Log:
TIKA-717: fix RTF parser to extract annotations (comments)

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java?rev=1171988&r1=1171987&r2=1171988&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java Sat Sep 17 14:15:05 2011
@@ -900,6 +900,18 @@ final class TextExtractor {
             pushText();
             // Text inside a shape
             groupState.ignore = false;
+        } else if (equals("atnid")) {
+            pushText();
+            // Annotation ID
+            groupState.ignore = false;
+        } else if (equals("atnauthor")) {
+            pushText();
+            // Annotation author
+            groupState.ignore = false;
+        } else if (equals("annotation")) {
+            pushText();
+            // Annotation
+            groupState.ignore = false;
         } else if (equals("cell")) {
             // TODO: we should produce a table output here?
             //addOutputChar(' ');

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=1171988&r1=1171987&r2=1171988&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Sat Sep 17 14:15:05 2011
@@ -19,8 +19,6 @@ package org.apache.tika;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
@@ -175,16 +173,15 @@ public class TestParsers extends TikaTes
     }
 
     public void testComment() throws Exception {
-        // TIKA-717: re-enable ppt, rtf once we fix it
+        // TIKA-717: re-enable ppt once we fix it
         //final String[] extensions = new String[] {"ppt", "pptx", "doc", "docx", "pdf", "rtf"};
-        final String[] extensions = new String[] {"pptx", "doc", "docx", "pdf"};
-        final List<String> failures = new ArrayList<String>();
+        final String[] extensions = new String[] {"pptx", "doc", "docx", "pdf", "rtf"};
         for(String extension : extensions) {
             verifyComment(extension, "testComment");
-            if (extension.equals("pdf")) {
-                // TIKA-717: re-enable once we fix this:
-                //verifyComment(extension, "testComment2");
-            }
+            // TIKA-717: re-enable once we fix this:
+            //if (extension.equals("pdf")) {
+            //verifyComment(extension, "testComment2");
+            //}
         }
     }
 }