You are viewing a plain-text version of this content. The canonical link was not preserved in this copy.
Posted to commits@tika.apache.org by mi...@apache.org on 2011/09/17 16:15:05 UTC
svn commit: r1171988 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/rtf/TextExtractor.java test/java/org/apache/tika/TestParsers.java
Author: mikemccand
Date: Sat Sep 17 14:15:05 2011
New Revision: 1171988
URL: http://svn.apache.org/viewvc?rev=1171988&view=rev
Log:
TIKA-717: fix RTF parser to extract annotations (comments)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java?rev=1171988&r1=1171987&r2=1171988&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java Sat Sep 17 14:15:05 2011
@@ -900,6 +900,18 @@ final class TextExtractor {
pushText();
// Text inside a shape
groupState.ignore = false;
+ } else if (equals("atnid")) {
+ pushText();
+ // Annotation ID
+ groupState.ignore = false;
+ } else if (equals("atnauthor")) {
+ pushText();
+ // Annotation author
+ groupState.ignore = false;
+ } else if (equals("annotation")) {
+ pushText();
+ // Annotation
+ groupState.ignore = false;
} else if (equals("cell")) {
// TODO: we should produce a table output here?
//addOutputChar(' ');
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=1171988&r1=1171987&r2=1171988&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Sat Sep 17 14:15:05 2011
@@ -19,8 +19,6 @@ package org.apache.tika;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.List;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
@@ -175,16 +173,15 @@ public class TestParsers extends TikaTes
}
public void testComment() throws Exception {
- // TIKA-717: re-enable ppt, rtf once we fix it
+ // TIKA-717: re-enable ppt once we fix it
//final String[] extensions = new String[] {"ppt", "pptx", "doc", "docx", "pdf", "rtf"};
- final String[] extensions = new String[] {"pptx", "doc", "docx", "pdf"};
- final List<String> failures = new ArrayList<String>();
+ final String[] extensions = new String[] {"pptx", "doc", "docx", "pdf", "rtf"};
for(String extension : extensions) {
verifyComment(extension, "testComment");
- if (extension.equals("pdf")) {
- // TIKA-717: re-enable once we fix this:
- //verifyComment(extension, "testComment2");
- }
+ // TIKA-717: re-enable once we fix this:
+ //if (extension.equals("pdf")) {
+ //verifyComment(extension, "testComment2");
+ //}
}
}
}