You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2011/09/05 16:35:51 UTC
svn commit: r1165300 - in /tika/trunk/tika-parsers/src/test:
java/org/apache/tika/parser/microsoft/
java/org/apache/tika/parser/microsoft/ooxml/
java/org/apache/tika/parser/pdf/ resources/test-documents/
Author: mikemccand
Date: Mon Sep 5 14:35:50 2011
New Revision: 1165300
URL: http://svn.apache.org/viewvc?rev=1165300&view=rev
Log:
add several test cases, derived from test case coming in TIKA-683
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFTwoTextBoxes.pdf (with props)
tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFVarious.pdf (with props)
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.ppt (with props)
tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.pptx (with props)
tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.doc (with props)
tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.docx (with props)
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java?rev=1165300&r1=1165299&r2=1165300&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/PowerPointParserTest.java Mon Sep 5 14:35:50 2011
@@ -18,14 +18,13 @@ package org.apache.tika.parser.microsoft
import java.io.InputStream;
+import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
-import junit.framework.TestCase;
-
-public class PowerPointParserTest extends TestCase {
+public class PowerPointParserTest extends TikaTest {
public void testPowerPointParser() throws Exception {
InputStream input = PowerPointParserTest.class.getResourceAsStream(
@@ -48,4 +47,70 @@ public class PowerPointParserTest extend
}
}
+ public void testVarious() throws Exception { // Parses testPPT_various.ppt with OfficeParser and checks the extracted body text and metadata
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ InputStream stream = PowerPointParserTest.class.getResourceAsStream(
+ "/test-documents/testPPT_various.ppt");
+ try {
+ new OfficeParser().parse(stream, handler, metadata, new ParseContext());
+ } finally {
+ stream.close();
+ }
+
+ String content = handler.toString();
+ //content = content.replaceAll("\\s+"," ");
+ assertContains("Footnote appears here", content);
+ assertContains("This is a footnote.", content);
+ assertContains("This is the header text.", content);
+ assertContains("This is the footer text.", content);
+ assertContains("Here is a text box", content);
+ assertContains("Bold", content);
+ assertContains("italic", content);
+ assertContains("underline", content);
+ assertContains("superscript", content);
+ assertContains("subscript", content);
+ assertContains("Here is a citation:", content);
+ assertContains("Figure 1 This is a caption for Figure 1", content);
+ assertContains("(Kramer)", content);
+ assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+"," ")); // whitespace runs are collapsed to single spaces before matching
+ assertContains("This is a hyperlink", content);
+ assertContains("Here is a list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains("·\tBullet " + row, content);
+ //assertContains("\u00b7\tBullet " + row, content);
+ assertContains("Bullet " + row, content);
+ }
+ assertContains("Here is a numbered list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains(row + ")\tNumber bullet " + row, content);
+ //assertContains(row + ") Number bullet " + row, content);
+ // TODO: the extractor fails to number the bullets (the original note named OOXMLExtractor, but this test runs OfficeParser on a .ppt -- presumably a copy-paste; confirm):
+ assertContains("Number bullet " + row, content);
+ }
+
+ for(int row=1;row<=2;row++) {
+ for(int col=1;col<=3;col++) {
+ assertContains("Row " + row + " Col " + col, content);
+ }
+ }
+
+ assertContains("Keyword1 Keyword2", content);
+ assertEquals("Keyword1 Keyword2",
+ metadata.get(Metadata.KEYWORDS));
+
+ assertContains("Subject is here", content);
+ assertEquals("Subject is here",
+ metadata.get(Metadata.SUBJECT));
+
+ assertContains("Suddenly some Japanese text:", content);
+ // Fullwidth forms of (GHQ): U+FF08 U+FF27 U+FF28 U+FF31 U+FF09
+ assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+ // 12 other characters (the literal below holds twelve Unicode escapes)
+ assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", content);
+
+ assertContains("And then some Gothic text:", content);
+ assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content); // six supplementary characters, each written as a UTF-16 surrogate pair (first is U+10332)
+ }
}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java?rev=1165300&r1=1165299&r2=1165300&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java Mon Sep 5 14:35:50 2011
@@ -24,16 +24,14 @@ import javax.xml.transform.sax.SAXTransf
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
-import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParserTest;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
-import junit.framework.TestCase;
-
-public class WordParserTest extends TestCase {
+public class WordParserTest extends TikaTest {
public void testWordParser() throws Exception {
InputStream input = WordParserTest.class.getResourceAsStream(
@@ -185,4 +183,73 @@ public class WordParserTest extends Test
input.close();
}
}
+
+ public void testVarious() throws Exception { // Parses testWORD_various.doc with OfficeParser and checks the extracted body text and metadata
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ InputStream stream = WordParserTest.class.getResourceAsStream(
+ "/test-documents/testWORD_various.doc");
+ try {
+ new OfficeParser().parse(stream, handler, metadata, new ParseContext());
+ } finally {
+ stream.close();
+ }
+
+ String content = handler.toString();
+ //content = content.replaceAll("\\s+"," ");
+ assertContains("Footnote appears here", content);
+ assertContains("This is a footnote.", content);
+ assertContains("This is the header text.", content);
+ assertContains("This is the footer text.", content);
+ // TODO: WordExtractor misses this
+ //assertContains("Here is a text box", content);
+ assertContains("Bold", content);
+ assertContains("italic", content);
+ assertContains("underline", content);
+ assertContains("superscript", content);
+ assertContains("subscript", content);
+ assertContains("Here is a citation:", content);
+ assertContains("Figure 1 This is a caption for Figure 1", content);
+ assertContains("(Kramer)", content);
+ assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+"," ")); // whitespace runs are collapsed to single spaces before matching
+ assertContains("This is a hyperlink", content);
+ assertContains("Here is a list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains("·\tBullet " + row, content);
+ //assertContains("\u00b7\tBullet " + row, content);
+ assertContains("Bullet " + row, content);
+ }
+ assertContains("Here is a numbered list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains(row + ")\tNumber bullet " + row, content);
+ //assertContains(row + ") Number bullet " + row, content);
+ // TODO: WordExtractor fails to number the bullets:
+ assertContains("Number bullet " + row, content);
+ }
+
+ for(int row=1;row<=2;row++) {
+ for(int col=1;col<=3;col++) {
+ assertContains("Row " + row + " Col " + col, content);
+ }
+ }
+
+ assertContains("Keyword1 Keyword2", content);
+ assertEquals("Keyword1 Keyword2",
+ metadata.get(Metadata.KEYWORDS));
+
+ assertContains("Subject is here", content);
+ assertEquals("Subject is here",
+ metadata.get(Metadata.SUBJECT));
+
+ assertContains("Suddenly some Japanese text:", content);
+ // Fullwidth forms of (GHQ): U+FF08 U+FF27 U+FF28 U+FF31 U+FF09
+ assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+ // 12 other characters (the literal below holds twelve Unicode escapes)
+ assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", content);
+
+ assertContains("And then some Gothic text:", content);
+ assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content); // six supplementary characters, each written as a UTF-16 surrogate pair (first is U+10332)
+ }
+
}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java?rev=1165300&r1=1165299&r2=1165300&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java Mon Sep 5 14:35:50 2011
@@ -25,8 +25,7 @@ import javax.xml.transform.sax.SAXTransf
import javax.xml.transform.sax.TransformerHandler;
import javax.xml.transform.stream.StreamResult;
-import junit.framework.TestCase;
-
+import org.apache.tika.TikaTest;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.ContainerAwareDetector;
import org.apache.tika.io.TikaInputStream;
@@ -38,7 +37,7 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
-public class OOXMLParserTest extends TestCase {
+public class OOXMLParserTest extends TikaTest {
private Parser parser;
@Override
@@ -455,4 +454,141 @@ public class OOXMLParserTest extends Tes
input.close();
}
}
+
+ public void testVarious() throws Exception { // Parses testWORD_various.docx with AutoDetectParser and checks the extracted body text and metadata
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ InputStream stream = OOXMLParserTest.class.getResourceAsStream(
+ "/test-documents/testWORD_various.docx");
+ try {
+ new AutoDetectParser().parse(stream, handler, metadata, new ParseContext());
+ } finally {
+ stream.close();
+ }
+
+ String content = handler.toString();
+ //content = content.replaceAll("\\s+"," ");
+ assertContains("Footnote appears here", content);
+ assertContains("This is a footnote.", content);
+ assertContains("This is the header text.", content);
+ assertContains("This is the footer text.", content);
+ assertContains("Here is a text box", content);
+ assertContains("Bold", content);
+ assertContains("italic", content);
+ assertContains("underline", content);
+ assertContains("superscript", content);
+ assertContains("subscript", content);
+ assertContains("Here is a citation:", content);
+ assertContains("Figure 1 This is a caption for Figure 1", content);
+ assertContains("(Kramer)", content);
+ assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+"," ")); // whitespace runs are collapsed to single spaces before matching
+ assertContains("This is a hyperlink", content);
+ assertContains("Here is a list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains("·\tBullet " + row, content);
+ //assertContains("\u00b7\tBullet " + row, content);
+ assertContains("Bullet " + row, content);
+ }
+ assertContains("Here is a numbered list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains(row + ")\tNumber bullet " + row, content);
+ //assertContains(row + ") Number bullet " + row, content);
+ // TODO: OOXMLExtractor fails to number the bullets:
+ assertContains("Number bullet " + row, content);
+ }
+
+ for(int row=1;row<=2;row++) {
+ for(int col=1;col<=3;col++) {
+ assertContains("Row " + row + " Col " + col, content);
+ }
+ }
+
+ assertContains("Keyword1 Keyword2", content);
+ assertEquals("Keyword1 Keyword2",
+ metadata.get(Metadata.KEYWORDS));
+
+ assertContains("Subject is here", content);
+ assertEquals("Subject is here",
+ metadata.get(Metadata.SUBJECT));
+
+ assertContains("Suddenly some Japanese text:", content);
+ // Fullwidth forms of (GHQ): U+FF08 U+FF27 U+FF28 U+FF31 U+FF09
+ assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+ // 12 other characters (the literal below holds twelve Unicode escapes)
+ assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", content);
+
+ assertContains("And then some Gothic text:", content);
+ assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content); // six supplementary characters, each written as a UTF-16 surrogate pair (first is U+10332)
+ }
+
+ // TODO: once we fix TIKA-705, enable this:
+ /*
+ public void testVariousPPTX() throws Exception {
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+
+ InputStream stream = OOXMLParserTest.class.getResourceAsStream(
+ "/test-documents/testPPT_various.pptx");
+ try {
+ new AutoDetectParser().parse(stream, handler, metadata, new ParseContext());
+ } finally {
+ stream.close();
+ }
+
+ String content = handler.toString();
+ //content = content.replaceAll("\\s+"," ");
+ assertContains("Footnote appears here", content);
+ assertContains("This is a footnote.", content);
+ assertContains("This is the header text.", content);
+ assertContains("This is the footer text.", content);
+ assertContains("Here is a text box", content);
+ assertContains("Bold", content);
+ assertContains("italic", content);
+ assertContains("underline", content);
+ assertContains("superscript", content);
+ assertContains("subscript", content);
+ assertContains("Here is a citation:", content);
+ assertContains("Figure 1 This is a caption for Figure 1", content);
+ assertContains("(Kramer)", content);
+ assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+"," "));
+ assertContains("This is a hyperlink", content);
+ assertContains("Here is a list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains("·\tBullet " + row, content);
+ //assertContains("\u00b7\tBullet " + row, content);
+ assertContains("Bullet " + row, content);
+ }
+ assertContains("Here is a numbered list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains(row + ")\tNumber bullet " + row, content);
+ //assertContains(row + ") Number bullet " + row, content);
+ // TODO: OOXMLExtractor fails to number the bullets:
+ assertContains("Number bullet " + row, content);
+ }
+
+ for(int row=1;row<=2;row++) {
+ for(int col=1;col<=3;col++) {
+ assertContains("Row " + row + " Col " + col, content);
+ }
+ }
+
+ assertContains("Keyword1 Keyword2", content);
+ assertEquals("Keyword1 Keyword2",
+ metadata.get(Metadata.KEYWORDS));
+
+ assertContains("Subject is here", content);
+ assertEquals("Subject is here",
+ metadata.get(Metadata.SUBJECT));
+
+ assertContains("Suddenly some Japanese text:", content);
+ // Special version of (GHQ)
+ assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+ // 12 other characters (the literal below holds twelve Unicode escapes)
+ assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", content);
+
+ assertContains("And then some Gothic text:", content);
+ assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
+ }
+ */
}
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java?rev=1165300&r1=1165299&r2=1165300&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java Mon Sep 5 14:35:50 2011
@@ -18,8 +18,7 @@ package org.apache.tika.parser.pdf;
import java.io.InputStream;
-import junit.framework.TestCase;
-
+import org.apache.tika.TikaTest;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
@@ -30,7 +29,7 @@ import org.xml.sax.ContentHandler;
/**
* Test case for parsing pdf files.
*/
-public class PDFParserTest extends TestCase {
+public class PDFParserTest extends TikaTest {
public void testPdfParsing() throws Exception {
Parser parser = new AutoDetectParser(); // Should auto-detect!
@@ -123,4 +122,93 @@ public class PDFParserTest extends TestC
assertTrue(content.contains("On 16 November 2002"));
assertTrue(content.contains("In many important respects"));
}
+
+ public void testTwoTextBoxes() throws Exception { // Parses testPDFTwoTextBoxes.pdf and checks that the left-column lines come before the right-column lines
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+ ParseContext context = new ParseContext();
+
+ InputStream stream = PDFParserTest.class.getResourceAsStream(
+ "/test-documents/testPDFTwoTextBoxes.pdf");
+ try {
+ parser.parse(stream, handler, metadata, context);
+ } finally {
+ stream.close();
+ }
+
+ String content = handler.toString();
+ content = content.replaceAll("\\s+"," "); // collapse whitespace runs so the single-space expectation below can match across line breaks
+ assertTrue(content.contains("Left column line 1 Left column line 2 Right column line 1 Right column line 2"));
+ }
+
+ public void testVarious() throws Exception { // Parses testPDFVarious.pdf with AutoDetectParser and checks the extracted body text and metadata
+ Parser parser = new AutoDetectParser(); // Should auto-detect!
+ ContentHandler handler = new BodyContentHandler();
+ Metadata metadata = new Metadata();
+ ParseContext context = new ParseContext();
+
+ InputStream stream = PDFParserTest.class.getResourceAsStream(
+ "/test-documents/testPDFVarious.pdf");
+ try {
+ parser.parse(stream, handler, metadata, context);
+ } finally {
+ stream.close();
+ }
+
+ String content = handler.toString();
+ //content = content.replaceAll("\\s+"," ");
+ assertContains("Footnote appears here", content);
+ assertContains("This is a footnote.", content);
+ assertContains("This is the header text.", content);
+ assertContains("This is the footer text.", content);
+ assertContains("Here is a text box", content);
+ assertContains("Bold", content);
+ assertContains("italic", content);
+ assertContains("underline", content);
+ assertContains("superscript", content);
+ assertContains("subscript", content);
+ assertContains("Here is a citation:", content);
+ assertContains("Figure 1 This is a caption for Figure 1", content);
+ assertContains("(Kramer)", content);
+ assertContains("Row 1 column 1 Row 2 column 1 Row 1 column 2 Row 2 column 2", content.replaceAll("\\s+"," ")); // whitespace runs are collapsed to single spaces before matching
+ assertContains("This is a hyperlink", content);
+ assertContains("Here is a list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains("·\tBullet " + row, content);
+ //assertContains("\u00b7\tBullet " + row, content);
+ assertContains("Bullet " + row, content);
+ }
+ assertContains("Here is a numbered list:", content);
+ for(int row=1;row<=3;row++) {
+ //assertContains(row + ")\tNumber bullet " + row, content);
+ assertContains(row + ") Number bullet " + row, content);
+ }
+
+ for(int row=1;row<=2;row++) {
+ for(int col=1;col<=3;col++) {
+ assertContains("Row " + row + " Col " + col, content);
+ }
+ }
+
+ assertContains("Keyword1 Keyword2", content);
+ assertEquals("Keyword1 Keyword2",
+ metadata.get(Metadata.KEYWORDS));
+
+ assertContains("Subject is here", content);
+ assertEquals("Subject is here",
+ metadata.get(Metadata.SUBJECT));
+
+ assertContains("Suddenly some Japanese text:", content);
+ // Fullwidth forms of (GHQ): U+FF08 U+FF27 U+FF28 U+FF31 U+FF09
+ assertContains("\uff08\uff27\uff28\uff31\uff09", content);
+ // 12 other characters (the literal below holds twelve Unicode escapes)
+ assertContains("\u30be\u30eb\u30b2\u3068\u5c3e\u5d0e\u3001\u6de1\u3005\u3068\u6700\u671f", content);
+
+ assertContains("And then some Gothic text:", content);
+ // TODO: the Word doc was saved as a PDF, but that
+ // conversion apparently lost the Gothic characters,
+ // so we cannot test this here:
+ //assertContains("\uD800\uDF32\uD800\uDF3f\uD800\uDF44\uD800\uDF39\uD800\uDF43\uD800\uDF3A", content);
+ }
}
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFTwoTextBoxes.pdf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFTwoTextBoxes.pdf?rev=1165300&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFTwoTextBoxes.pdf
------------------------------------------------------------------------------
svn:mime-type = application/pdf
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFVarious.pdf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFVarious.pdf?rev=1165300&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testPDFVarious.pdf
------------------------------------------------------------------------------
svn:mime-type = application/pdf
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.ppt
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.ppt?rev=1165300&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.ppt
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.pptx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.pptx?rev=1165300&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testPPT_various.pptx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.doc
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.doc?rev=1165300&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.doc
------------------------------------------------------------------------------
svn:mime-type = application/msword
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.docx
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.docx?rev=1165300&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testWORD_various.docx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream