You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2012/11/09 14:32:35 UTC

svn commit: r1407447 - in /tika/trunk: ./ src/site/ tika-app/src/main/appended-resources/META-INF/ tika-app/src/main/assembly/ tika-app/src/main/java/org/apache/tika/cli/ tika-app/src/main/java/org/apache/tika/gui/ tika-core/ tika-core/src/ tika-parser...

Author: mikemccand
Date: Fri Nov  9 13:32:34 2012
New Revision: 1407447

URL: http://svn.apache.org/viewvc?rev=1407447&view=rev
Log:
TIKA-1019: revert for now: the test file is too large

Removed:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testDocumentLink.doc
Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/src/site/   (props changed)
    tika/trunk/tika-app/src/main/appended-resources/META-INF/LICENSE   (props changed)
    tika/trunk/tika-app/src/main/assembly/   (props changed)
    tika/trunk/tika-app/src/main/java/org/apache/tika/cli/   (props changed)
    tika/trunk/tika-app/src/main/java/org/apache/tika/gui/   (props changed)
    tika/trunk/tika-core/pom.xml   (props changed)
    tika/trunk/tika-core/src/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/   (props changed)
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/   (props changed)
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java   (props changed)
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/   (props changed)
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/   (props changed)
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/   (props changed)
    tika/trunk/tika-parsers/src/test/resources/log4j.properties   (props changed)
    tika/trunk/tika-parsers/src/test/resources/test-documents/   (props changed)

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1407447&r1=1407446&r2=1407447&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Fri Nov  9 13:32:34 2012
@@ -1,10 +1,9 @@
 Release 1.3 - Current Development
 
-  * MS Word: When a Word (.doc) document contains embedded files or
-    links to external documents, Tika now places a <div
-    class="embedded" id="_XXX"/> placeholder into the XHTML so you can
-    see where in the main text the embedded document occurred
-    (TIKA-956, TIKA-1019).  Embedded Wordpad/RTF documents are now
+  * MS Word: When a Word (.doc) document contains embedded files, Tika
+    now places a <div class="embedded" id="_XXX"/> into the XHTML so
+    you can see where in the main text the embedded document
+    occurred (TIKA-956).  Embedded Wordpad/RTF documents are now
     recognized (TIKA-982).
 
   * PDF: Text from pop-up annotations is now extracted (TIKA-981)

Propchange: tika/trunk/src/site/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-app/src/main/appended-resources/META-INF/LICENSE
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-app/src/main/assembly/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-app/src/main/java/org/apache/tika/gui/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-core/pom.xml
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-core/src/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
            ('svn:mergeinfo' removed)

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1407447&r1=1407446&r2=1407447&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Fri Nov  9 13:32:34 2012
@@ -195,9 +195,7 @@ public class WordExtractor extends Abstr
           if (cr.text().getBytes()[0] == 0x13) {
              Field field = document.getFields().getFieldByStartOffset(FieldsDocumentPart.MAIN,
                                                                       cr.getStartOffset());
-             // 58 is an embedded document
-             // 56 is a document link
-             if (field != null && (field.getType() == 58 || field.getType() == 56)) {
+             if (field != null && field.getType() == 58) {
                // Embedded Object: add a <div
                // class="embedded" id="_X"/> so consumer can see where
                // in the main text each embedded document

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/
            ('svn:mergeinfo' removed)

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java?rev=1407447&r1=1407446&r2=1407447&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java Fri Nov  9 13:32:34 2012
@@ -194,13 +194,6 @@ public class WordParserTest extends Tika
         assertTrue(result.indexOf("_1404039792.rtf") != -1);
     }
 
-    // TIKA-1019
-    public void testDocumentLink() throws Exception {
-        String result = getXML("/test-documents/testDocumentLink.doc").xml;
-        assertTrue(result.indexOf("<div class=\"embedded\" id=\"_1327495611\"/>") != -1);
-        assertTrue(result.indexOf("_1327495611.unknown") != -1);
-    }
-
     public void testWord6Parser() throws Exception {
         InputStream input = WordParserTest.class.getResourceAsStream(
                 "/test-documents/testWORD6.doc");

Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/test/resources/log4j.properties
            ('svn:mergeinfo' removed)

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/
            ('svn:mergeinfo' removed)