You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2012/11/09 14:32:35 UTC
svn commit: r1407447 - in /tika/trunk: ./ src/site/
tika-app/src/main/appended-resources/META-INF/ tika-app/src/main/assembly/
tika-app/src/main/java/org/apache/tika/cli/
tika-app/src/main/java/org/apache/tika/gui/ tika-core/ tika-core/src/
tika-parser...
Author: mikemccand
Date: Fri Nov 9 13:32:34 2012
New Revision: 1407447
URL: http://svn.apache.org/viewvc?rev=1407447&view=rev
Log:
TIKA-1019: revert for now: the test file is too large
Removed:
tika/trunk/tika-parsers/src/test/resources/test-documents/testDocumentLink.doc
Modified:
tika/trunk/CHANGES.txt
tika/trunk/src/site/ (props changed)
tika/trunk/tika-app/src/main/appended-resources/META-INF/LICENSE (props changed)
tika/trunk/tika-app/src/main/assembly/ (props changed)
tika/trunk/tika-app/src/main/java/org/apache/tika/cli/ (props changed)
tika/trunk/tika-app/src/main/java/org/apache/tika/gui/ (props changed)
tika/trunk/tika-core/pom.xml (props changed)
tika/trunk/tika-core/src/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/ (props changed)
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/ (props changed)
tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (props changed)
tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/ (props changed)
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ (props changed)
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/ (props changed)
tika/trunk/tika-parsers/src/test/resources/log4j.properties (props changed)
tika/trunk/tika-parsers/src/test/resources/test-documents/ (props changed)
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1407447&r1=1407446&r2=1407447&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Fri Nov 9 13:32:34 2012
@@ -1,10 +1,9 @@
Release 1.3 - Current Development
- * MS Word: When a Word (.doc) document contains embedded files or
- links to external documents, Tika now places a <div
- class="embedded" id="_XXX"/> placeholder into the XHTML so you can
- see where in the main text the embedded document occurred
- (TIKA-956, TIKA-1019). Embedded Wordpad/RTF documents are now
+ * MS Word: When a Word (.doc) document contains embedded files, Tika
+ now places a <div class="embedded" id="_XXX"/> into the XHTML so
+ you can see where in the main text the embedded document
+ occurred (TIKA-956). Embedded Wordpad/RTF documents are now
recognized (TIKA-982).
* PDF: Text from pop-up annotations is now extracted (TIKA-981)
Propchange: tika/trunk/src/site/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-app/src/main/appended-resources/META-INF/LICENSE
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-app/src/main/assembly/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-app/src/main/java/org/apache/tika/cli/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-app/src/main/java/org/apache/tika/gui/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-core/pom.xml
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-core/src/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
('svn:mergeinfo' removed)
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1407447&r1=1407446&r2=1407447&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Fri Nov 9 13:32:34 2012
@@ -195,9 +195,7 @@ public class WordExtractor extends Abstr
if (cr.text().getBytes()[0] == 0x13) {
Field field = document.getFields().getFieldByStartOffset(FieldsDocumentPart.MAIN,
cr.getStartOffset());
- // 58 is an embedded document
- // 56 is a document link
- if (field != null && (field.getType() == 58 || field.getType() == 56)) {
+ if (field != null && field.getType() == 58) {
// Embedded Object: add a <div
// class="embedded" id="_X"/> so consumer can see where
// in the main text each embedded document
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/mime/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/
('svn:mergeinfo' removed)
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java?rev=1407447&r1=1407446&r2=1407447&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java Fri Nov 9 13:32:34 2012
@@ -194,13 +194,6 @@ public class WordParserTest extends Tika
assertTrue(result.indexOf("_1404039792.rtf") != -1);
}
- // TIKA-1019
- public void testDocumentLink() throws Exception {
- String result = getXML("/test-documents/testDocumentLink.doc").xml;
- assertTrue(result.indexOf("<div class=\"embedded\" id=\"_1327495611\"/>") != -1);
- assertTrue(result.indexOf("_1327495611.unknown") != -1);
- }
-
public void testWord6Parser() throws Exception {
InputStream input = WordParserTest.class.getResourceAsStream(
"/test-documents/testWORD6.doc");
Propchange: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/pkg/
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/test/resources/log4j.properties
('svn:mergeinfo' removed)
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/
('svn:mergeinfo' removed)