You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/09/19 18:34:26 UTC

svn commit: r1172690 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java

Author: nick
Date: Mon Sep 19 16:34:26 2011
New Revision: 1172690

URL: http://svn.apache.org/viewvc?rev=1172690&view=rev
Log:
TIKA-705 Temporary workaround for the relative links issue, pending upgrade to POI 3.8 beta 5

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java?rev=1172690&r1=1172689&r2=1172690&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java Mon Sep 19 16:34:26 2011
@@ -19,6 +19,7 @@ package org.apache.tika.parser.microsoft
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.List;
 
 import org.apache.poi.POIXMLDocument;
@@ -120,9 +121,21 @@ public abstract class AbstractOOXMLExtra
             for (PackagePart source : getMainDocumentParts()) {
                 for (PackageRelationship rel : source.getRelationships()) {
                     if (rel.getTargetMode() == TargetMode.INTERNAL) {
+                        // TODO Simplify this when on POI 3.8 beta 5
                         URI uri = rel.getTargetURI();
+                        if(uri.getFragment() != null) {
+                           // TODO Workaround for TIKA-705 needed until 3.8 beta 5
+                           try {
+                              String u = uri.toString();
+                              uri = new URI(u.substring(0, u.indexOf('#')));
+                           } catch(URISyntaxException e) {
+                              throw new TikaException("Broken OOXML file", e);
+                           }
+                        }
                         PackagePart target = rel.getPackage().getPart(
                                 PackagingURIHelper.createPartName(uri));
+                        // TODO Simpler version in POI 3.8 beta 5
+                        // PackagePart target = source.getRelatedPart(rel);
 
                         String type = rel.getRelationshipType();
                         if (RELATION_OLE_OBJECT.equals(type)