You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/04/27 13:18:11 UTC

svn commit: r1097084 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java

Author: maxcom
Date: Wed Apr 27 11:18:11 2011
New Revision: 1097084

URL: http://svn.apache.org/viewvc?rev=1097084&view=rev
Log:
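OfficeParser: HWPF: ignore invalid style references (a paragraph whose style index is out of range for the document's style sheet is now emitted as a plain "p" element with no class attribute)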

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1097084&r1=1097083&r2=1097084&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Wed Apr 27 11:18:11 2011
@@ -147,18 +147,22 @@ public class WordExtractor extends Abstr
           return (t.numParagraphs()-1);
        }
 
-       StyleDescription style = 
-          document.getStyleSheet().getStyleDescription(p.getStyleIndex());
-       TagAndStyle tas = buildParagraphTagAndStyle(
-             style.getName(), (parentTableLevel>0)
-       );
+       TagAndStyle tas;
+
+       if (document.getStyleSheet().numStyles()>p.getStyleIndex()) {
+           StyleDescription style =
+              document.getStyleSheet().getStyleDescription(p.getStyleIndex());
+           tas = buildParagraphTagAndStyle(style.getName(), (parentTableLevel>0));
+       } else {
+           tas = new TagAndStyle("p", null);
+       }
 
        if(tas.getStyleClass() != null) {
-          xhtml.startElement(tas.getTag(), "class", tas.getStyleClass());
+           xhtml.startElement(tas.getTag(), "class", tas.getStyleClass());
        } else {
-          xhtml.startElement(tas.getTag());
+           xhtml.startElement(tas.getTag());
        }
-       
+
        for(int j=0; j<p.numCharacterRuns(); j++) {
           CharacterRun cr = p.getCharacterRun(j);