You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/04/20 16:59:24 UTC

svn commit: r1095429 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java

Author: nick
Date: Wed Apr 20 14:59:24 2011
New Revision: 1095429

URL: http://svn.apache.org/viewvc?rev=1095429&view=rev
Log:
TIKA-644 - When generating HTML headings from Word, h6 is the highest heading level that XHTML allows, so don't try generating h7 (or higher) even if Word has a 'Heading 7' style

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1095429&r1=1095428&r2=1095429&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Wed Apr 20 14:59:24 2011
@@ -377,7 +377,8 @@ public class WordExtractor extends Abstr
                    styleName.substring(styleName.length()-1)
              );
           } catch(NumberFormatException e) {}
-          tag = "h"+num;
+          // Turn it into a H1 - H6 (H7+ isn't valid!)
+          tag = "h" + Math.min(num, 6);
        } else if(styleName.equals("Title")) {
           tag = "h1";
           styleClass = "title";