You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2011/04/20 16:59:24 UTC
svn commit: r1095429 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
Author: nick
Date: Wed Apr 20 14:59:24 2011
New Revision: 1095429
URL: http://svn.apache.org/viewvc?rev=1095429&view=rev
Log:
TIKA-644 - When generating HTML headings from Word, h6 is the highest heading level XHTML allows, so don't try generating h7 (or higher) even if Word has a 'Heading 7' style
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1095429&r1=1095428&r2=1095429&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Wed Apr 20 14:59:24 2011
@@ -377,7 +377,8 @@ public class WordExtractor extends Abstr
styleName.substring(styleName.length()-1)
);
} catch(NumberFormatException e) {}
- tag = "h"+num;
+ // Turn it into a H1 - H6 (H7+ isn't valid!)
+ tag = "h" + Math.min(num, 6);
} else if(styleName.equals("Title")) {
tag = "h1";
styleClass = "title";