You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ni...@apache.org on 2010/09/28 12:15:38 UTC

svn commit: r1002097 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java

Author: nick
Date: Tue Sep 28 10:15:38 2010
New Revision: 1002097

URL: http://svn.apache.org/viewvc?rev=1002097&view=rev
Log:
Following the suggestion in TIKA-506, make Word paragraphs formatted in the "HTML Preformatted" style use pre tags

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java?rev=1002097&r1=1002096&r2=1002097&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/WordExtractor.java Tue Sep 28 10:15:38 2010
@@ -358,6 +358,8 @@ public class WordExtractor extends Abstr
        } else if(styleName.equals("Subtitle")) {
           tag = "h2";
           styleClass = "subtitle";
+       } else if(styleName.equals("HTML Preformatted")) {
+          tag = "pre";
        } else {
           styleClass = styleName.replace(' ', '_');
           styleClass = styleClass.substring(0,1).toLowerCase() +