You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/02/01 17:15:52 UTC

svn commit: r1066081 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/rtf/ test/java/org/apache/tika/ test/resources/test-documents/

Author: jukka
Date: Tue Feb  1 16:15:51 2011
New Revision: 1066081

URL: http://svn.apache.org/viewvc?rev=1066081&view=rev
Log:
TIKA-422: Wrong charset conversion in some RTF documents.

Patch by Cristian Vat

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testRTF-ms932.rtf
    tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation.rtf
    tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFUmlautSpaces.rtf
    tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWord2010CzechCharacters.rtf
    tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWordPadCzechCharacters.rtf
Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=1066081&r1=1066080&r2=1066081&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java Tue Feb  1 16:15:51 2011
@@ -16,19 +16,8 @@
  */
 package org.apache.tika.parser.rtf;
 
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.Set;
-
-import javax.swing.text.AttributeSet;
-import javax.swing.text.BadLocationException;
-import javax.swing.text.DefaultStyledDocument;
-import javax.swing.text.Document;
-import javax.swing.text.StyleContext;
-import javax.swing.text.rtf.RTFEditorKit;
-
 import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.IOUtils;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.ParseContext;
@@ -37,45 +26,333 @@ import org.apache.tika.sax.XHTMLContentH
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
+import javax.swing.text.*;
+import javax.swing.text.rtf.RTFEditorKit;
+import java.io.*;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 /**
  * RTF parser
  */
 public class RTFParser implements Parser {
 
-    private static final Set<MediaType> SUPPORTED_TYPES =
-        Collections.singleton(MediaType.application("rtf"));
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections
+            .singleton(MediaType.application("rtf"));
+
+    private static final Pattern F_PATTERN = Pattern.compile("\\\\a?f([0-9]+)");
+
+    private static final Pattern FCHARSET_PATTERN = Pattern
+            .compile("\\\\fcharset[0-9]+");
+
+    private static final Pattern ANSICPG_PATTERN = Pattern
+            .compile("\\\\ansicpg[0-9]+");
+
+    private static final Pattern DEFAULT_FONT_PATTERN = Pattern.compile("\\\\deff([0-9]+)"); // group(1) = default font number
+
+    private static final Pattern FONT_FAMILY_PATTERN = Pattern.compile("\\\\f(nil|roman|swiss|modern|script|decor|tech|bidi)");
+
+    private static Map<Integer, String> FONTSET_MAP = new HashMap<Integer, String>();
+    static { // fills FONTSET_MAP: charset number (as read by loadCharset from \fcharsetN) -> Java charset name
+        FONTSET_MAP.put(0, "windows-1251"); // ANSI. NOTE(review): cp1251 is also what 89/204 (Russian) map to below; Western ANSI is usually windows-1252 -- confirm. This exact literal is also the "write data through unescaped" marker in createUnicodeRtfTempFile, so change all uses together.
+        // charset 1 is Default
+        // charset 2 is Symbol
+
+        FONTSET_MAP.put(77, "MacRoman"); // Mac Roman
+        FONTSET_MAP.put(78, "Shift_JIS"); // Mac Shift Jis
+        FONTSET_MAP.put(79, "ms949"); // Mac Hangul
+        FONTSET_MAP.put(80, "GB2312"); // Mac GB2312
+        FONTSET_MAP.put(81, "Big5"); // Mac Big5
+        FONTSET_MAP.put(82, "johab"); // Mac Johab (old)
+        FONTSET_MAP.put(83, "MacHebrew"); // Mac Hebrew
+        FONTSET_MAP.put(84, "MacArabic"); // Mac Arabic
+        FONTSET_MAP.put(85, "MacGreek"); // Mac Greek
+        FONTSET_MAP.put(86, "MacTurkish"); // Mac Turkish
+        FONTSET_MAP.put(87, "MacThai"); // Mac Thai
+        FONTSET_MAP.put(88, "cp1250"); // Mac East Europe
+        FONTSET_MAP.put(89, "cp1251"); // Mac Russian
+
+        FONTSET_MAP.put(128, "MS932"); // Shift JIS
+        FONTSET_MAP.put(129, "ms949"); // Hangul
+        FONTSET_MAP.put(130, "ms1361"); // Johab
+        FONTSET_MAP.put(134, "ms936"); // GB2312
+        FONTSET_MAP.put(136, "ms950"); // Big5
+        FONTSET_MAP.put(161, "cp1253"); // Greek
+        FONTSET_MAP.put(162, "cp1254"); // Turkish
+        FONTSET_MAP.put(163, "cp1258"); // Vietnamese
+        FONTSET_MAP.put(177, "cp1255"); // Hebrew
+        FONTSET_MAP.put(178, "cp1256"); // Arabic
+        // FONTSET_MAP.put( 179, "" ); // Arabic Traditional
+        // FONTSET_MAP.put( 180, "" ); // Arabic user
+        // FONTSET_MAP.put( 181, "" ); // Hebrew user
+        FONTSET_MAP.put(186, "cp1257"); // Baltic
+
+        FONTSET_MAP.put(204, "cp1251"); // Russian
+        FONTSET_MAP.put(222, "ms874"); // Thai
+        FONTSET_MAP.put(238, "cp1250"); // Eastern European
+        FONTSET_MAP.put(254, "cp437"); // PC 437
+        FONTSET_MAP.put(255, "cp850"); // OEM
+    }
 
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return SUPPORTED_TYPES;
     }
 
-    public void parse(
-            InputStream stream, ContentHandler handler,
-            Metadata metadata, ParseContext context)
-            throws IOException, SAXException, TikaException {
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        File tempFile = null;
+        InputStream in = null;
         try {
+            tempFile = createUnicodeRtfTempFile(stream);
+            in = new FileInputStream(tempFile);
+
             Document sd = new CustomStyledDocument();
-            new RTFEditorKit().read(stream, sd, 0);
+            new RTFEditorKit().read(in, sd, 0);
 
-            XHTMLContentHandler xhtml =
-                new XHTMLContentHandler(handler, metadata);
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+                    metadata);
             xhtml.startDocument();
             xhtml.element("p", sd.getText(0, sd.getLength()));
             xhtml.endDocument();
         } catch (BadLocationException e) {
             throw new TikaException("Error parsing an RTF document", e);
+        } finally {
+            IOUtils.closeQuietly(in);
+            if (tempFile != null) {
+                tempFile.delete();
+            }
         }
     }
 
     /**
      * @deprecated This method will be removed in Apache Tika 1.0.
      */
-    public void parse(
-            InputStream stream, ContentHandler handler, Metadata metadata)
-            throws IOException, SAXException, TikaException {
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata) throws IOException, SAXException, TikaException {
         parse(stream, handler, metadata, new ParseContext());
     }
 
+    private String escapeByUnicode(String data, String enc) { // decodes each run of \'hh hex escapes in data with charset enc and re-emits it via appendUnicodeStr; all other text is copied through unchanged
+        StringBuilder dataBuf = new StringBuilder(data.length() + 16); // result
+        StringBuilder keywordBuf = new StringBuilder(4); // characters consumed by the current iteration (at most 4)
+        StringBuilder origDataBuf = new StringBuilder(); // raw text of the pending \'hh run, kept for the unsupported-charset fallback
+        ByteArrayOutputStream baos = new ByteArrayOutputStream(); // byte values of the pending \'hh run
+        for (int i = 0; i < data.length(); i++) {
+            char c1 = data.charAt(i);
+            keywordBuf.append(c1);
+            if (c1 == '\\' && data.length()>i+1) {
+                i++;
+                char c2 = data.charAt(i);
+                keywordBuf.append(c2);
+                if (c2 == '\'' && data.length() > i + 2) { // both hex digits must be present; a truncated \' or \'h at the end of data is copied as-is instead of throwing StringIndexOutOfBoundsException
+                    i++;
+                    char c3 = data.charAt(i);
+                    keywordBuf.append(c3);
+                    if ((c3 >= '0' && c3 <= '9') || (c3 >= 'a' && c3 <= 'f')
+                            || (c3 >= 'A' && c3 <= 'F')) {
+                        i++;
+                        char c4 = data.charAt(i);
+                        keywordBuf.append(c4);
+                        if ((c4 >= '0' && c4 <= '9')
+                                || (c4 >= 'a' && c4 <= 'f')
+                                || (c4 >= 'A' && c4 <= 'F')) {
+                            int value = Integer.parseInt(
+                                    String.valueOf(new char[] { c3, c4 }), 16);
+                            baos.write(value);
+                            origDataBuf.append(keywordBuf.toString());
+                            keywordBuf.delete(0, 4);
+                            continue; // stay in the run: following \'hh escapes are decoded together (multi-byte charsets)
+                        }
+                    }
+                }
+            }
+            if (baos.size() != 0) { // the run just ended: flush it before the non-escape text
+                try {
+                    appendUnicodeStr(dataBuf, new String(baos.toByteArray(),
+                            enc));
+                } catch (UnsupportedEncodingException e) {
+                    dataBuf.append(origDataBuf.toString()); // unknown charset name: keep the original escapes verbatim
+                }
+                origDataBuf.delete(0, origDataBuf.length());
+                baos.reset();
+            }
+            dataBuf.append(keywordBuf.toString());
+            keywordBuf.delete(0, 4);
+        }
+
+        if (baos.size() != 0) { // data ended inside a run
+            try {
+                appendUnicodeStr(dataBuf, new String(baos.toByteArray(), enc));
+            } catch (UnsupportedEncodingException e) {
+                dataBuf.append(origDataBuf.toString());
+            }
+        }
+
+        return dataBuf.toString();
+    }
+
+    private void appendUnicodeStr(StringBuilder dataBuf, String value) { // appends value to dataBuf; chars with code in [20, 80) are copied, all others are written as a braced unicode-escape group holding the decimal code
+        for (int j = 0; j < value.length(); j++) {
+            char ch = value.charAt(j);
+            if (ch >= 20 && ch < 80) { // NOTE(review): bounds are decimal (20..79), not 0x20..0x7F -- confirm intent; as written, backslash (92), '{' (123) and '}' (125) always take the escaped branch
+                dataBuf.append(ch);
+            } else {
+                dataBuf.append("{\\u");
+                dataBuf.append((int) ch);
+                dataBuf.append('}');
+            }
+        }
+    }
+
+    private File createUnicodeRtfTempFile(InputStream in) throws IOException {
+        boolean isDelete = false;
+        File tempFile = null;
+        BufferedOutputStream out = null;
+        try {
+            tempFile = File.createTempFile("temp", ".rtf");
+            out = new BufferedOutputStream(new FileOutputStream(tempFile));
+
+            String defaultCharset = "windows-1251"; // ansi
+            String defaultFont = "0";
+            Map<String, String> fontTableMap = new HashMap<String, String>();
+            StringBuilder dataBuf = new StringBuilder(255);
+            int ch;
+            LinkedList<String> charsetQueue = new LinkedList<String>();
+            int depth = 0;
+            String prevFt = null;
+            int prevCh = -1;
+            while ((ch = in.read()) != -1) {
+                if ( ((ch == '{' || ch == '}') && prevCh!='\\') || ( ch == ' ' && (! FONT_FAMILY_PATTERN.matcher(dataBuf.toString()).find())) ) {
+                    if (charsetQueue.size() > depth + 1) {
+                        charsetQueue.removeLast();
+                    }
+
+                    String data = dataBuf.toString();
+                    data = data.replace("\\cell","\\u0020\\cell");
+
+                    if(data.indexOf("\\colortbl")!=-1){
+                        // End of font table, clear last/previous font encountered.
+                        prevFt = null;
+                    }
+
+                    if (depth == 1) {
+                        // check control words for a default charset
+                        String cset = loadAnsiCpg(data);
+                        if (cset != null) {
+                            defaultCharset = cset;
+                        }
+                        Matcher matcher = DEFAULT_FONT_PATTERN.matcher(data);
+                        if(matcher.find()){
+                            defaultFont = matcher.group(1);
+                        }
+                    }
+
+                    String ft = loadFontTable(data);
+                    String charset = loadCharset(data);
+                    if (ft != null && charset != null) {
+                        fontTableMap.put(ft, charset);
+                    }
+
+                    if (ft == null && prevCh == ' ') {
+                        ft = prevFt;
+                    } else if (ft != null) {
+                        prevFt = ft;
+                    }
+                    if(ft==null){
+                        ft = defaultFont;
+                    }
+
+                    // set a current charset
+                    if (charset == null && ft != null) {
+                        charset = fontTableMap.get(ft);
+                    }
+                    if (charset == null && charsetQueue.size() > 0) {
+                        charset = charsetQueue.getLast();
+                    }
+                    if (charset == null) {
+                        charset = defaultCharset;
+                    }
+
+                    // add the current charset to a queue
+                    if (charsetQueue.size() < depth + 1) {
+                        charsetQueue.add(charset);
+                    }
+
+                    String escapedStr = "windows-1251".equals(charset) ? data
+                            : escapeByUnicode(data, charset);
+                    out.write(escapedStr.getBytes("UTF-8"));
+                    out.write(ch);
+                    dataBuf.delete(0, dataBuf.length());
+
+                    prevCh = ch;
+
+                    // update a depth
+                    if (ch == '{') {
+                        depth++;
+                    } else if (ch == '}') {
+                        depth--;
+                    }
+                } else {
+                    dataBuf.append((char) ch);
+                }
+            }
+            out.flush();
+        } catch (IOException e) {
+            isDelete = true;
+            throw e;
+        } finally {
+            IOUtils.closeQuietly(out);
+            if (isDelete && tempFile != null) {
+                tempFile.delete();
+            }
+        }
+
+        return tempFile;
+    }
+
+    private String loadFontTable(String line) {
+        Matcher m = F_PATTERN.matcher(line);
+        String font = null;
+        while((m.find())) {
+            font = m.group(1);
+        }
+        return font;
+    }
+
+    private String loadAnsiCpg(String line) { // returns the FONTSET_MAP entry for the number of the first \ansicpgN control word in line, or null if there is none
+        Matcher m = ANSICPG_PATTERN.matcher(line);
+        String charset = null;
+        if (m.find()) {
+            int encVal;
+            try {
+                encVal = Integer.parseInt(m.group().substring(8)); // 8 == length of "\ansicpg"
+                charset = FONTSET_MAP.get(encVal); // NOTE(review): FONTSET_MAP keys are \fcharset ids (0..255) but encVal here is a code page number (e.g. 932, 1252), so this looks like it yields null for typical documents -- confirm
+            } catch (NumberFormatException e) {
+                // ignore: the digits did not fit in an int; charset stays null
+            }
+        }
+
+        return charset;
+    }
+
+    private String loadCharset(String line) {
+        Matcher m = FCHARSET_PATTERN.matcher(line);
+        String charset = null;
+        if (m.find()) {
+            int encVal;
+            try {
+                encVal = Integer.parseInt(m.group().substring(9));
+            } catch (NumberFormatException e) {
+                encVal = 0;
+            }
+            charset = FONTSET_MAP.get(encVal);
+        }
+
+        return charset;
+    }
+
     /**
      * Customized version of {@link DefaultStyledDocument}. Adds whitespace
      * to places where words otherwise could have run together (see
@@ -84,21 +361,25 @@ public class RTFParser implements Parser
      * <a href="https://issues.apache.org/jira/browse/TIKA-282">TIKA-282</a>).
      */
     private static class CustomStyledDocument extends DefaultStyledDocument {
+        private boolean isPrevUnicode = false;
 
         public CustomStyledDocument() {
             super(new NoReclaimStyleContext());
         }
 
         @Override
-        public void insertString(
-                int offs, String str, AttributeSet a)
-        throws BadLocationException {
-            if (offs > 0 && offs == getLength()) {
+        public void insertString(int offs, String str, AttributeSet a)
+                throws BadLocationException {
+            boolean isUnicode = str.length() == 1 && str.charAt(0) > 127;
+
+            if (offs > 0 && offs == getLength() && !isPrevUnicode && !isUnicode) {
                 super.insertString(offs, " ", a);
                 super.insertString(getLength(), str, a);
             } else {
                 super.insertString(offs, str, a);
             }
+
+            isPrevUnicode = isUnicode;
         }
 
     }

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java?rev=1066081&r1=1066080&r2=1066081&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/TestParsers.java Tue Feb  1 16:15:51 2011
@@ -16,14 +16,7 @@
  */
 package org.apache.tika;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.net.URISyntaxException;
-import java.net.URL;
-
 import junit.framework.TestCase;
-
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -31,6 +24,12 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.utils.ParseUtils;
 import org.xml.sax.helpers.DefaultHandler;
 
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.net.URL;
+
 /**
  * Junit test class for Tika {@link Parser}s.
  */
@@ -66,6 +65,50 @@ public class TestParsers extends TestCas
         assertEquals(s1, s2);
     }
 
+    public void testRTFms932Extraction() throws Exception {
+        File file = getResourceAsFile("/test-documents/testRTF-ms932.rtf");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/rtf");
+        assertEquals(s1, s2);
+        // Hello in Japanese
+        assertTrue(s1.contains("\u3053\u3093\u306b\u3061\u306f"));
+    }
+
+    public void testRTFUmlautSpacesExtraction() throws Exception {
+        File file = getResourceAsFile("/test-documents/testRTFUmlautSpaces.rtf");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/rtf");
+        assertEquals(s1, s2);
+        assertTrue(s1.contains("\u00DCbersicht"));
+    }
+
+    public void testRTFWordPadCzechCharactersExtraction() throws Exception {
+        File file = getResourceAsFile("/test-documents/testRTFWordPadCzechCharacters.rtf");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/rtf");
+        assertEquals(s1, s2);
+        assertTrue(s1.contains("\u010Cl\u00E1nek t\u00FDdne"));
+        assertTrue(s1.contains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty"));
+    }
+
+    public void testRTFWord2010CzechCharactersExtraction() throws Exception {
+        File file = getResourceAsFile("/test-documents/testRTFWord2010CzechCharacters.rtf");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/rtf");
+        assertEquals(s1, s2);
+        assertTrue(s1.contains("\u010Cl\u00E1nek t\u00FDdne"));
+        assertTrue(s1.contains("starov\u011Bk\u00E9 \u017Eidovsk\u00E9 n\u00E1bo\u017Eensk\u00E9 texty"));
+    }
+
+    public void testRTFTableCellSeparation() throws Exception {
+        File file = getResourceAsFile("/test-documents/testRTFTableCellSeparation.rtf");
+        String s1 = ParseUtils.getStringContent(file, tc);
+        String s2 = ParseUtils.getStringContent(file, tc, "application/rtf");
+        assertEquals(s1, s2);
+        String content = s1.replaceAll("\\s+"," ");
+        assertTrue(content.contains("a b c d \u00E4 \u00EB \u00F6 \u00FC"));
+    }
+
     public void testXMLExtraction() throws Exception {
         File file = getResourceAsFile("/test-documents/testXML.xml");
         String s1 = ParseUtils.getStringContent(file, tc);

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testRTF-ms932.rtf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTF-ms932.rtf?rev=1066081&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testRTF-ms932.rtf (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testRTF-ms932.rtf Tue Feb  1 16:15:51 2011
@@ -0,0 +1,30 @@
+{\rtf1\ansi\ansicpg932\uc2 \deff26\deflang1033\deflangfe1041{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
+{\f23\froman\fcharset128\fprq1{\*\panose 02020609040205080304}\'82\'6c\'82\'72 \'96\'be\'92\'a9{\*\falt MS Mincho};}{\f26\froman\fcharset0\fprq2{\*\panose 02040604050505020304}Century;}
+{\f28\froman\fcharset128\fprq1{\*\panose 02020609040205080304}@\'82\'6c\'82\'72 \'96\'be\'92\'a9;}{\f37\froman\fcharset238\fprq2 Times New Roman CE;}{\f38\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f40\froman\fcharset161\fprq2 Times New Roman Greek;}
+{\f41\froman\fcharset162\fprq2 Times New Roman Tur;}{\f42\froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f43\froman\fcharset178\fprq2 Times New Roman (Arabic);}{\f44\froman\fcharset186\fprq2 Times New Roman Baltic;}
+{\f223\froman\fcharset0\fprq1 MS Mincho Western{\*\falt MS Mincho};}{\f221\froman\fcharset238\fprq1 MS Mincho CE{\*\falt MS Mincho};}{\f222\froman\fcharset204\fprq1 MS Mincho Cyr{\*\falt MS Mincho};}
+{\f224\froman\fcharset161\fprq1 MS Mincho Greek{\*\falt MS Mincho};}{\f225\froman\fcharset162\fprq1 MS Mincho Tur{\*\falt MS Mincho};}{\f228\froman\fcharset186\fprq1 MS Mincho Baltic{\*\falt MS Mincho};}{\f245\froman\fcharset238\fprq2 Century CE;}
+{\f246\froman\fcharset204\fprq2 Century Cyr;}{\f248\froman\fcharset161\fprq2 Century Greek;}{\f249\froman\fcharset162\fprq2 Century Tur;}{\f252\froman\fcharset186\fprq2 Century Baltic;}
+{\f263\froman\fcharset0\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Western;}{\f261\froman\fcharset238\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 CE;}{\f262\froman\fcharset204\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Cyr;}
+{\f264\froman\fcharset161\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Greek;}{\f265\froman\fcharset162\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Tur;}{\f268\froman\fcharset186\fprq1 @\'82\'6c\'82\'72 \'96\'be\'92\'a9 Baltic;}}{\colortbl;\red0\green0\blue0;
+\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;
+\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\qj \li0\ri0\nowidctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
+\fs21\lang1033\langfe1041\kerning2\loch\f26\hich\af26\dbch\af23\cgrid\langnp1033\langfenp1041 \snext0 Normal;}{\*\cs10 \additive Default Paragraph Font;}}{\info{\title \'83\'5e\'83\'43\'83\'67\'83\'8b}{\author shinsuke}{\operator shinsuke}
+{\creatim\yr2010\mo10\dy8\hr14\min18}{\revtim\yr2010\mo10\dy10\hr6\min59}{\version4}{\edmins3}{\nofpages1}{\nofwords3}{\nofchars3}{\nofcharsws0}{\vern8249}}\paperw11906\paperh16838\margl1701\margr1701\margt1985\margb1701\gutter0 
+\deftab840\ftnbj\aenddoc\hyphcaps0\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1701\dgvorigin1985\dghshow0\dgvshow2\jcompress\lnongrid
+\viewkind1\viewscale100\splytwnine\ftnlytwnine\htmautsp\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule {\upr{\*\fchars 
+!%),.:\'3b?]\'7d\'81\'91\'81\'8b\'81\'66\'81\'68\'81\'f1\'81\'8c\'81\'8d\'81\'8e\'81\'41\'81\'42\'81\'58\'81\'72\'81\'74\'81\'76\'81\'78\'81\'7a\'81\'6c\'81\'4a\'81\'4b\'81\'54\'81\'55\'81\'45\'81\'52\'81\'53\'81\'49\'81\'93\'81\'6a\'81\'43\'81\'44
+\'81\'46\'81\'47\'81\'48\'81\'6e\'81\'70\'a1\'a3\'a4\'a5\'de\'df\'81\'91}{\*\ud\uc0{\*\fchars 
+!%),.:\'3b?]\'7d{\uc2\u162 \'81\'91\'81\'8b\'81f\'81h\'81\'f1\'81\'8c\'81\'8d\'81\'8e\'81A\'81B\'81X\'81r\'81t\'81v\'81x\'81z\'81l\'81J\'81K\'81T\'81U\'81E\'81R\'81S\'81I\'81\'93\'81j\'81C\'81D\'81F\'81G\'81H\'81n\'81p\'a1\'a3\'a4\'a5}\'de\'df\'81\'91}}}
+{\upr{\*\lchars $([\'5c\'7b\'81\'92\'5c\'81\'65\'81\'67\'81\'71\'81\'73\'81\'75\'81\'77\'81\'79\'81\'6b\'81\'90\'81\'69\'81\'6d\'81\'6f\'a2\'81\'92\'81\'8f}{\*\ud\uc0{\*\lchars 
+$([\'5c\'7b{\uc2\u163 \'81\'92}{\uc1\u165 \'5c\'81e\'81g\'81q\'81s\'81u\'81w\'81y\'81k\'81\'90\'81i\'81m\'81o\'a2\'81\'92\'81\'8f}}}}\fet0\sectd \linex0\headery851\footery992\colsx425\endnhere\sectlinegrid360\sectspecifyl {\*\pnseclvl1
+\pnucrm\pnstart1\pnindent720\pnhang{\pntxta \dbch .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang{\pntxta \dbch .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang{\pntxta \dbch .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang{\pntxta \dbch )}}
+{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang{\pntxtb \dbch (}{\pntxta \dbch )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb \dbch (}{\pntxta \dbch )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb \dbch (}{\pntxta \dbch )}}
+{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb \dbch (}{\pntxta \dbch )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb \dbch (}{\pntxta \dbch )}}\pard\plain \qj \li0\ri0\nowidctlpar\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 
+\fs21\lang1033\langfe1041\kerning2\loch\af26\hich\af26\dbch\af23\cgrid\langnp1033\langfenp1041 {\hich\af26\dbch\af23\loch\f26 Hello
+\par }{\loch\af26\hich\af26\dbch\f23 \'82\'b1\'82\'f1\'82\'c9\'82\'bf\'82\'cd}{
+\par \hich\af26\dbch\af23\loch\f26 Test
+\par }{\loch\af26\hich\af26\dbch\f23 \'83\'65\'83\'58\'83\'67}{
+\par 
+\par }{
+\par }}

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation.rtf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation.rtf?rev=1066081&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation.rtf (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFTableCellSeparation.rtf Tue Feb  1 16:15:51 2011
@@ -0,0 +1,7 @@
+{\rtf1\ansi\ansicpg1252\deff0\deflang1033\deflangfe1033{\fonttbl{\f0\froman\fprq2\fcharset0 Times New Roman;}{\f1\fswiss\fprq2\fcharset0 Calibri;}}
+{\*\generator Msftedit 5.41.21.2509;}\viewkind4\uc1\trowd\trgaph108\trleft-108\trbrdrl\brdrs\brdrw10 \trbrdrt\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trpaddl108\trpaddr108\trpaddfl3\trpaddfr3
+\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx4680\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx9468\pard\intbl\f1\fs22 a\cell b\cell\row\trowd\trgaph108\trleft-108\trbrdrl\brdrs\brdrw10 \trbrdrt\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trpaddl108\trpaddr108\trpaddfl3\trpaddfr3
+\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx4680\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx9468\pard\intbl c\cell d\cell\row\trowd\trgaph108\trleft-108\trbrdrl\brdrs\brdrw10 \trbrdrt\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trpaddl108\trpaddr108\trpaddfl3\trpaddfr3
+\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx4680\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx9468\pard\intbl\lang1031\'e4\cell\lang1033\'eb\cell\row\trowd\trgaph108\trleft-108\trbrdrl\brdrs\brdrw10 \trbrdrt\brdrs\brdrw10 \trbrdrr\brdrs\brdrw10 \trbrdrb\brdrs\brdrw10 \trpaddl108\trpaddr108\trpaddfl3\trpaddfr3
+\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx4680\clbrdrl\brdrw10\brdrs\clbrdrt\brdrw10\brdrs\clbrdrr\brdrw10\brdrs\clbrdrb\brdrw10\brdrs \cellx9468\pard\intbl\'f6\cell\'fc\cell\row\pard\sa200\sl276\slmult1\par
+}
\ No newline at end of file

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFUmlautSpaces.rtf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFUmlautSpaces.rtf?rev=1066081&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFUmlautSpaces.rtf (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFUmlautSpaces.rtf Tue Feb  1 16:15:51 2011
@@ -0,0 +1,3 @@
+{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fnil\fcharset0 Calibri;}}
+{\*\generator Msftedit 5.41.21.2509;}\viewkind4\uc1\pard\sa200\sl276\slmult1\lang9\f0\fs22\'dcbersicht\par
+}

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWord2010CzechCharacters.rtf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWord2010CzechCharacters.rtf?rev=1066081&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWord2010CzechCharacters.rtf (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWord2010CzechCharacters.rtf Tue Feb  1 16:15:51 2011
@@ -0,0 +1,190 @@
+{\rtf1\adeflang1025\ansi\ansicpg1252\uc1\adeff31507\deff0\stshfdbch31506\stshfloch31506\stshfhich31506\stshfbi31507\deflang1033\deflangfe1033\themelang1033\themelangfe0\themelangcs0{\fonttbl{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f0\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
+{\f37\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}{\flomajor\f31500\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
+{\fdbmajor\f31501\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhimajor\f31502\fbidi \froman\fcharset0\fprq2{\*\panose 02040503050406030204}Cambria;}
+{\fbimajor\f31503\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\flominor\f31504\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}
+{\fdbminor\f31505\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\fhiminor\f31506\fbidi \fswiss\fcharset0\fprq2{\*\panose 020f0502020204030204}Calibri;}
+{\fbiminor\f31507\fbidi \froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f293\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f294\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
+{\f296\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f297\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f298\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f299\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
+{\f300\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f301\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f293\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\f294\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
+{\f296\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\f297\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\f298\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\f299\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
+{\f300\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\f301\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\f663\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\f664\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}
+{\f666\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\f667\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}{\f670\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}{\f671\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}
+{\flomajor\f31508\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flomajor\f31509\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flomajor\f31511\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
+{\flomajor\f31512\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flomajor\f31513\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flomajor\f31514\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
+{\flomajor\f31515\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flomajor\f31516\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbmajor\f31518\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
+{\fdbmajor\f31519\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbmajor\f31521\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbmajor\f31522\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
+{\fdbmajor\f31523\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbmajor\f31524\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbmajor\f31525\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
+{\fdbmajor\f31526\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhimajor\f31528\fbidi \froman\fcharset238\fprq2 Cambria CE;}{\fhimajor\f31529\fbidi \froman\fcharset204\fprq2 Cambria Cyr;}
+{\fhimajor\f31531\fbidi \froman\fcharset161\fprq2 Cambria Greek;}{\fhimajor\f31532\fbidi \froman\fcharset162\fprq2 Cambria Tur;}{\fhimajor\f31535\fbidi \froman\fcharset186\fprq2 Cambria Baltic;}
+{\fhimajor\f31536\fbidi \froman\fcharset163\fprq2 Cambria (Vietnamese);}{\fbimajor\f31538\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbimajor\f31539\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
+{\fbimajor\f31541\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbimajor\f31542\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbimajor\f31543\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
+{\fbimajor\f31544\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbimajor\f31545\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbimajor\f31546\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}
+{\flominor\f31548\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\flominor\f31549\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\flominor\f31551\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}
+{\flominor\f31552\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\flominor\f31553\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\flominor\f31554\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}
+{\flominor\f31555\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\flominor\f31556\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fdbminor\f31558\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}
+{\fdbminor\f31559\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}{\fdbminor\f31561\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fdbminor\f31562\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}
+{\fdbminor\f31563\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}{\fdbminor\f31564\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fdbminor\f31565\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}
+{\fdbminor\f31566\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}{\fhiminor\f31568\fbidi \fswiss\fcharset238\fprq2 Calibri CE;}{\fhiminor\f31569\fbidi \fswiss\fcharset204\fprq2 Calibri Cyr;}
+{\fhiminor\f31571\fbidi \fswiss\fcharset161\fprq2 Calibri Greek;}{\fhiminor\f31572\fbidi \fswiss\fcharset162\fprq2 Calibri Tur;}{\fhiminor\f31575\fbidi \fswiss\fcharset186\fprq2 Calibri Baltic;}
+{\fhiminor\f31576\fbidi \fswiss\fcharset163\fprq2 Calibri (Vietnamese);}{\fbiminor\f31578\fbidi \froman\fcharset238\fprq2 Times New Roman CE;}{\fbiminor\f31579\fbidi \froman\fcharset204\fprq2 Times New Roman Cyr;}
+{\fbiminor\f31581\fbidi \froman\fcharset161\fprq2 Times New Roman Greek;}{\fbiminor\f31582\fbidi \froman\fcharset162\fprq2 Times New Roman Tur;}{\fbiminor\f31583\fbidi \froman\fcharset177\fprq2 Times New Roman (Hebrew);}
+{\fbiminor\f31584\fbidi \froman\fcharset178\fprq2 Times New Roman (Arabic);}{\fbiminor\f31585\fbidi \froman\fcharset186\fprq2 Times New Roman Baltic;}{\fbiminor\f31586\fbidi \froman\fcharset163\fprq2 Times New Roman (Vietnamese);}}
+{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;
+\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\*\defchp \f31506\fs22 }{\*\defpap \ql \li0\ri0\sa200\sl276\slmult1
+\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 }\noqfpromote {\stylesheet{\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 
+\ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext0 \sqformat \spriority0 \styrsid2501661 Normal;}{\*\cs10 \additive \ssemihidden \sunhideused \spriority1 Default Paragraph Font;}{\*
+\ts11\tsrowd\trftsWidthB3\trpaddl108\trpaddr108\trpaddfl3\trpaddft3\trpaddfb3\trpaddfr3\trcbpat1\trcfpat1\tblind0\tblindtype3\tsvertalt\tsbrdrt\tsbrdrl\tsbrdrb\tsbrdrr\tsbrdrdgl\tsbrdrdgr\tsbrdrh\tsbrdrv \ql \li0\ri0\sa200\sl276\slmult1
+\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 \snext11 \ssemihidden \sunhideused Normal Table;}{
+\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 
+\sbasedon0 \snext15 \slink16 \sunhideused \styrsid12917103 header;}{\*\cs16 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \sbasedon10 \slink15 \slocked \styrsid12917103 Header Char;}{\s17\ql \li0\ri0\widctlpar
+\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 
+\sbasedon0 \snext17 \slink18 \sunhideused \styrsid12917103 footer;}{\*\cs18 \additive \rtlch\fcs1 \af0 \ltrch\fcs0 \sbasedon10 \slink17 \slocked \styrsid12917103 Footer Char;}}{\*\rsidtbl \rsid2501661\rsid4420266\rsid12917103\rsid15494032}{\mmathPr
+\mmathFont34\mbrkBin0\mbrkBinSub0\msmallFrac0\mdispDef1\mlMargin0\mrMargin0\mdefJc1\mwrapIndent1440\mintLim0\mnaryLim1}{\info{\creatim\yr2010\mo10\dy13\hr2\min55}{\revtim\yr2010\mo10\dy13\hr2\min55}{\version1}{\edmins0}{\nofpages1}{\nofwords70}
+{\nofchars401}{\nofcharsws470}{\vern49243}}{\*\xmlnstbl {\xmlns1 http://schemas.microsoft.com/office/word/2003/wordml}}\paperw12240\paperh15840\margl1440\margr1440\margt1440\margb1440\gutter0\ltrsect 
+\widowctrl\ftnbj\aenddoc\trackmoves0\trackformatting1\donotembedsysfont1\relyonvml0\donotembedlingdata0\grfdocevents0\validatexml1\showplaceholdtext0\ignoremixedcontent0\saveinvalidxml0\showxmlerrors1\noxlattoyen
+\expshrtn\noultrlspc\dntblnsbdb\nospaceforul\formshade\horzdoc\dgmargin\dghspace180\dgvspace180\dghorigin1440\dgvorigin1440\dghshow1\dgvshow1
+\jexpand\viewkind1\viewscale100\pgbrdrhead\pgbrdrfoot\splytwnine\ftnlytwnine\htmautsp\nolnhtadjtbl\useltbaln\alntblind\lytcalctblwd\lyttblrtgr\lnbrkrule\nobrkwrptbl\snaptogridincell\rempersonalinfo\allowfieldendsel
+\wrppunct\asianbrkrule\rsidroot2501661\newtblstyruls\nogrowautofit\remdttm\usenormstyforlist\noindnmbrts\felnbrelev\nocxsptable\indrlsweleven\noafcnsttbl\afelev\utinl\hwelev\spltpgpar\notcvasp\notbrkcnstfrctbl\notvatxbx\krnprsnet\cachedcolbal 
+\nouicompat \fet0{\*\wgrffmtfilter 2450}\nofeaturethrottle1\ilfomacatclnup0{\*\ftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12917103 \rtlch\fcs1 \af31507\afs22\alang1025 
+\ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4420266 \chftnsep 
+\par }}{\*\ftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12917103 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4420266 \chftnsepc 
+\par }}{\*\aftnsep \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12917103 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4420266 \chftnsep 
+\par }}{\*\aftnsepc \ltrpar \pard\plain \ltrpar\ql \li0\ri0\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0\pararsid12917103 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid4420266 \chftnsepc 
+\par }}\ltrpar \sectd \ltrsect\linex0\sectdefaultcl\sectrsid12992097\sftnbj {\headerl \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 
+\af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12917103 
+\par }}{\headerr \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12917103 
+\par }}{\footerl \ltrpar \pard\plain \ltrpar\s17\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12917103 
+\par }}{\footerr \ltrpar \pard\plain \ltrpar\s17\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12917103 
+\par }}{\headerf \ltrpar \pard\plain \ltrpar\s15\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12917103 
+\par }}{\footerf \ltrpar \pard\plain \ltrpar\s17\ql \li0\ri0\widctlpar\tqc\tx4680\tqr\tx9360\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 \rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 
+\f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \insrsid12917103 
+\par }}{\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang {\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang {\pntxta )}}
+{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl8
+\pnlcltr\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang {\pntxtb (}{\pntxta )}}\pard\plain \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\faauto\rin0\lin0\itap0\pararsid2501661 
+\rtlch\fcs1 \af31507\afs22\alang1025 \ltrch\fcs0 \f31506\fs22\lang1033\langfe1033\cgrid\langnp1033\langfenp1033 {\rtlch\fcs1 \af663 \ltrch\fcs0 \f663\lang1048\langfe1033\langnp1048\insrsid2501661 \'c8l\'e1nek t\'fddne
+\par \'c8\'e1st svitku s textem Knihy Izaj\'e1\'9a
+\par Svitky od Mrtv\'e9ho mo\'f8e jsou starov\'eck\'e9 \'9eidovsk\'e9 n\'e1bo\'9eensk\'e9 texty, nalezen\'e9 pobl\'ed\'9e Mrtv\'e9ho mo\'f8e, zejm\'e9na v Kumr\'e1nu. P\'f8edpokl\'e1d\'e1 se, \'9ee p\'f9vodn\'edmi vlastn\'edky knihovny a autory \'e8\'e1
+sti text\'f9 byli esejci, \'e8lenov\'e9 asketick\'e9ho, apokalyptick\'e9ho a mesianistick\'e9ho hnut\'ed v r\'e1mci judaismu, kter\'e9 pravd\'ecpodobn\'ec vzniklo v polovin\'ec 2. stol. p\'f8.}{\rtlch\fcs1 \af37 \ltrch\fcs0 
+\f37\lang1048\langfe1033\langnp1048\insrsid2501661 \u8201\'3f}{\rtlch\fcs1 \af37 \ltrch\fcs0 \f37\lang1048\langfe1033\langnp1048\insrsid2501661 n.}{\rtlch\fcs1 \af37 \ltrch\fcs0 \f37\lang1048\langfe1033\langnp1048\insrsid2501661 \u8201\'3f}{\rtlch\fcs1 
+\af663 \ltrch\fcs0 \f663\lang1048\langfe1033\langnp1048\insrsid2501661 l. a zaniklo n\'e1sledkem prvn\'ed \'9eidovsk\'e9 v\'e1lky koncem 60. let prvn\'edho stolet\'ed na\'9aeho letopo\'e8tu.}{\rtlch\fcs1 \af37 \ltrch\fcs0 
+\f37\lang1048\langfe1033\langnp1048\insrsid2501661\charrsid2501661 
+\par }\pard \ltrpar\ql \li0\ri0\sa200\sl276\slmult1\widctlpar\wrapdefault\aspalpha\aspnum\faauto\adjustright\rin0\lin0\itap0 {\rtlch\fcs1 \af31507 \ltrch\fcs0 \lang1048\langfe1033\langnp1048\insrsid15494032\charrsid2501661 
+\par }{\*\themedata 504b030414000600080000002100e9de0fbfff0000001c020000130000005b436f6e74656e745f54797065735d2e786d6cac91cb4ec3301045f748fc83e52d4a
+9cb2400825e982c78ec7a27cc0c8992416c9d8b2a755fbf74cd25442a820166c2cd933f79e3be372bd1f07b5c3989ca74aaff2422b24eb1b475da5df374fd9ad
+5689811a183c61a50f98f4babebc2837878049899a52a57be670674cb23d8e90721f90a4d2fa3802cb35762680fd800ecd7551dc18eb899138e3c943d7e503b6
+b01d583deee5f99824e290b4ba3f364eac4a430883b3c092d4eca8f946c916422ecab927f52ea42b89a1cd59c254f919b0e85e6535d135a8de20f20b8c12c3b0
+0c895fcf6720192de6bf3b9e89ecdbd6596cbcdd8eb28e7c365ecc4ec1ff1460f53fe813d3cc7f5b7f020000ffff0300504b030414000600080000002100a5d6
+a7e7c0000000360100000b0000005f72656c732f2e72656c73848fcf6ac3300c87ef85bd83d17d51d2c31825762fa590432fa37d00e1287f68221bdb1bebdb4f
+c7060abb0884a4eff7a93dfeae8bf9e194e720169aaa06c3e2433fcb68e1763dbf7f82c985a4a725085b787086a37bdbb55fbc50d1a33ccd311ba548b6309512
+0f88d94fbc52ae4264d1c910d24a45db3462247fa791715fd71f989e19e0364cd3f51652d73760ae8fa8c9ffb3c330cc9e4fc17faf2ce545046e37944c69e462
+a1a82fe353bd90a865aad41ed0b5b8f9d6fd010000ffff0300504b0304140006000800000021006b799616830000008a0000001c0000007468656d652f746865
+6d652f7468656d654d616e616765722e786d6c0ccc4d0ac3201040e17da17790d93763bb284562b2cbaebbf600439c1a41c7a0d29fdbd7e5e38337cedf14d59b
+4b0d592c9c070d8a65cd2e88b7f07c2ca71ba8da481cc52c6ce1c715e6e97818c9b48d13df49c873517d23d59085adb5dd20d6b52bd521ef2cdd5eb9246a3d8b
+4757e8d3f729e245eb2b260a0238fd010000ffff0300504b0304140006000800000021003bb85ec01007000069230000160000007468656d652f7468656d652f
+7468656d65312e786d6cec5a4f6fdb3614bf0fd87720746f6d27761a07758ad8b19bad4d1bc46e871e6999965853a240d2497d1bdae38001c3ba618715d86d87
+615b8116d8a5fb34d93a6c1dd0afb04752b2c5585e9236d89ca23e2416f5f8fefc1edfe34f94af5e7b1031744084a43c6e7895cb650f91d8e7031a070def4eaf
+7369dd4352e17880198f49c39b10e95ddbfcf083ab784385242208e6c7720337bc50a964a354923e0c6379992724867b432e22ace05204a581c087a03762a595
+7279ad14611a7b28c611a8bd3d1c529fa09e56e96d66cadb0c2e6325f580cf4457ab26ce0c233b1855b4849cc81613e800b386077606fcb0471e280f312c15dc
+687865f3f14a9b574b78239dc4d482b9b9791df349e7a51306a315635304fda9d14aa75abfb2eda59044fe693089b0188d934b3e8f12ac689f32aa26061e0f45
+fec64741cc05ee334028538b2bd539bd11f505977ca82e839e123758666883b24ad9629d056e3c676a3e8076bbdd6a57324bcb1a40ea39f67d581b367bf92c54
+3beb95e6d26721e7bdfd3a9f8d56b956ae5e90408cf73690d5b9baa8379bcd5a7dd99755ce7bfbb53a17c87a79adbab572310231dedb406a7381549b5badd6da
+c508c4786f03599b0ba473a5be56bd208118ef4346e3d15c187a63ea74963d1f53df879ced14c6b10e71ac0385b0a46059379099fbc003a6bc422765c863b588
+6544f83e171d10d0820c36eb18a9494286d887ddb985a3bea0587304bc4170ee8e1df2e5dc90b685a42f68a21adec709062e34d3f7fac58faf5f3c43470f9f1f
+3dfce5e8d1a3a3873f5b45ceac1d1c07f959afbeffe2ef279fa2bf9e7df7eaf157c5f2322ffffb4f9ffdf6eb97c582409c66eebcfcfae91fcf9fbefce6f33f7f
+785c20be051c252fdea31191e8163944fb3c82c00c2aaee7a42fce36a317629a9fb1150712c7585b29d0df56a1237d6b82599a1dc78f267111bc2b803816095e
+1fdf771cee8662ac6881e51b61e408ee72ce9a40e28a50b8a16de560ee8de3a0d8b818e7e5f6313e28b2ddc2b193dff63801c69c2d4b27f056481c37f7188e15
+0e484c14d2f7f8889082e8ee51eae0ba9b314f748fa226a68590f468df594db3493b3482bc4c8a62867c3bd8ecde454dce8aa2de2607ae2454056605cef70873
+60bc8ec70a47452a7b386279c06f62151639d99d083f2fd7960a321d10c6517b40a42c9a735b40bcb9a4dfc0d04a0bd3becb26912b29141d15e9bc8939cf4b6e
+f3512bc4f04851804297c6615ef6233982258ad11e5745e2bbdcad107d0d79c0f1c274dfa5c449f7c9dde00e0d1c97660b44df198b8228ae13eeacdfee840d31
+3145064ddde9d5118587a8cec2c6cd28746e6be1fc1a37b4ca97df3e29f07b595bf616ec5e4535b373ac512f923bde9e5b5c0ce8f277e76d3c8ef70814c4fc16
+f5be39bf6fcede3bdf9c17d5f3f9b7e459178606adb98825da8676470b59f79032d65513466e4a43bc25ec3d830e0cea79e6ac914ccfdf9210beea4a06038e5c
+20b0998304579f501576439c0069af785a492053d581440997704c68860b756b7920feca1e32d6f4639bed1c12ab5d3eb0c3ab7ad88c6b3fcc9925f865bc0acc
+51666668552b38adb1d52ba952d0f926c62adaa9535bab18d74c5374ac4d432e0c0d06a76802a941408500e53538edd5a6e161073332d0b8db1c6569315938cf
+14c9100f489a231df77c8e2a2649d95af9971ce927da1350cb59ab6bb56f61ed3449ca9bab2e309765ef6db294ade0599640dbf1726471be38598c0e1b5ebdb6
+52f3908f93863784e764f81a259075a979246601bc66f095b0cbfec46236553ecb663d0bcc2d820a9c945adce70276fa4022a4dac632b44bc3dc4a97008bb525
+ebff4a0d603daf000abad1e9bc585d87c5f0bf790138baa925c321f1553ed9b9118d9dbd4c5b291f2b22bae1e010f5d958ec6348bf5eaa10cf804a38ee301d41
+5fc05b198db6b9e536e7b4e8f227fb460e742cebe9928601b324c4e93ea07b47d6626c1c6613988263ae72b801e885a01ad4cf8eb1e945ef3ac6f9c27f8ff1f9
+bc273cbe8ecf8ab1663d7090b43ad08a7c78bf2b30d24dafe171a1420e1b4a1252bf2380039a6d000a1f5ed2c26da86d78cb6cfe0b72a0ffdbf66975686d0cce
+03d43e0d90a0402d542808d9831dc63492139455521a6255668a2c599cb92b13eb769f1c10d6d3dbd99aa6691e0aa16b998d21ede846ee78c5bad76933ec079a
+afe65ba7b3294d19a2ed1aff3589b57d198272b754c34d33fca72e1690583bdf4ccf68543e107d63c698abd9520263b95dbd9e36ca3774e18cacc96e3e7311af
+d432e7208bf311c3e094dbc2abfa10e93f4065a8f099fdc982e6463dbe0fdb24825f206865b06c60555fb21c12e9bdce0ef68103db41bb98b42a0b6dca82356a
+19ef3ae78796a9dd63606bcf4e93ef33823de5d9ae39a716cf13ec1461076b3bb6106ac8ecf11285a161f64c6a12637eeb92ff390aefdf87446fc3eb9f3153d2
+2c26f8b189c0f0a4d7357500c56f2d9aa99bff000000ffff0300504b0304140006000800000021000dd1909fb60000001b010000270000007468656d652f7468
+656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73848f4d0ac2301484f78277086f6fd3ba109126dd88d0add40384e4350d363f2451
+eced0dae2c082e8761be9969bb979dc9136332de3168aa1a083ae995719ac16db8ec8e4052164e89d93b64b060828e6f37ed1567914b284d262452282e319872
+0e274a939cd08a54f980ae38a38f56e422a3a641c8bbd048f7757da0f19b017cc524bd62107bd5001996509affb3fd381a89672f1f165dfe514173d9850528a2
+c6cce0239baa4c04ca5bbabac4df000000ffff0300504b01022d0014000600080000002100e9de0fbfff0000001c020000130000000000000000000000000000
+0000005b436f6e74656e745f54797065735d2e786d6c504b01022d0014000600080000002100a5d6a7e7c0000000360100000b00000000000000000000000000
+300100005f72656c732f2e72656c73504b01022d00140006000800000021006b799616830000008a0000001c0000000000000000000000000019020000746865
+6d652f7468656d652f7468656d654d616e616765722e786d6c504b01022d00140006000800000021003bb85ec010070000692300001600000000000000000000
+000000d60200007468656d652f7468656d652f7468656d65312e786d6c504b01022d00140006000800000021000dd1909fb60000001b01000027000000000000
+000000000000001a0a00007468656d652f7468656d652f5f72656c732f7468656d654d616e616765722e786d6c2e72656c73504b050600000000050005005d010000150b00000000}
+{\*\colorschememapping 3c3f786d6c2076657273696f6e3d22312e302220656e636f64696e673d225554462d3822207374616e64616c6f6e653d22796573223f3e0d0a3c613a636c724d
+617020786d6c6e733a613d22687474703a2f2f736368656d61732e6f70656e786d6c666f726d6174732e6f72672f64726177696e676d6c2f323030362f6d6169
+6e22206267313d226c743122207478313d22646b3122206267323d226c743222207478323d22646b322220616363656e74313d22616363656e74312220616363
+656e74323d22616363656e74322220616363656e74333d22616363656e74332220616363656e74343d22616363656e74342220616363656e74353d22616363656e74352220616363656e74363d22616363656e74362220686c696e6b3d22686c696e6b2220666f6c486c696e6b3d22666f6c486c696e6b222f3e}
+{\*\latentstyles\lsdstimax267\lsdlockeddef0\lsdsemihiddendef1\lsdunhideuseddef1\lsdqformatdef0\lsdprioritydef99{\lsdlockedexcept \lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority0 \lsdlocked0 Normal;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority9 \lsdlocked0 heading 1;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 2;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 3;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 4;
+\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 5;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 6;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 7;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 8;\lsdqformat1 \lsdpriority9 \lsdlocked0 heading 9;
+\lsdpriority39 \lsdlocked0 toc 1;\lsdpriority39 \lsdlocked0 toc 2;\lsdpriority39 \lsdlocked0 toc 3;\lsdpriority39 \lsdlocked0 toc 4;\lsdpriority39 \lsdlocked0 toc 5;\lsdpriority39 \lsdlocked0 toc 6;\lsdpriority39 \lsdlocked0 toc 7;
+\lsdpriority39 \lsdlocked0 toc 8;\lsdpriority39 \lsdlocked0 toc 9;\lsdqformat1 \lsdpriority35 \lsdlocked0 caption;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority10 \lsdlocked0 Title;\lsdpriority1 \lsdlocked0 Default Paragraph Font;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority11 \lsdlocked0 Subtitle;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority22 \lsdlocked0 Strong;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority20 \lsdlocked0 Emphasis;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority59 \lsdlocked0 Table Grid;\lsdunhideused0 \lsdlocked0 Placeholder Text;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority1 \lsdlocked0 No Spacing;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 1;\lsdunhideused0 \lsdlocked0 Revision;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority34 \lsdlocked0 List Paragraph;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority29 \lsdlocked0 Quote;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority30 \lsdlocked0 Intense Quote;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 1;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 1;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 2;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 2;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 3;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 3;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 4;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 4;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 5;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 5;\lsdsemihidden0 \lsdunhideused0 \lsdpriority60 \lsdlocked0 Light Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority61 \lsdlocked0 Light List Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority62 \lsdlocked0 Light Grid Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority63 \lsdlocked0 Medium Shading 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority64 \lsdlocked0 Medium Shading 2 Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority65 \lsdlocked0 Medium List 1 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority66 \lsdlocked0 Medium List 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority67 \lsdlocked0 Medium Grid 1 Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority68 \lsdlocked0 Medium Grid 2 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority69 \lsdlocked0 Medium Grid 3 Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority70 \lsdlocked0 Dark List Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdpriority71 \lsdlocked0 Colorful Shading Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority72 \lsdlocked0 Colorful List Accent 6;\lsdsemihidden0 \lsdunhideused0 \lsdpriority73 \lsdlocked0 Colorful Grid Accent 6;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority19 \lsdlocked0 Subtle Emphasis;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority21 \lsdlocked0 Intense Emphasis;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority31 \lsdlocked0 Subtle Reference;\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority32 \lsdlocked0 Intense Reference;
+\lsdsemihidden0 \lsdunhideused0 \lsdqformat1 \lsdpriority33 \lsdlocked0 Book Title;\lsdpriority37 \lsdlocked0 Bibliography;\lsdqformat1 \lsdpriority39 \lsdlocked0 TOC Heading;}}{\*\datastore 010500000200000018000000
+4d73786d6c322e534158584d4c5265616465722e362e3000000000000000000000060000
+d0cf11e0a1b11ae1000000000000000000000000000000003e000300feff090006000000000000000000000001000000010000000000000000100000feffffff00000000feffffff0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+fffffffffffffffffdfffffffeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+ffffffffffffffffffffffffffffffff52006f006f007400200045006e00740072007900000000000000000000000000000000000000000000000000000000000000000000000000000000000000000016000500ffffffffffffffffffffffff0c6ad98892f1d411a65f0040963251e5000000000000000000000000b005
+07fd686acb01feffffff00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff00000000000000000000000000000000000000000000000000000000
+00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff0000000000000000000000000000000000000000000000000000
+000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffff000000000000000000000000000000000000000000000000
+0000000000000000000000000000000000000000000000000105000000000000}}
\ No newline at end of file

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWordPadCzechCharacters.rtf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWordPadCzechCharacters.rtf?rev=1066081&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWordPadCzechCharacters.rtf (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testRTFWordPadCzechCharacters.rtf Tue Feb  1 16:15:51 2011
@@ -0,0 +1,5 @@
+{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fnil\fcharset238 Calibri;}{\f1\fnil\fcharset0 Calibri;}}
+{\*\generator Msftedit 5.41.21.2509;}\viewkind4\uc1\pard\sa200\sl276\slmult1\lang1048\f0\fs22\'c8l\'e1nek t\'fddne\par
+\'c8\'e1st svitku s textem Knihy Izaj\'e1\'9a\par
+Svitky od Mrtv\'e9ho mo\'f8e jsou starov\'eck\'e9 \'9eidovsk\'e9 n\'e1bo\'9eensk\'e9 texty, nalezen\'e9 pobl\'ed\'9e Mrtv\'e9ho mo\'f8e, zejm\'e9na v Kumr\'e1nu. P\'f8edpokl\'e1d\'e1 se, \'9ee p\'f9vodn\'edmi vlastn\'edky knihovny a autory \'e8\'e1sti text\'f9 byli esejci, \'e8lenov\'e9 asketick\'e9ho, apokalyptick\'e9ho a mesianistick\'e9ho hnut\'ed v r\'e1mci judaismu, kter\'e9 pravd\'ecpodobn\'ec vzniklo v polovin\'ec 2. stol. p\'f8.\u8201?n.\u8201?l. a zaniklo n\'e1sledkem prvn\'ed \'9eidovsk\'e9 v\'e1lky koncem 60. let prvn\'edho stolet\'ed na\'9aeho letopo\'e8tu.\lang9\f1\par
+}
\ No newline at end of file