You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by mi...@apache.org on 2011/11/17 21:33:43 UTC
svn commit: r1203355 - in /tika/trunk: ./ tika-parsers/src/main/java/org/apache/tika/parser/rtf/ tika-parsers/src/test/java/org/apache/tika/parser/rtf/ tika-parsers/src/test/resources/test-documents/

Author: mikemccand
Date: Thu Nov 17 20:33:42 2011
New Revision: 1203355

URL: http://svn.apache.org/viewvc?rev=1203355&view=rev
Log:
TIKA-782: properly handle \bin control word

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf   (with props)
Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1203355&r1=1203354&r2=1203355&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Thu Nov 17 20:33:42 2011
@@ -18,7 +18,10 @@ Release 1.1 - Current Development
  * RTF: Fixed case where a font change would result in processing
    bytes in the wrong font's charset, producing bogus text output
    (TIKA-777).  Don't output whitespace in ignored group states,
-   avoiding excessive whitespace output (TIKA-781).
+   avoiding excessive whitespace output (TIKA-781).  Binary embedded
+   content (using \bin control word) is now skipped correctly;
+   previously it could cause the parser to incorrectly extract binary
+   content as text (TIKA-782).
 
 Release 1.0 - 11/4/2011
 ---------------------------------

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java?rev=1203355&r1=1203354&r2=1203355&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/TextExtractor.java Thu Nov 17 20:33:42 2011
@@ -19,6 +19,7 @@ package org.apache.tika.parser.rtf;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PushbackInputStream;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.CharsetDecoder;
@@ -86,8 +87,6 @@ final class TextExtractor {
     private GroupState groupState = new GroupState();
 
     private boolean inHeader = true;
-    private int chIndex;
-    private int lastGroupStart;
     private int fontTableState;
     private int fontTableDepth;
 
@@ -233,22 +232,22 @@ final class TextExtractor {
         this.out = out;
     }
 
-    private static boolean isHexChar(char ch) {
+    private static boolean isHexChar(int ch) {
         return (ch >= '0' && ch <= '9') ||
             (ch >= 'a' && ch <= 'f') ||
             (ch >= 'A' && ch <= 'F');
     }
 
-    private static boolean isAlpha(char ch) {
+    private static boolean isAlpha(int ch) {
         return (ch >= 'a' && ch <= 'z') ||
             (ch >= 'A' && ch <= 'Z');
     }
 
-    private static boolean isDigit(char ch) {
+    private static boolean isDigit(int ch) {
         return ch >= '0' && ch <= '9';
     }
 
-    private static int hexValue(char ch) {
+    private static int hexValue(int ch) {
         if (ch >= '0' && ch <= '9') {
             return ch - '0';
         } else if (ch >= 'a' && ch <= 'z') {
@@ -271,7 +270,8 @@ final class TextExtractor {
 
     // Buffers the byte (unit in the current charset) for
     // output:
-    private void addOutputByte(byte b) throws IOException, SAXException, TikaException {
+    private void addOutputByte(int b) throws IOException, SAXException, TikaException {
+        assert b >= 0 && b < 256 : "byte value out of range: " + b;
 
         if (pendingCharCount != 0) {
             pushChars();
@@ -285,12 +285,12 @@ final class TextExtractor {
             pendingBytes = newArray;
             pendingByteBuffer = ByteBuffer.wrap(pendingBytes);
         }
-        pendingBytes[pendingByteCount++] = b;
+        pendingBytes[pendingByteCount++] = (byte) b;
     }
 
-    // Buffers a byte as part of a control word:
-    private void addControl(byte b) {
-        assert isAlpha((char) b);
+   // Buffers a byte as part of a control word:
+    private void addControl(int b) {
+        assert isAlpha(b);
         // Save the byte in pending buffer:
         if (pendingControlCount == pendingControl.length) {
             // Gradual but exponential growth:
@@ -298,7 +298,7 @@ final class TextExtractor {
             System.arraycopy(pendingControl, 0, newArray, 0, pendingControl.length);
             pendingControl = newArray;
         }
-        pendingControl[pendingControlCount++] = b;
+        pendingControl[pendingControlCount++] = (byte) b;
     }
 
     // Buffers a UTF16 code unit for output
@@ -323,166 +323,145 @@ final class TextExtractor {
     // Shallow parses the entire doc, writing output to
     // this.out and this.metadata
     public void extract(InputStream in) throws IOException, SAXException, TikaException {
+//        in = new FilterInputStream(in) {
+//            public int read() throws IOException {
+//                int r = super.read();
+//                System.out.write(r);
+//                System.out.flush();
+//                return r;
+//            }
+//            public int read(byte b[], int off, int len) throws IOException {
+//                int r = super.read(b, off, len);
+//                System.out.write(b, off, r);
+//                System.out.flush();
+//                return r;
+//            }
+//        };
+        extract(new PushbackInputStream(in, 2));
+    }
+    
+    private void extract(PushbackInputStream in) throws IOException, SAXException, TikaException {
         out.startDocument();
 
-        int state = 0;
-        int pushBack = -2;
-        boolean negParam = false;
-        char hex1 = 0;
-        long param = 0;
-
         while (true) {
-            final int b;
-            if (pushBack != -2) {
-                b = pushBack;
-                pushBack = -2;
-            } else {
-                b = in.read();
-                chIndex++;
-            }
+            final int b = in.read();
             if (b == -1) {
                 break;
-            }
-
-            // NOTE: this is always a 8bit clean byte (ie
-            // < 128), but we use a char for
-            // convenience in the testing below:
-            final char ch = (char) b;
-
-            switch (state) {
-
-            case 0:
-                if (ch == '\\') {
-                    state = 1;
-                } else if (ch == '{') {
-                    pushText();
-                    processGroupStart();
-                } else if (ch == '}') {
-                    pushText();
-                    processGroupEnd();
-                } else if (ch != '\r' && ch != '\n' && (!groupState.ignore || nextMetaData != null)) {
-                    // Linefeed and carriage return are not
-                    // significant
-                    if (ansiSkip != 0) {
-                        ansiSkip--;
-                    } else {
-                        addOutputByte((byte) ch);
-                    }
-                }
-                break;
-
-            // saw \
-            case 1:
-                if (ch == '\'') {
-                    // escaped hex char
-                    state = 2;
-                } else if (isAlpha(ch)) {
-                    // control word
-                    //pushText();
-                    addControl((byte) ch);
-                    state = 4;
-                } else if (ch == '{' || ch == '}' || ch == '\\' || ch == '\r' || ch == '\n') {
-                    // escaped char
-                    addOutputByte((byte) ch);
-                    state = 0;
-                } else {
-                    // control symbol, eg \* or \~
-                    //pushText();
-                    processControlSymbol(ch);
-                    state = 0;
-                }
-                break;
-
-            // saw \'
-            case 2:
-                if (isHexChar(ch)) {
-                    hex1 = ch;
-                    state = 3;
-                } else {
-                    // DOC ERROR (malformed hex escape): ignore 
-                    state = 0;
-                }
-                break;
-
-            // saw \'x
-            case 3: 
-                if (isHexChar(ch)) {
-                    if (ansiSkip != 0) {
-                        // Skip this ansi char since we are
-                        // still in the shadow of a unicode
-                        // escape:
-                        ansiSkip--;
-                    } else {
-                        // Unescape:
-                        addOutputByte((byte) (16*hexValue(hex1) + hexValue(ch)));
-                    }
-                    state = 0;
-                } else {
-                    // TODO: log a warning here, somehow?
-                    // DOC ERROR (malformed hex escape):
-                    // ignore
-                    state = 0;
-                }
-                break;
-
-            // inside control word
-            case 4:
-                if (isAlpha(ch)) {
-                    // still in control word
-                    addControl((byte) ch);
-                } else if (ch == '-') {
-                    // end of control word, start of negative parameter
-                    negParam = true;
-                    param = 0;
-                    state = 5;
-                } else if (isDigit(ch)) {
-                    // end of control word, start of positive parameter
-                    negParam = false;
-                    param = (long) (ch - '0');
-                    state = 5;
-                } else if (ch == ' ') {
-                    // space is consumed as part of the
-                    // control word, but is not added to the
-                    // control word
-                    processControlWord();
-                    pendingControlCount = 0;
-                    state = 0;
-                } else {
-                    processControlWord();
-                    pendingControlCount = 0;
-                    // eps transition back to start state
-                    pushBack = ch;
-                    state = 0;
+            } else if (b == '\\') {
+                parseControlToken(in);
+            } else if (b == '{') {
+                pushText();
+                processGroupStart(in);
+             } else if (b == '}') {
+                pushText();
+                processGroupEnd();
+                if (groupStates.isEmpty()) {
+                    // parsed document closing brace
+                    break;
                 }
-                break;
-
-            // inside control word's numeric param
-            case 5:
-                if (isDigit(ch)) {
-                    param = (10*param) + (long) (ch - '0');
+            } else if (b != '\r' && b != '\n' && (!groupState.ignore || nextMetaData != null)) {
+                // Linefeed and carriage return are not
+                // significant
+                if (ansiSkip != 0) {
+                    ansiSkip--;
                 } else {
-                    if (negParam) {
-                        param = -param;
-                    }
-                    processControlWord(param);
-                    pendingControlCount = 0;
-                    if (ch != ' ') {
-                        // space is consumed as part of the
-                        // control word
-                        pushBack = ch;
-                    }
-                    state = 0;
+                    addOutputByte(b);
                 }
-                break;
-              
-            default:
-                throw new RuntimeException("invalid state");
             }
         }
 
         endParagraph(false);
         out.endDocument();
     }
+    
+    private void parseControlToken(PushbackInputStream in) throws IOException, SAXException, TikaException {
+        int b = in.read();
+        if (b == '\'') {
+            // escaped hex char
+            parseHexChar(in);
+        } else if (isAlpha(b)) {
+            // control word
+            parseControlWord((char)b, in);
+        } else if (b == '{' || b == '}' || b == '\\' || b == '\r' || b == '\n') {
+            // escaped char
+            addOutputByte(b);
+        } else if (b != -1) {
+            // control symbol, eg \* or \~
+            processControlSymbol((char)b);
+        }
+    }
+    
+    private void parseHexChar(PushbackInputStream in) throws IOException, SAXException, TikaException {
+        int hex1 = in.read();
+        if (!isHexChar(hex1)) {
+            // DOC ERROR (malformed hex escape): ignore 
+            in.unread(hex1);
+            return;
+        }
+        
+        int hex2 = in.read();
+        if (!isHexChar(hex2)) {
+            // TODO: log a warning here, somehow?
+            // DOC ERROR (malformed hex escape):
+            // ignore
+            in.unread(hex2);
+            return;
+        }
+        
+        if (ansiSkip != 0) {
+            // Skip this ansi char since we are
+            // still in the shadow of a unicode
+            // escape:
+            ansiSkip--;
+        } else {
+            // Unescape:
+            addOutputByte(16*hexValue(hex1) + hexValue(hex2));
+        }
+    }
+
+    private void parseControlWord(int firstChar, PushbackInputStream in) throws IOException, SAXException, TikaException {
+        addControl(firstChar);
+        
+        int b = in.read();
+        while (isAlpha(b)) {
+            addControl(b);
+            b = in.read();
+        }
+        
+        boolean hasParam = false;
+        boolean negParam = false;
+        if (b == '-') {
+            negParam = true;
+            hasParam = true;
+            b = in.read();
+        }
+
+        int param = 0;
+        while (isDigit(b)) {
+            param *= 10;
+            param += (b - '0');
+            hasParam = true;
+            b = in.read();
+        }
+        
+        // space is consumed as part of the
+        // control word, but is not added to the
+        // control word
+        if (b != ' ') {
+            in.unread(b);
+        }
+        
+        if (hasParam) {
+            if (negParam) {
+                param = -param;
+            }
+            processControlWord(param, in);
+        } else {
+            processControlWord();
+        }
+        
+        pendingControlCount = 0;
+    }
 
     private void lazyStartParagraph() throws IOException, SAXException, TikaException {
         if (!inParagraph) {
@@ -624,14 +603,9 @@ final class TextExtractor {
             addOutputChar('\u00a0');
             break;
         case '*':
-            // Ignorable destination (control words defined
-            // after the 1987 RTF spec).  Note that
-            // sometimes we un-ignore within this group, eg
-            // when handling upr escape.
-            if (chIndex == lastGroupStart+2) {
-                // Only ignore if \* comes right after {:
-                groupState.ignore = true;
-            }
+            // Ignorable destination (control words defined after
+            // the 1987 RTF spec). These are already handled by
+            // processGroupStart()
             break;
         case '-':
             // Optional hyphen -> unicode SOFT HYPHEN
@@ -689,8 +663,7 @@ final class TextExtractor {
     }
 
     // Handle control word that takes a parameter:
-    // Param is long because spec says max value is 1+ Integer.MAX_VALUE!
-    private void processControlWord(long param) throws IOException, SAXException, TikaException {
+    private void processControlWord(int param, PushbackInputStream in) throws IOException, SAXException, TikaException {
 
         // TODO: afN?  (associated font number)
 
@@ -719,13 +692,13 @@ final class TextExtractor {
         if (inHeader) {
             if (equals("ansicpg")) {
                 // ANSI codepage
-                final String cs = ANSICPG_MAP.get((int) param);
+                final String cs = ANSICPG_MAP.get(param);
                 if (cs != null) {
                     globalCharset = cs;
                 }
             } else if (equals("deff")) {
                 // Default font
-                globalDefaultFont = (int) param;
+                globalDefaultFont = param;
             }
 
             if (fontTableState == 1) {
@@ -736,9 +709,9 @@ final class TextExtractor {
                 } else {
                     if (equals("f")) {
                         // Start new font definition
-                        curFontID = (int) param;
+                        curFontID = param;
                     } else if (equals("fcharset")) {
-                        final String cs = FCHARSET_MAP.get((int) param);
+                        final String cs = FCHARSET_MAP.get(param);
                         if (cs != null) {
                             fontToCharset.put(curFontID, cs);
                         }
@@ -771,7 +744,7 @@ final class TextExtractor {
                 }
             } else if (equals("f")) {
                 // Change current font
-                final String fontCharset = fontToCharset.get((int) param);
+                final String fontCharset = fontToCharset.get(param);
 
                 // Push any buffered text before changing
                 // font:
@@ -788,24 +761,36 @@ final class TextExtractor {
             }
         }
 
-        // Process unicode escape.  This can appear in doc
+        // Process unicode escape. This can appear in doc
         // or in header, since the metadata (info) fields
         // in the header can be unicode escaped as well:
-        if (pendingControl[0] == 'u') {
-            if (pendingControlCount == 1) {
-                // Unicode escape
-                if (!groupState.ignore) {
-                    final char utf16CodeUnit = (char) (((int) param) & 0xffff);
-                    addOutputChar(utf16CodeUnit);
+        if (equals("u")) {
+            // Unicode escape
+            if (!groupState.ignore) {
+                final char utf16CodeUnit = (char) (param & 0xffff);
+                addOutputChar(utf16CodeUnit);
+            }
+
+            // After seeing a unicode escape we must
+            // skip the next ucSkip ansi chars (the
+            // "unicode shadow")
+            ansiSkip = groupState.ucSkip;
+        } else if (equals("uc")) {
+            // Change unicode shadow length
+            groupState.ucSkip = (int) param;
+        } else if (equals("bin")) {
+            if (param >= 0) {
+                int bytesToRead = param;
+                byte[] tmpArray = new byte[Math.min(1024, bytesToRead)];
+                while (bytesToRead > 0) {
+                    int r = in.read(tmpArray, 0, Math.min(bytesToRead, tmpArray.length));
+                    if (r < 0) {
+                        throw new TikaException("unexpected end of file: need " + param + " bytes of binary data, found " + (param-bytesToRead));
+                    }
+                    bytesToRead -= r;
                 }
-
-                // After seeing a unicode escape we must
-                // skip the next ucSkip ansi chars (the
-                // "unicode shadow")
-                ansiSkip = groupState.ucSkip;
-            } else if (pendingControlCount == 2 && pendingControl[1] == 'c') {
-                // Change unicode shadow length
-                groupState.ucSkip = (int) param;
+            } else {
+                // log some warning?
             }
         }
     }
@@ -1043,7 +1028,7 @@ final class TextExtractor {
     }
 
     // Push new GroupState
-    private void processGroupStart() throws IOException {
+    private void processGroupStart(PushbackInputStream in) throws IOException {
         ansiSkip = 0;
         // Push current groupState onto the stack
         groupStates.add(groupState);
@@ -1056,8 +1041,19 @@ final class TextExtractor {
             uprState = 1;
             groupState.ignore = true;
         }
-
-        lastGroupStart = chIndex;
+        
+        // Check for ignorable groups. Note that
+        // sometimes we un-ignore within this group, eg
+        // when handling upr escape.
+        int b2 = in.read();
+        if (b2 == '\\') {
+            int b3 = in.read();
+            if (b3 == '*') {
+                groupState.ignore = true;
+            }
+               in.unread(b3);
+        }
+        in.unread(b2);
     }
 
     // Pop current GroupState

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java?rev=1203355&r1=1203354&r2=1203355&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/rtf/RTFParserTest.java Thu Nov 17 20:33:42 2011
@@ -280,6 +280,11 @@ public class RTFParserTest extends TikaT
                        getXML("testFontAfterBufferedText.rtf").xml);
     }
 
+    // TIKA-782
+    public void testBinControlWord() throws Exception {
+        assertTrue(getXML("testBinControlWord.rtf").xml.indexOf("\u00ff\u00ff\u00ff\u00ff") == -1);
+    }
+
     private Result getResult(String filename) throws Exception {
         File file = getResourceAsFile("/test-documents/" + filename);
        

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf?rev=1203355&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf Thu Nov 17 20:33:42 2011
@@ -0,0 +1,2 @@
+{\rtf1\ansi\ansicpg1252\uc1 \deff0\deflang1033\deflangfe1033{\fonttbl{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman;}{\f1\fswiss\fcharset0\fprq2{\*\panose 020b0604020202020204}Arial;}{\f3\froman\fcharset2\fprq2{\*\panose 05050102010706020507}Symbol;}{\f8\froman\fcharset0\fprq2{\*\panose 00000000000000000000}Tms Rmn;}{\f15\fswiss\fcharset0\fprq3{\*\panose 020b0604030504040204}Tahoma;}{\f16\froman\fcharset0\fprq2{\*\panose 00000000000000000000}Book Antiqua;}{\f18\froman\fcharset238\fprq2 Times New Roman CE;}{\f19\froman\fcharset204\fprq2 Times New Roman Cyr;}{\f21\froman\fcharset161\fprq2 Times New Roman Greek;}{\f22\froman\fcharset162\fprq2 Times New Roman Tur;}{\f23\froman\fcharset186\fprq2 Times New Roman Baltic;}{\f24\fswiss\fcharset238\fprq2 Arial CE;}{\f25\fswiss\fcharset204\fprq2 Arial Cyr;}{\f27\fswiss\fcharset161\fprq2 Arial Greek;}{\f28\fswiss\fcharset162\fprq2 Arial Tur;}{\f29\fswiss\fcharset186\fprq2 Arial Baltic;}}{\colortbl;\red0\green
 0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128;\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\widctlpar\adjustright \f1\cgrid \snext0 Normal;}{\s1\qc\keepn\widctlpar\adjustright \f15\fs96\cgrid \sbasedon0 \snext0 heading 1;}{\s3\li360\widctlpar\adjustright \b\f8\cgrid \sbasedon0 \snext15 heading 3;}{\*\cs10 \additive Default Paragraph Font;}{\s15\li720\widctlpar\adjustright \f1\cgrid \sbasedon0 \snext15 Normal Indent;}{\s16\widctlpar\tqc\tx4320\tqr\tx8640\adjustright \f1\cgrid \sbasedon0 \snext16 header;}{\s17\widctlpar\tqc\tx4320\tqr\tx8640\adjustright \f1\fs16\cgrid \sbasedon0 \snext17 footer;}{\s18\li1440\ri1440\sb120\sa120\sl360\slmult1\widctlpar\adjustright \f1\cgrid \sbasedon0 \snext18 dsbody;}{\s19\qc\wid
 ctlpar\adjustright \f1\cgrid \sbasedon0 \snext19 dmstitle;}{\s20\fi-720\li720\widctlpar\adjustright \b\i\caps\f16\cgrid \sbasedon0 \snext20 dms;}{\s21\fi-360\li360\widctlpar{\*\pn \pnlvlbody\ilvl11\ls2047\pnrnot0\pnf3\pnstart1\pnindent360\pnhang{\pntxtb \'88}}\ls2047\ilvl11\adjustright \f1\cgrid \sbasedon0 \snext21 checklist;}{\s22\fi-2880\li2880\widctlpar\tx2880\adjustright \f1\cgrid \sbasedon0 \snext22 Indent;}{\s23\fi-288\li3150\widctlpar\tx2880\adjustright \cgrid \sbasedon22 \snext23 indent2;}{\s24\fi-360\li3528\widctlpar\tx2880\adjustright \cgrid \sbasedon22 \snext24 indent3;}{\s25\widctlpar\adjustright \b\i\caps\f16\cgrid \sbasedon0 \snext25 ds;}}{\info{\title  }{\author Nicole Mendez}{\operator Nicole Mendez}{\creatim\yr2000\mo1\dy25\hr14\min4}{\revtim\yr2000\mo1\dy25\hr14\min4}{\printim\yr1999\mo1\dy27\hr15\min44}{\version3}{\edmins1}{\nofpages1}{\nofwords5}{\nofchars29}{\*\company ect}{\nofcharsws35}{\vern89}}\paperw15840\paperh12240\margl1440\margr1440 \widowctrl\f
 tnbj\aenddoc\noextrasprl\prcolbl\cvmme\sprsspbf\brkfrm\swpbdr\lytprtmet\hyphcaps0\fracwidth\viewkind1\viewscale150\pgbrdrhead\pgbrdrfoot \fet0\sectd \lndscpsxn\psz1\linex0\endnhere\sectdefaultcl {\*\pnseclvl1\pnucrm\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl2\pnucltr\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl3\pndec\pnstart1\pnindent720\pnhang{\pntxta .}}{\*\pnseclvl4\pnlcltr\pnstart1\pnindent720\pnhang{\pntxta )}}{\*\pnseclvl5\pndec\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl6\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl7\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl8\pnlcltr\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}{\*\pnseclvl9\pnlcrm\pnstart1\pnindent720\pnhang{\pntxtb (}{\pntxta )}}\pard\plain \widctlpar\adjustright \f1\cgrid {\pard\plain \widctlpar\adjustright \f1\cgrid {\object\objemb\objw3600\objh3600\objscalex55\objscaley56{\*\objclass PBrush}{\result {\fs20 {\pict\wmetaf
 ile8\picw6350\pich6350\picwgoal3600\pichgoal3600 \picscalex55\picscaley56 \bin10 ÿÿÿÿ}ÿÿÿÿÿ}}}{\tab \tab \tab  
+\par }}
\ No newline at end of file

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testBinControlWord.rtf
------------------------------------------------------------------------------
    svn:executable = *