You are viewing a plain text version of this content; the canonical link to the original was not preserved in this text version.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/09/26 20:02:28 UTC

svn commit: r1627850 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Author: tilman
Date: Fri Sep 26 18:02:28 2014
New Revision: 1627850

URL: http://svn.apache.org/r1627850
Log:
PDFBOX-2385: remove hasPrecedingAscii85Data() from inline image EI heuristics (revert most of 1606177)

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1627850&r1=1627849&r2=1627850&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Fri Sep 26 18:02:28 2014
@@ -24,6 +24,8 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSBoolean;
@@ -44,26 +46,28 @@ import org.apache.pdfbox.util.operator.O
  */
 public class PDFStreamParser extends BaseParser
 {
+    /**
+     * Log instance.
+     */
+    private static final Log LOG = LogFactory.getLog(PDFStreamParser.class);
+
     private List<Object> streamObjects = new ArrayList<Object>( 100 );
-//    private final RandomAccess file;
-    private final int    maxBinCharTestLength = 10;
-    private final byte[] binCharTestArr = new byte[maxBinCharTestLength];
+    private final int    MAXBINCHARTESTLENGTH = 10;
+    private final byte[] binCharTestArr = new byte[MAXBINCHARTESTLENGTH];
 
     /**
      * Constructor that takes a stream to parse.
      *
      * @since Apache PDFBox 1.3.0
      * @param stream The stream to read data from.
-     * @param raf The random access file.
      * @param forceParsing flag to skip malformed or otherwise unparseable
      *                     input where possible
      * @throws IOException If there is an error reading from the stream.
      */
-    public PDFStreamParser(InputStream stream,  boolean forceParsing)
+    public PDFStreamParser(InputStream stream, boolean forceParsing)
             throws IOException 
     {
         super(stream, forceParsing);
-//        file = raf;
     }
 
     /**
@@ -132,7 +136,6 @@ public class PDFStreamParser extends Bas
             while( (token = parseNextToken()) != null )
             {
                 streamObjects.add( token );
-                //logger().fine( "parsed=" + token );
             }
         }
         finally
@@ -397,8 +400,7 @@ public class PDFStreamParser extends Bas
                 while( !(lastByte == 'E' &&
                          currentByte == 'I' &&
                          hasNextSpaceOrReturn() &&
-                         hasNoFollowingBinData( pdfSource ) &&
-                         !hasPrecedingAscii85Data(imageData)) &&
+                         hasNoFollowingBinData( pdfSource )) &&
                        !pdfSource.isEOF() )
                 {
                     imageData.write( lastByte );
@@ -449,7 +451,7 @@ public class PDFStreamParser extends Bas
             throws IOException
     {
         // as suggested in PDFBOX-1164
-        final int readBytes = pdfSource.read(binCharTestArr, 0, maxBinCharTestLength);
+        final int readBytes = pdfSource.read(binCharTestArr, 0, MAXBINCHARTESTLENGTH);
         boolean noBinData = true;
         int startOpIdx = -1;
         int endOpIdx = -1;
@@ -483,7 +485,7 @@ public class PDFStreamParser extends Bas
                     }
                 }
             }
-            if (readBytes == maxBinCharTestLength) // only if not close to eof
+            if (readBytes == MAXBINCHARTESTLENGTH) // only if not close to eof
             {
                 // a PDF operator is 1-3 bytes long
                 if (endOpIdx == -1 || startOpIdx == -1 || endOpIdx - startOpIdx > 3)
@@ -493,35 +495,13 @@ public class PDFStreamParser extends Bas
             }
             pdfSource.unread(binCharTestArr, 0, readBytes);
         }
-        return noBinData;
-    }
-
-    /**
-     * Check whether the output stream ends with 70 ASCII85 data bytes
-     * (33..117). This method is to be called when "EI" and then space/LF/CR
-     * are detected.
-     *
-     * @param imageData output data stream without the "EI"
-     * @return true if this is an ASCII85 line so the "EI" is to be considered
-     * part of the data stream, false if not
-     */
-    private boolean hasPrecedingAscii85Data(ByteArrayOutputStream imageData)
-    {
-        if (imageData.size() < 70)
-        {
-            return false;
-        }
-        byte[] tab = imageData.toByteArray();
-        for (int i = tab.length - 1; i >= tab.length - 70; --i)
+        if (!noBinData)
         {
-            if (tab[i] < 33 || tab[i] > 117)
-            {
-                return false;
-            }
+            LOG.warn("ignoring 'EI' assumed to be in the middle of inline image");
         }
-        return true;
+        return noBinData;
     }
-    
+
     /**
      * This will read an operator from the stream.
      *