You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/06/28 10:09:01 UTC

svn commit: r1606305 - /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Author: tilman
Date: Sat Jun 28 08:09:01 2014
New Revision: 1606305

URL: http://svn.apache.org/r1606305
Log:
PDFBOX-2163: ignore non-final EI at the end of ASCII85 line in inline image

Modified:
    pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1606305&r1=1606304&r2=1606305&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sat Jun 28 08:09:01 2014
@@ -132,7 +132,7 @@ public class PDFStreamParser extends Bas
     {
         try
         {
-            Object token = null;
+            Object token;
             while( (token = parseNextToken()) != null )
             {
                 streamObjects.add( token );
@@ -229,7 +229,7 @@ public class PDFStreamParser extends Bas
      */
     private Object parseNextToken() throws IOException
     {
-        Object retval = null;
+        Object retval;
 
         skipSpaces();
         int nextByte = pdfSource.peek();
@@ -247,7 +247,6 @@ public class PDFStreamParser extends Bas
                 pdfSource.unread( leftBracket ); //put back first bracket
                 if(c == '<')
                 {
-
                     COSDictionary pod = parseCOSDictionary();
                     skipSpaces();
                     if((char)pdfSource.peek() == 's')
@@ -395,11 +394,12 @@ public class PDFStreamParser extends Bas
                 // PDF spec is kinda unclear about this. Should a whitespace
                 // always appear before EI? Not sure, so that we just read
                 // until EI<whitespace>.
-                // Be aware not all kind of whitespaces are allowed here. see PDFBOX1561
+                // Be aware not all kind of whitespaces are allowed here. see PDFBOX-1561
                 while( !(lastByte == 'E' &&
                          currentByte == 'I' &&
-                         isSpaceOrReturn() &&
-                         hasNoFollowingBinData( pdfSource )) &&
+                         hasNextSpaceOrReturn() &&
+                         hasNoFollowingBinData( pdfSource ) &&
+                         !hasPrecedingAscii85Data(imageData)) &&
                        !pdfSource.isEOF() )
                 {
                     imageData.write( lastByte );
@@ -408,7 +408,7 @@ public class PDFStreamParser extends Bas
                 }
                 // the EI operator isn't unread, as it won't be processed anyway
                 retval = PDFOperator.getOperator( "ID" );
-                // save the image data to the operator, so that it can be accessed it later
+                // save the image data to the operator, so that it can be accessed later
                 ((PDFOperator)retval).setImageData( imageData.toByteArray() );
                 break;
             }
@@ -434,9 +434,7 @@ public class PDFStreamParser extends Bas
                     retval = PDFOperator.getOperator( operator );
                 }
             }
-
         }
-
         return retval;
     }
 
@@ -472,6 +470,32 @@ public class PDFStreamParser extends Bas
     }
 
     /**
+     * Check whether the output stream ends with 70 ASCII85 data bytes
+     * (33..117). This method is to be called when "EI" and then space/LF/CR
+     * are detected.
+     *
+     * @param imageData output data stream without the "EI"
+     * @return true if this is an ASCII85 line so the "EI" is to be considered
+     * part of the data stream, false if not
+     */
+    private boolean hasPrecedingAscii85Data(ByteArrayOutputStream imageData)
+    {
+        if (imageData.size() < 70)
+        {
+            return false;
+        }
+        byte[] tab = imageData.toByteArray();
+        for (int i = tab.length - 1; i >= tab.length - 70; --i)
+        {
+            if (tab[i] < 33 || tab[i] > 117)
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+    
+    /**
      * This will read an operator from the stream.
      *
      * @return The operator that was read from the stream.
@@ -510,6 +534,7 @@ public class PDFStreamParser extends Bas
         return buffer.toString();
     }
     
+    
     private boolean isSpaceOrReturn( int c )
     {
         return c == 10 || c == 13 || c == 32;
@@ -521,7 +546,7 @@ public class PDFStreamParser extends Bas
      * @return true if the next char is a space or a return
      * @throws IOException if something went wrong
      */
-    private boolean isSpaceOrReturn() throws IOException
+    private boolean hasNextSpaceOrReturn() throws IOException
     {
         return isSpaceOrReturn( pdfSource.peek() );
     }