You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/06/28 10:09:01 UTC
svn commit: r1606305 -
/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Author: tilman
Date: Sat Jun 28 08:09:01 2014
New Revision: 1606305
URL: http://svn.apache.org/r1606305
Log:
PDFBOX-2163: ignore non-final EI at the end of ASCII85 line in inline image
Modified:
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1606305&r1=1606304&r2=1606305&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sat Jun 28 08:09:01 2014
@@ -132,7 +132,7 @@ public class PDFStreamParser extends Bas
{
try
{
- Object token = null;
+ Object token;
while( (token = parseNextToken()) != null )
{
streamObjects.add( token );
@@ -229,7 +229,7 @@ public class PDFStreamParser extends Bas
*/
private Object parseNextToken() throws IOException
{
- Object retval = null;
+ Object retval;
skipSpaces();
int nextByte = pdfSource.peek();
@@ -247,7 +247,6 @@ public class PDFStreamParser extends Bas
pdfSource.unread( leftBracket ); //put back first bracket
if(c == '<')
{
-
COSDictionary pod = parseCOSDictionary();
skipSpaces();
if((char)pdfSource.peek() == 's')
@@ -395,11 +394,12 @@ public class PDFStreamParser extends Bas
// PDF spec is kinda unclear about this. Should a whitespace
// always appear before EI? Not sure, so that we just read
// until EI<whitespace>.
- // Be aware not all kind of whitespaces are allowed here. see PDFBOX1561
+ // Be aware that not all kinds of whitespace are allowed here; see PDFBOX-1561
while( !(lastByte == 'E' &&
currentByte == 'I' &&
- isSpaceOrReturn() &&
- hasNoFollowingBinData( pdfSource )) &&
+ hasNextSpaceOrReturn() &&
+ hasNoFollowingBinData( pdfSource ) &&
+ !hasPrecedingAscii85Data(imageData)) &&
!pdfSource.isEOF() )
{
imageData.write( lastByte );
@@ -408,7 +408,7 @@ public class PDFStreamParser extends Bas
}
// the EI operator isn't unread, as it won't be processed anyway
retval = PDFOperator.getOperator( "ID" );
- // save the image data to the operator, so that it can be accessed it later
+ // save the image data to the operator, so that it can be accessed later
((PDFOperator)retval).setImageData( imageData.toByteArray() );
break;
}
@@ -434,9 +434,7 @@ public class PDFStreamParser extends Bas
retval = PDFOperator.getOperator( operator );
}
}
-
}
-
return retval;
}
@@ -472,6 +470,32 @@ public class PDFStreamParser extends Bas
}
/**
+ * Check whether the last 70 bytes of the output stream are all ASCII85
+ * data bytes (33..117, i.e. '!'..'u'). This method is to be called when
+ * "EI" and then space/LF/CR are detected.
+ *
+ * @param imageData output data stream without the "EI"
+ * @return true if this is an ASCII85 line so the "EI" is to be considered
+ * part of the data stream, false if not
+ */
+ private boolean hasPrecedingAscii85Data(ByteArrayOutputStream imageData)
+ {
+ if (imageData.size() < 70) // too few bytes written so far to check
+ {
+ return false;
+ }
+ byte[] tab = imageData.toByteArray(); // copy of everything written so far
+ for (int i = tab.length - 1; i >= tab.length - 70; --i)
+ {
+ if (tab[i] < 33 || tab[i] > 117) // bytes are signed: values >= 0x80 are negative and fail here too
+ {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
* This will read an operator from the stream.
*
* @return The operator that was read from the stream.
@@ -510,6 +534,7 @@ public class PDFStreamParser extends Bas
return buffer.toString();
}
+
private boolean isSpaceOrReturn( int c )
{
return c == 10 || c == 13 || c == 32;
@@ -521,7 +546,7 @@ public class PDFStreamParser extends Bas
* @return true if the next char is a space or a return
* @throws IOException if something went wrong
*/
- private boolean isSpaceOrReturn() throws IOException
+ private boolean hasNextSpaceOrReturn() throws IOException
{
return isSpaceOrReturn( pdfSource.peek() );
}