You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/07/26 16:58:04 UTC
svn commit: r1613645 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Author: tilman
Date: Sat Jul 26 14:58:04 2014
New Revision: 1613645
URL: http://svn.apache.org/r1613645
Log:
PDFBOX-2163: require 1-3 char non-space sequence after EI and space sequence
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1613645&r1=1613644&r2=1613645&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sat Jul 26 14:58:04 2014
@@ -47,7 +47,7 @@ public class PDFStreamParser extends Bas
{
private List<Object> streamObjects = new ArrayList<Object>( 100 );
private final RandomAccess file;
- private final int maxBinCharTestLength = 5;
+ private final int maxBinCharTestLength = 10;
private final byte[] binCharTestArr = new byte[maxBinCharTestLength];
/**
@@ -441,11 +441,12 @@ public class PDFStreamParser extends Bas
}
/**
- * Looks up next 5 bytes if they contain only ASCII characters (no control
- * sequences etc.).
+ * Checks whether the next bytes contain only ASCII characters (no
+ * control sequences etc.) and whether these ASCII characters begin with
+ * a sequence of 1-3 non-blank characters between blanks.
*
- * @return <code>true</code> if next 5 bytes are printable ASCII characters,
- * otherwise <code>false</code>
+ * @return <code>true</code> if next bytes are probably printable ASCII
+ * characters starting with a PDF operator, otherwise <code>false</code>
*/
private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource)
throws IOException
@@ -453,6 +454,8 @@ public class PDFStreamParser extends Bas
// as suggested in PDFBOX-1164
final int readBytes = pdfSource.read(binCharTestArr, 0, maxBinCharTestLength);
boolean noBinData = true;
+ int startOpIdx = -1;
+ int endOpIdx = -1;
if (readBytes > 0)
{
@@ -465,6 +468,28 @@ public class PDFStreamParser extends Bas
noBinData = false;
break;
}
+ // find the start of a PDF operator
+ if (startOpIdx == -1 && (b == 9 || b == 0x20 || b == 0x0a || b == 0x0d))
+ {
+ startOpIdx = bIdx;
+ }
+ else if (startOpIdx != -1 && endOpIdx == -1 && (b == 9 || b == 0x20 || b == 0x0a || b == 0x0d))
+ {
+ if (bIdx == startOpIdx + 1)
+ {
+ // several blanks after another
+ startOpIdx = bIdx;
+ }
+ else
+ {
+ endOpIdx = bIdx;
+ }
+ }
+ }
+ // a PDF operator is 1-3 bytes long
+ if (endOpIdx == -1 || startOpIdx == -1 || endOpIdx - startOpIdx > 3)
+ {
+ noBinData = false;
}
pdfSource.unread(binCharTestArr, 0, readBytes);
}