You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/07/26 16:58:22 UTC
svn commit: r1613646 -
/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Author: tilman
Date: Sat Jul 26 14:58:21 2014
New Revision: 1613646
URL: http://svn.apache.org/r1613646
Log:
PDFBOX-2163: require 1-3 char non-space sequence after EI and space sequence
Modified:
pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1613646&r1=1613645&r2=1613646&view=diff
==============================================================================
--- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sat Jul 26 14:58:21 2014
@@ -48,7 +48,7 @@ public class PDFStreamParser extends Bas
{
private List<Object> streamObjects = new ArrayList<Object>( 100 );
private final RandomAccess file;
- private final int maxBinCharTestLength = 5;
+ private final int maxBinCharTestLength = 10;
private final byte[] binCharTestArr = new byte[maxBinCharTestLength];
/**
@@ -439,11 +439,12 @@ public class PDFStreamParser extends Bas
}
/**
- * Looks up next 5 bytes if they contain only ASCII characters (no control
- * sequences etc.).
+ * Checks whether the next bytes contain only ASCII characters (no
+ * control sequences etc.) and whether these ASCII characters begin with a
+ * sequence of 1-3 non-blank characters between blanks.
*
- * @return <code>true</code> if next 5 bytes are printable ASCII characters,
- * otherwise <code>false</code>
+ * @return <code>true</code> if next bytes are probably printable ASCII
+ * characters starting with a PDF operator, otherwise <code>false</code>
*/
private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource)
throws IOException
@@ -451,6 +452,8 @@ public class PDFStreamParser extends Bas
// as suggested in PDFBOX-1164
final int readBytes = pdfSource.read(binCharTestArr, 0, maxBinCharTestLength);
boolean noBinData = true;
+ int startOpIdx = -1;
+ int endOpIdx = -1;
if (readBytes > 0)
{
@@ -463,6 +466,28 @@ public class PDFStreamParser extends Bas
noBinData = false;
break;
}
+ // find the start of a PDF operator
+ if (startOpIdx == -1 && (b == 9 || b == 0x20 || b == 0x0a || b == 0x0d))
+ {
+ startOpIdx = bIdx;
+ }
+ else if (startOpIdx != -1 && endOpIdx == -1 && (b == 9 || b == 0x20 || b == 0x0a || b == 0x0d))
+ {
+ if (bIdx == startOpIdx + 1)
+ {
+ // several blanks one after another
+ startOpIdx = bIdx;
+ }
+ else
+ {
+ endOpIdx = bIdx;
+ }
+ }
+ }
+ // a PDF operator is 1-3 bytes long
+ if (endOpIdx == -1 || startOpIdx == -1 || endOpIdx - startOpIdx > 3)
+ {
+ noBinData = false;
}
pdfSource.unread(binCharTestArr, 0, readBytes);
}