You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/09/26 20:02:28 UTC
svn commit: r1627850 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Author: tilman
Date: Fri Sep 26 18:02:28 2014
New Revision: 1627850
URL: http://svn.apache.org/r1627850
Log:
PDFBOX-2385: remove hasPrecedingAscii85Data() from inline image EI heuristics (revert most of 1606177)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1627850&r1=1627849&r2=1627850&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Fri Sep 26 18:02:28 2014
@@ -24,6 +24,8 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSBoolean;
@@ -44,26 +46,28 @@ import org.apache.pdfbox.util.operator.O
*/
public class PDFStreamParser extends BaseParser
{
+ /**
+ * Log instance.
+ */
+ private static final Log LOG = LogFactory.getLog(PDFStreamParser.class);
+
private List<Object> streamObjects = new ArrayList<Object>( 100 );
-// private final RandomAccess file;
- private final int maxBinCharTestLength = 10;
- private final byte[] binCharTestArr = new byte[maxBinCharTestLength];
+ private final int MAXBINCHARTESTLENGTH = 10;
+ private final byte[] binCharTestArr = new byte[MAXBINCHARTESTLENGTH];
/**
* Constructor that takes a stream to parse.
*
* @since Apache PDFBox 1.3.0
* @param stream The stream to read data from.
- * @param raf The random access file.
* @param forceParsing flag to skip malformed or otherwise unparseable
* input where possible
* @throws IOException If there is an error reading from the stream.
*/
- public PDFStreamParser(InputStream stream, boolean forceParsing)
+ public PDFStreamParser(InputStream stream, boolean forceParsing)
throws IOException
{
super(stream, forceParsing);
-// file = raf;
}
/**
@@ -132,7 +136,6 @@ public class PDFStreamParser extends Bas
while( (token = parseNextToken()) != null )
{
streamObjects.add( token );
- //logger().fine( "parsed=" + token );
}
}
finally
@@ -397,8 +400,7 @@ public class PDFStreamParser extends Bas
while( !(lastByte == 'E' &&
currentByte == 'I' &&
hasNextSpaceOrReturn() &&
- hasNoFollowingBinData( pdfSource ) &&
- !hasPrecedingAscii85Data(imageData)) &&
+ hasNoFollowingBinData( pdfSource )) &&
!pdfSource.isEOF() )
{
imageData.write( lastByte );
@@ -449,7 +451,7 @@ public class PDFStreamParser extends Bas
throws IOException
{
// as suggested in PDFBOX-1164
- final int readBytes = pdfSource.read(binCharTestArr, 0, maxBinCharTestLength);
+ final int readBytes = pdfSource.read(binCharTestArr, 0, MAXBINCHARTESTLENGTH);
boolean noBinData = true;
int startOpIdx = -1;
int endOpIdx = -1;
@@ -483,7 +485,7 @@ public class PDFStreamParser extends Bas
}
}
}
- if (readBytes == maxBinCharTestLength) // only if not close to eof
+ if (readBytes == MAXBINCHARTESTLENGTH) // only if not close to eof
{
// a PDF operator is 1-3 bytes long
if (endOpIdx == -1 || startOpIdx == -1 || endOpIdx - startOpIdx > 3)
@@ -493,35 +495,13 @@ public class PDFStreamParser extends Bas
}
pdfSource.unread(binCharTestArr, 0, readBytes);
}
- return noBinData;
- }
-
- /**
- * Check whether the output stream ends with 70 ASCII85 data bytes
- * (33..117). This method is to be called when "EI" and then space/LF/CR
- * are detected.
- *
- * @param imageData output data stream without the "EI"
- * @return true if this is an ASCII85 line so the "EI" is to be considered
- * part of the data stream, false if not
- */
- private boolean hasPrecedingAscii85Data(ByteArrayOutputStream imageData)
- {
- if (imageData.size() < 70)
- {
- return false;
- }
- byte[] tab = imageData.toByteArray();
- for (int i = tab.length - 1; i >= tab.length - 70; --i)
+ if (!noBinData)
{
- if (tab[i] < 33 || tab[i] > 117)
- {
- return false;
- }
+ LOG.warn("ignoring 'EI' assumed to be in the middle of inline image");
}
- return true;
+ return noBinData;
}
-
+
/**
* This will read an operator from the stream.
*