You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2015/05/31 11:59:18 UTC
svn commit: r1682706 - in /pdfbox/trunk:
pdfbox/src/main/java/org/apache/pdfbox/pdfparser/
preflight/src/main/java/org/apache/pdfbox/preflight/parser/
Author: lehmi
Date: Sun May 31 09:59:17 2015
New Revision: 1682706
URL: http://svn.apache.org/r1682706
Log:
PDFBOX-2301: use RandomAccessRead instead of PushBackInputStream as pdfSource
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Sun May 31 09:59:17 2015
@@ -16,7 +16,6 @@
*/
package org.apache.pdfbox.pdfparser;
-import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
@@ -36,8 +35,10 @@ import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSString;
-import org.apache.pdfbox.io.PushBackInputStream;
+import org.apache.pdfbox.io.RandomAccessBuffer;
+import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.cos.COSObjectKey;
+
import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
/**
@@ -137,7 +138,7 @@ public abstract class BaseParser impleme
/**
* This is the stream that will be read from.
*/
- protected PushBackInputStream pdfSource;
+ protected RandomAccessRead pdfSource;
/**
* This is the document that will be parsed.
@@ -159,19 +160,20 @@ public abstract class BaseParser impleme
*/
public BaseParser(InputStream input) throws IOException
{
- int pushbacksize = 65536;
- try
- {
- pushbacksize = Integer.getInteger(PROP_PUSHBACK_SIZE, 65536);
- }
- catch (SecurityException e)
+ pdfSource = copyInputStream(input);
+ }
+
+ protected RandomAccessRead copyInputStream(InputStream input) throws IOException
+ {
+ RandomAccessBuffer buffer = new RandomAccessBuffer();
+ byte[] byteBuffer = new byte[8192];
+ int bytesRead = 0;
+ while ((bytesRead = input.read(byteBuffer)) > -1)
{
- // PDFBOX-1946 getInteger calls System.getProperties,
- // which can get exception in an applet
- // ignore and use default
+ buffer.write(byteBuffer, 0, bytesRead);
}
- this.pdfSource = new PushBackInputStream(
- new BufferedInputStream(input, 16384), pushbacksize);
+ buffer.seek(0);
+ return buffer;
}
private static boolean isHexDigit(char ch)
@@ -190,14 +192,14 @@ public abstract class BaseParser impleme
*/
private COSBase parseCOSDictionaryValue() throws IOException
{
- long numOffset = pdfSource.getOffset();
+ long numOffset = pdfSource.getPosition();
COSBase number = parseDirObject();
skipSpaces();
if (!isDigit())
{
return number;
}
- long genOffset = pdfSource.getOffset();
+ long genOffset = pdfSource.getPosition();
COSBase generationNumber = parseDirObject();
skipSpaces();
readExpectedChar('R');
@@ -218,7 +220,7 @@ public abstract class BaseParser impleme
{
if (document == null)
{
- throw new IOException("object reference " + key + " at offset " + pdfSource.getOffset()
+ throw new IOException("object reference " + key + " at offset " + pdfSource.getPosition()
+ " in content stream");
}
return document.getObjectFromPool(key);
@@ -308,7 +310,7 @@ public abstract class BaseParser impleme
{
return true;
}
- pdfSource.unread(c);
+ pdfSource.rewind(1);
return false;
}
@@ -324,7 +326,7 @@ public abstract class BaseParser impleme
String potentialDEF = readString();
if (!potentialDEF.equals(DEF))
{
- pdfSource.unread(potentialDEF.getBytes(ISO_8859_1));
+ pdfSource.rewind(potentialDEF.getBytes(ISO_8859_1).length);
}
else
{
@@ -363,7 +365,7 @@ public abstract class BaseParser impleme
whitespace = pdfSource.read();
if (ASCII_LF != whitespace)
{
- pdfSource.unread(whitespace);
+ pdfSource.rewind(1);
//The spec says this is invalid but it happens in the real
//world so we must support it.
}
@@ -373,7 +375,7 @@ public abstract class BaseParser impleme
//we are in an error.
//but again we will do a lenient parsing and just assume that everything
//is fine
- pdfSource.unread(whitespace);
+ pdfSource.rewind(1);
}
}
@@ -475,7 +477,7 @@ public abstract class BaseParser impleme
if ( charMatchCount == keyw.length )
{
// keyword matched; unread matched keyword (endstream/endobj) and following buffered content
- pdfSource.unread( strmBuf, contentBytes, bufSize - contentBytes );
+ pdfSource.rewind( bufSize - contentBytes );
break;
}
else
@@ -543,7 +545,7 @@ public abstract class BaseParser impleme
}
if (amountRead > 0)
{
- pdfSource.unread( nextThreeBytes, 0, amountRead );
+ pdfSource.rewind( amountRead );
}
return braces;
}
@@ -715,7 +717,7 @@ public abstract class BaseParser impleme
}
if (c != -1)
{
- pdfSource.unread(c);
+ pdfSource.rewind(1);
}
return new COSString(out.toByteArray());
}
@@ -834,12 +836,12 @@ public abstract class BaseParser impleme
else
{
//it could be a bad object in the array which is just skipped
- LOG.warn("Corrupt object reference at offset " + pdfSource.getOffset());
+ LOG.warn("Corrupt object reference at offset " + pdfSource.getPosition());
// This could also be an "endobj" or "endstream" which means we can assume that
// the array has ended.
String isThisTheEnd = readString();
- pdfSource.unread(isThisTheEnd.getBytes(ISO_8859_1));
+ pdfSource.rewind(isThisTheEnd.getBytes(ISO_8859_1).length);
if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd))
{
return po;
@@ -909,7 +911,7 @@ public abstract class BaseParser impleme
}
else
{
- pdfSource.unread(ch2);
+ pdfSource.rewind(1);
c = ch1;
buffer.append( ch );
}
@@ -926,7 +928,7 @@ public abstract class BaseParser impleme
}
if (c != -1)
{
- pdfSource.unread(c);
+ pdfSource.rewind(1);
}
return COSName.getPDFName( buffer.toString() );
}
@@ -948,7 +950,7 @@ public abstract class BaseParser impleme
if( !trueString.equals( TRUE ) )
{
throw new IOException( "Error parsing boolean: expected='true' actual='" + trueString
- + "' at offset " + pdfSource.getOffset());
+ + "' at offset " + pdfSource.getPosition());
}
else
{
@@ -961,7 +963,7 @@ public abstract class BaseParser impleme
if( !falseString.equals( FALSE ) )
{
throw new IOException( "Error parsing boolean: expected='true' actual='" + falseString
- + "' at offset " + pdfSource.getOffset());
+ + "' at offset " + pdfSource.getPosition());
}
else
{
@@ -971,7 +973,7 @@ public abstract class BaseParser impleme
else
{
throw new IOException( "Error parsing boolean expected='t or f' actual='" + c
- + "' at offset " + pdfSource.getOffset());
+ + "' at offset " + pdfSource.getPosition());
}
return retval;
}
@@ -998,7 +1000,7 @@ public abstract class BaseParser impleme
int leftBracket = pdfSource.read();
// check for second left bracket
c = (char)pdfSource.peek();
- pdfSource.unread( leftBracket );
+ pdfSource.rewind(1);
if(c == '<')
{
@@ -1083,7 +1085,7 @@ public abstract class BaseParser impleme
}
if( ic != -1 )
{
- pdfSource.unread( ic );
+ pdfSource.rewind(1);
}
retval = COSNumber.get( buf.toString() );
}
@@ -1099,13 +1101,13 @@ public abstract class BaseParser impleme
// we can end up in an infinite loop otherwise
throw new IOException( "Unknown dir object c='" + c +
"' cInt=" + (int)c + " peek='" + (char)peek
- + "' peekInt=" + peek + " " + pdfSource.getOffset() );
+ + "' peekInt=" + peek + " " + pdfSource.getPosition() );
}
// if it's an endstream/endobj, we want to put it back so the caller will see it
if(ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString))
{
- pdfSource.unread(badString.getBytes(ISO_8859_1));
+ pdfSource.rewind(badString.getBytes(ISO_8859_1).length);
}
}
}
@@ -1132,7 +1134,7 @@ public abstract class BaseParser impleme
}
if (c != -1)
{
- pdfSource.unread(c);
+ pdfSource.rewind(1);
}
return buffer.toString();
}
@@ -1165,7 +1167,7 @@ public abstract class BaseParser impleme
{
throw new IOException("Expected string '" + new String(expectedString)
+ "' but missed at character '" + c + "' at offset "
- + pdfSource.getOffset());
+ + pdfSource.getPosition());
}
}
skipSpaces();
@@ -1183,7 +1185,7 @@ public abstract class BaseParser impleme
char c = (char) pdfSource.read();
if (c != ec)
{
- throw new IOException("expected='" + ec + "' actual='" + c + "' at offset " + pdfSource.getOffset());
+ throw new IOException("expected='" + ec + "' actual='" + c + "' at offset " + pdfSource.getPosition());
}
}
@@ -1216,7 +1218,7 @@ public abstract class BaseParser impleme
}
if (c != -1)
{
- pdfSource.unread(c);
+ pdfSource.rewind(1);
}
return buffer.toString();
}
@@ -1410,7 +1412,7 @@ public abstract class BaseParser impleme
}
if (c != -1)
{
- pdfSource.unread(c);
+ pdfSource.rewind(1);
}
}
@@ -1468,8 +1470,8 @@ public abstract class BaseParser impleme
}
catch( NumberFormatException e )
{
- pdfSource.unread(intBuffer.toString().getBytes(ISO_8859_1));
- throw new IOException( "Error: Expected an integer type at offset "+pdfSource.getOffset(), e);
+ pdfSource.rewind(intBuffer.toString().getBytes(ISO_8859_1).length);
+ throw new IOException( "Error: Expected an integer type at offset "+pdfSource.getPosition(), e);
}
return retval;
}
@@ -1495,9 +1497,9 @@ public abstract class BaseParser impleme
}
catch( NumberFormatException e )
{
- pdfSource.unread(longBuffer.toString().getBytes(ISO_8859_1));
+ pdfSource.rewind(longBuffer.toString().getBytes(ISO_8859_1).length);
throw new IOException( "Error: Expected a long type at offset "
- + pdfSource.getOffset() + ", instead got '" + longBuffer + "'", e);
+ + pdfSource.getPosition() + ", instead got '" + longBuffer + "'", e);
}
return retval;
}
@@ -1526,7 +1528,7 @@ public abstract class BaseParser impleme
}
if( lastByte != -1 )
{
- pdfSource.unread( lastByte );
+ pdfSource.rewind(1);
}
return buffer;
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sun May 31 09:59:17 2015
@@ -16,6 +16,8 @@
*/
package org.apache.pdfbox.pdfparser;
+import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
+
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
@@ -52,7 +54,6 @@ import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
-import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
/**
* PDF-Parser which first reads startxref and xref tables in order to know valid objects and parse only these objects.
@@ -224,11 +225,11 @@ public class COSParser extends BaseParse
// use existing parser to parse xref table
parseXrefTable(prev);
// parse the last trailer.
- trailerOffset = pdfSource.getOffset();
+ trailerOffset = pdfSource.getPosition();
// PDFBOX-1739 skip extra xref entries in RegisSTAR documents
while (isLenient && pdfSource.peek() != 't')
{
- if (pdfSource.getOffset() == trailerOffset)
+ if (pdfSource.getPosition() == trailerOffset)
{
// warn only the first time
LOG.warn("Expected trailer object at position " + trailerOffset
@@ -239,7 +240,7 @@ public class COSParser extends BaseParse
if (!parseTrailer())
{
throw new IOException("Expected trailer object at position: "
- + pdfSource.getOffset());
+ + pdfSource.getPosition());
}
COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer();
// check for a XRef stream, it may contain some object ids of compressed objects
@@ -733,8 +734,7 @@ public class COSParser extends BaseParse
if (endObjectKey.equals(STREAM_STRING))
{
- pdfSource.unread(endObjectKey.getBytes(ISO_8859_1));
- pdfSource.unread(' ');
+ pdfSource.rewind(endObjectKey.getBytes(ISO_8859_1).length);
if (pb instanceof COSDictionary)
{
COSStream stream = parseCOSStream((COSDictionary) pb);
@@ -852,7 +852,7 @@ public class COSParser extends BaseParse
if (lengthObj.getObject() == null)
{
// not read so far, keep current stream position
- final long curFileOffset = pdfSource.getOffset();
+ final long curFileOffset = pdfSource.getPosition();
parseObjectDynamically(lengthObj, true);
// reset current stream position
pdfSource.seek(curFileOffset);
@@ -918,7 +918,7 @@ public class COSParser extends BaseParse
if (isLenient)
{
LOG.warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset "
- + pdfSource.getOffset());
+ + pdfSource.getPosition());
}
else
{
@@ -941,22 +941,22 @@ public class COSParser extends BaseParse
if (endStream.equals("endobj") && isLenient)
{
LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset "
- + pdfSource.getOffset());
+ + pdfSource.getPosition());
// avoid follow-up warning about missing endobj
- pdfSource.unread(ENDOBJ);
+ pdfSource.rewind(ENDOBJ.length);
}
else if (endStream.length() > 9 && isLenient && endStream.substring(0,9).equals(ENDSTREAM_STRING))
{
LOG.warn("stream ends with '" + endStream + "' instead of 'endstream' at offset "
- + pdfSource.getOffset());
+ + pdfSource.getPosition());
// unread the "extra" bytes
- pdfSource.unread(endStream.substring(9).getBytes(ISO_8859_1));
+ pdfSource.rewind(endStream.substring(9).getBytes(ISO_8859_1).length);
}
else if (!endStream.equals(ENDSTREAM_STRING))
{
throw new IOException(
"Error reading stream, expected='endstream' actual='"
- + endStream + "' at offset " + pdfSource.getOffset());
+ + endStream + "' at offset " + pdfSource.getPosition());
}
}
finally
@@ -979,7 +979,7 @@ public class COSParser extends BaseParse
if (readBytes <= 0)
{
// shouldn't happen, the stream length has already been validated
- throw new IOException("read error at offset " + pdfSource.getOffset()
+ throw new IOException("read error at offset " + pdfSource.getPosition()
+ ": expected " + chunk + " bytes, but read() returns " + readBytes);
}
out.write(streamCopyBuf, 0, readBytes);
@@ -990,7 +990,7 @@ public class COSParser extends BaseParse
private boolean validateStreamLength(long streamLength) throws IOException
{
boolean streamLengthIsValid = true;
- long originOffset = pdfSource.getOffset();
+ long originOffset = pdfSource.getPosition();
long expectedEndOfStream = originOffset + streamLength;
if (expectedEndOfStream > fileLen)
{
@@ -1171,7 +1171,7 @@ public class COSParser extends BaseParse
}
long objectNr = objectKey.getNumber();
int objectGen = objectKey.getGeneration();
- long originOffset = pdfSource.getOffset();
+ long originOffset = pdfSource.getPosition();
pdfSource.seek(offset);
String objectString = createObjectString(objectNr, objectGen);
try
@@ -1216,7 +1216,7 @@ public class COSParser extends BaseParse
if (bfSearchCOSObjectKeyOffsets == null)
{
bfSearchCOSObjectKeyOffsets = new HashMap<COSObjectKey, Long>();
- long originOffset = pdfSource.getOffset();
+ long originOffset = pdfSource.getPosition();
long currentOffset = MINIMUM_SEARCH_OFFSET;
String objString = " obj";
char[] string = objString.toCharArray();
@@ -1368,14 +1368,14 @@ public class COSParser extends BaseParse
{
// a pdf may contain more than one xref entry
bfSearchXRefTablesOffsets = new Vector<Long>();
- long originOffset = pdfSource.getOffset();
+ long originOffset = pdfSource.getPosition();
pdfSource.seek(MINIMUM_SEARCH_OFFSET);
// search for xref tables
while (!pdfSource.isEOF())
{
if (isString(XREF_TABLE))
{
- long newOffset = pdfSource.getOffset();
+ long newOffset = pdfSource.getPosition();
pdfSource.seek(newOffset - 1);
// ensure that we don't read "startxref" instead of "xref"
if (isWhitespace())
@@ -1401,7 +1401,7 @@ public class COSParser extends BaseParse
{
// a pdf may contain more than one /XRef entry
bfSearchXRefStreamsOffsets = new Vector<Long>();
- long originOffset = pdfSource.getOffset();
+ long originOffset = pdfSource.getPosition();
pdfSource.seek(MINIMUM_SEARCH_OFFSET);
// search for XRef streams
String objString = " obj";
@@ -1412,7 +1412,7 @@ public class COSParser extends BaseParse
{
// search backwards for the beginning of the stream
long newOffset = -1;
- long xrefOffset = pdfSource.getOffset();
+ long xrefOffset = pdfSource.getPosition();
boolean objFound = false;
for (int i = 1; i < 30 && !objFound; i++)
{
@@ -1445,7 +1445,7 @@ public class COSParser extends BaseParse
if (length > 0)
{
pdfSource.read();
- newOffset = pdfSource.getOffset();
+ newOffset = pdfSource.getPosition();
}
}
}
@@ -1584,7 +1584,7 @@ public class COSParser extends BaseParse
{
bytesMatching = true;
}
- pdfSource.unread(bytesRead, 0, numberOfBytes);
+ pdfSource.rewind(numberOfBytes);
}
return bytesMatching;
}
@@ -1599,7 +1599,7 @@ public class COSParser extends BaseParse
private boolean isString(char[] string) throws IOException
{
boolean bytesMatching = true;
- long originOffset = pdfSource.getOffset();
+ long originOffset = pdfSource.getPosition();
for (char c : string)
{
if (pdfSource.read() != c)
@@ -1624,7 +1624,7 @@ public class COSParser extends BaseParse
return false;
}
//read "trailer"
- long currentOffset = pdfSource.getOffset();
+ long currentOffset = pdfSource.getPosition();
String nextLine = readLine();
if( !nextLine.trim().equals( "trailer" ) )
{
@@ -1729,7 +1729,7 @@ public class COSParser extends BaseParse
{
String headerGarbage = header.substring(headerMarker.length() + 3, header.length()) + "\n";
header = header.substring(0, headerMarker.length() + 3);
- pdfSource.unread(headerGarbage.getBytes(ISO_8859_1));
+ pdfSource.rewind(headerGarbage.getBytes(ISO_8859_1).length);
}
}
float headerVersion = -1;
@@ -1777,7 +1777,7 @@ public class COSParser extends BaseParse
// check for trailer after xref
String str = readString();
byte[] b = str.getBytes(ISO_8859_1);
- pdfSource.unread(b, 0, b.length);
+ pdfSource.rewind(b.length);
// signal start of new XRef
xrefTrailerResolver.nextXrefObj( startByteOffset, XRefType.TABLE );
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java Sun May 31 09:59:17 2015
@@ -27,18 +27,13 @@ import org.apache.pdfbox.cos.COSDictiona
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.io.PushBackInputStream;
-import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
public class FDFParser extends COSParser
{
private static final Log LOG = LogFactory.getLog(FDFParser.class);
- private final RandomAccessBufferedFileInputStream raStream;
-
- private File tempPDFFile;
-
/**
* Constructs parser for given file using memory buffer.
*
@@ -62,7 +57,7 @@ public class FDFParser extends COSParser
public FDFParser(File file) throws IOException
{
fileLen = file.length();
- raStream = new RandomAccessBufferedFileInputStream(file);
+ pdfSource = new RandomAccessFile(file, "r");
init();
}
@@ -74,9 +69,8 @@ public class FDFParser extends COSParser
*/
public FDFParser(InputStream input) throws IOException
{
- tempPDFFile = createTmpFile(input);
- fileLen = tempPDFFile.length();
- raStream = new RandomAccessBufferedFileInputStream(tempPDFFile);
+ pdfSource = copyInputStream(input);
+ fileLen = pdfSource.length();
init();
}
@@ -96,7 +90,6 @@ public class FDFParser extends COSParser
}
}
document = new COSDocument(false);
- pdfSource = new PushBackInputStream(raStream, 4096);
}
/**
@@ -154,8 +147,6 @@ public class FDFParser extends COSParser
finally
{
IOUtils.closeQuietly(pdfSource);
- deleteTempFile();
-
if (exceptionOccurred && document != null)
{
IOUtils.closeQuietly(document);
@@ -177,25 +168,4 @@ public class FDFParser extends COSParser
return new FDFDocument( getDocument() );
}
- /**
- * Remove the temporary file. A temporary file is created if this class is instantiated with an InputStream
- */
- private void deleteTempFile()
- {
- if (tempPDFFile != null)
- {
- try
- {
- if (!tempPDFFile.delete())
- {
- LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted");
- }
- }
- catch (SecurityException e)
- {
- LOG.warn("Temporary file '" + tempPDFFile.getName() + "' can't be deleted", e);
- }
- }
- }
-
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun May 31 09:59:17 2015
@@ -30,8 +30,8 @@ import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.IOUtils;
-import org.apache.pdfbox.io.PushBackInputStream;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
@@ -43,7 +43,6 @@ public class PDFParser extends COSParser
{
private static final Log LOG = LogFactory.getLog(PDFParser.class);
- private final RandomAccessBufferedFileInputStream raStream;
private String password = "";
private InputStream keyStoreInputStream = null;
private String keyAlias = null;
@@ -163,7 +162,7 @@ public class PDFParser extends COSParser
boolean useScratchFiles) throws IOException
{
fileLen = file.length();
- raStream = new RandomAccessBufferedFileInputStream(file);
+ pdfSource = new RandomAccessBufferedFileInputStream(file);
password = decryptionPassword;
keyStoreInputStream = keyStore;
keyAlias = alias;
@@ -251,9 +250,17 @@ public class PDFParser extends COSParser
public PDFParser(InputStream input, String decryptionPassword, InputStream keyStore,
String alias, boolean useScratchFiles) throws IOException
{
- tempPDFFile = createTmpFile(input);
- fileLen = tempPDFFile.length();
- raStream = new RandomAccessBufferedFileInputStream(tempPDFFile);
+ if (useScratchFiles)
+ {
+ tempPDFFile = createTmpFile(input);
+ fileLen = tempPDFFile.length();
+ pdfSource = new RandomAccessBufferedFileInputStream(tempPDFFile);
+ }
+ else
+ {
+ pdfSource = copyInputStream(input);
+ fileLen = pdfSource.length();
+ }
password = decryptionPassword;
keyStoreInputStream = keyStore;
keyAlias = alias;
@@ -276,7 +283,6 @@ public class PDFParser extends COSParser
}
}
document = new COSDocument(useScratchFiles);
- pdfSource = new PushBackInputStream(raStream, 4096);
}
/**
@@ -355,7 +361,6 @@ public class PDFParser extends COSParser
{
IOUtils.closeQuietly(pdfSource);
IOUtils.closeQuietly(keyStoreInputStream);
-
deleteTempFile();
if (exceptionOccurred && document != null)
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sun May 31 09:59:17 2015
@@ -19,14 +19,14 @@ package org.apache.pdfbox.pdfparser;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.PushbackInputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-
+import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSBoolean;
import org.apache.pdfbox.cos.COSDictionary;
@@ -35,8 +35,8 @@ import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdmodel.common.PDStream;
-import org.apache.pdfbox.contentstream.operator.Operator;
/**
* This will parse a PDF byte stream and extract operands and such.
@@ -209,7 +209,7 @@ public class PDFStreamParser extends Bas
c = (char) pdfSource.peek();
// put back first bracket
- pdfSource.unread(leftBracket);
+ pdfSource.rewind(1);
if (c == '<')
{
@@ -408,7 +408,7 @@ public class PDFStreamParser extends Bas
* @return <code>true</code> if next bytes are probably printable ASCII
* characters starting with a PDF operator, otherwise <code>false</code>
*/
- private boolean hasNoFollowingBinData(final PushbackInputStream pdfSource)
+ private boolean hasNoFollowingBinData(final RandomAccessRead pdfSource)
throws IOException
{
// as suggested in PDFBOX-1164
@@ -453,7 +453,7 @@ public class PDFStreamParser extends Bas
noBinData = false;
}
}
- pdfSource.unread(binCharTestArr, 0, readBytes);
+ pdfSource.rewind(readBytes);
}
if (!noBinData)
{
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1682706&r1=1682705&r2=1682706&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Sun May 31 09:59:17 2015
@@ -64,13 +64,13 @@ import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.preflight.Format;
import org.apache.pdfbox.preflight.PreflightConfiguration;
import org.apache.pdfbox.preflight.PreflightConstants;
@@ -347,7 +347,7 @@ public class PreflightParser extends PDF
// the number of objects in the xref table
int count;
- long offset = pdfSource.getOffset();
+ long offset = pdfSource.getPosition();
String line = readLine();
Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");
Matcher matcher = pattern.matcher(line);
@@ -359,7 +359,8 @@ public class PreflightParser extends PDF
else
{
addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF,
- "Cross reference subsection header is invalid: '" + line + "' at position " + pdfSource.getOffset()));
+ "Cross reference subsection header is invalid: '" + line + "' at position "
+ + pdfSource.getPosition()));
// reset pdfSource cursor to read xref information
pdfSource.seek(offset);
// first obj id
@@ -454,16 +455,16 @@ public class PreflightParser extends PDF
if (!streamV.equals("stream"))
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
- "Expected 'stream' keyword but found '" + streamV + "' at offset "+pdfSource.getOffset()));
+ "Expected 'stream' keyword but found '" + streamV + "' at offset "+pdfSource.getPosition()));
}
int nextChar = pdfSource.read();
if (!((nextChar == 13 && pdfSource.peek() == 10) || nextChar == 10))
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
- "Expected 'EOL' after the stream keyword at offset "+pdfSource.getOffset()));
+ "Expected 'EOL' after the stream keyword at offset "+pdfSource.getPosition()));
}
// set the offset before stream
- pdfSource.seek(pdfSource.getOffset() - 7);
+ pdfSource.seek(pdfSource.getPosition() - 7);
}
/**
@@ -473,17 +474,17 @@ public class PreflightParser extends PDF
*/
protected void checkEndstreamKeyWord() throws IOException
{
- pdfSource.seek(pdfSource.getOffset() - 10);
+ pdfSource.seek(pdfSource.getPosition() - 10);
if (!nextIsEOL())
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
- "Expected 'EOL' before the endstream keyword at offset "+pdfSource.getOffset()+" but found '"+pdfSource.peek()+"'"));
+ "Expected 'EOL' before the endstream keyword at offset "+pdfSource.getPosition()+" but found '"+pdfSource.peek()+"'"));
}
String endstreamV = readString();
if (!endstreamV.equals("endstream"))
{
addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
- "Expected 'endstream' keyword at offset "+pdfSource.getOffset()+" but found '" + endstreamV + "'"));
+ "Expected 'endstream' keyword at offset "+pdfSource.getPosition()+" but found '" + endstreamV + "'"));
}
}
@@ -544,7 +545,7 @@ public class PreflightParser extends PDF
protected COSString parseCOSString() throws IOException
{
// offset reminder
- long offset = pdfSource.getOffset();
+ long offset = pdfSource.getPosition();
char nextChar = (char) pdfSource.read();
int count = 0;
if (nextChar == '<')
@@ -566,7 +567,7 @@ public class PreflightParser extends PDF
else
{
addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID,
- "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "') at offset " + pdfSource.getOffset()));
+ "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "') at offset " + pdfSource.getPosition()));
break;
}
}
@@ -577,7 +578,7 @@ public class PreflightParser extends PDF
if (count % 2 != 0)
{
addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER,
- "Hexa string shall contain even number of non white space char at offset " + pdfSource.getOffset()));
+ "Hexa string shall contain even number of non white space char at offset " + pdfSource.getPosition()));
}
// reset the offset to parse the COSString
@@ -586,7 +587,7 @@ public class PreflightParser extends PDF
if (result.getString().length() > MAX_STRING_LENGTH)
{
- addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+pdfSource.getOffset()));
+ addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+pdfSource.getPosition()));
}
return result;
}
@@ -612,7 +613,7 @@ public class PreflightParser extends PDF
if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT)
{
addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
- "Float is too long or too small: " + real+" at offset "+pdfSource.getOffset()));
+ "Float is too long or too small: " + real+" at offset "+pdfSource.getPosition()));
}
}
else
@@ -621,7 +622,7 @@ public class PreflightParser extends PDF
if (numAsLong > Integer.MAX_VALUE || numAsLong < Integer.MIN_VALUE)
{
addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
- "Numeric is too long or too small: " + numAsLong+" at offset "+pdfSource.getOffset()));
+ "Numeric is too long or too small: " + numAsLong+" at offset "+pdfSource.getPosition()));
}
}
}
@@ -631,7 +632,7 @@ public class PreflightParser extends PDF
COSDictionary dic = (COSDictionary) result;
if (dic.size() > MAX_DICT_ENTRIES)
{
- addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+pdfSource.getOffset()));
+ addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+pdfSource.getPosition()));
}
}
return result;
@@ -680,7 +681,7 @@ public class PreflightParser extends PDF
long readObjNr;
int readObjGen;
- long offset = pdfSource.getOffset();
+ long offset = pdfSource.getPosition();
String line = readLine();
Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
Matcher matcher = pattern.matcher(line);
@@ -721,7 +722,7 @@ public class PreflightParser extends PDF
skipSpaces();
COSBase pb = parseDirObject();
skipSpaces();
- long endObjectOffset = pdfSource.getOffset();
+ long endObjectOffset = pdfSource.getPosition();
String endObjectKey = readString();
if (endObjectKey.equals("stream"))
@@ -743,7 +744,7 @@ public class PreflightParser extends PDF
throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
}
skipSpaces();
- endObjectOffset = pdfSource.getOffset();
+ endObjectOffset = pdfSource.getPosition();
endObjectKey = readString();
// we have case with a second 'endstream' before endobj
@@ -774,12 +775,12 @@ public class PreflightParser extends PDF
}
else
{
- offset = pdfSource.getOffset();
+ offset = pdfSource.getPosition();
pdfSource.seek(endObjectOffset - 1);
if (!nextIsEOL())
{
addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
- "EOL expected before the 'endobj' keyword at offset "+pdfSource.getOffset()));
+ "EOL expected before the 'endobj' keyword at offset "+pdfSource.getPosition()));
}
pdfSource.seek(offset);
}
@@ -787,7 +788,7 @@ public class PreflightParser extends PDF
if (!nextIsEOL())
{
addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
- "EOL expected after the 'endobj' keyword at offset "+pdfSource.getOffset()));
+ "EOL expected after the 'endobj' keyword at offset "+pdfSource.getPosition()));
}
}
else
@@ -838,8 +839,17 @@ public class PreflightParser extends PDF
|| (buf.length - tmpOffset == 2 && (buf[tmpOffset] != 13 || buf[tmpOffset + 1] != 10))
|| (buf.length - tmpOffset == 1 && (buf[tmpOffset] != 13 && buf[tmpOffset] != 10)))
{
+ long position = 0;
+ try
+ {
+ position = pdfSource.getPosition();
+ }
+ catch(IOException excpetion)
+ {
+ position = Long.MIN_VALUE;
+ }
addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,
- "File contains data after the last %%EOF sequence at offset " + pdfSource.getOffset()));
+ "File contains data after the last %%EOF sequence at offset " + position));
}
}
}