You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ja...@apache.org on 2015/07/21 00:18:01 UTC
svn commit: r1692033 [1/2] - in /pdfbox/trunk:
pdfbox/src/main/java/org/apache/pdfbox/pdfparser/
pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/
pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/
pdfbox/src/test/java/org/ap...
Author: jahewson
Date: Mon Jul 20 22:18:00 2015
New Revision: 1692033
URL: http://svn.apache.org/r1692033
Log:
PDFBOX-2893: Simplify COSStream encoding and decoding
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java (with props)
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java (with props)
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/AppearanceGeneratorHelper.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/interactive/form/PDAppearanceString.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/PDFStreamParserTest.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestFDF.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/pdfdebugger/streampane/StreamPane.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1692033&r1=1692032&r2=1692033&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Mon Jul 20 22:18:00 2015
@@ -18,6 +18,7 @@ package org.apache.pdfbox.pdfparser;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.Arrays;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
@@ -31,9 +32,7 @@ import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSObjectKey;
-import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
-import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.util.Charsets;
@@ -47,15 +46,9 @@ import static org.apache.pdfbox.util.Cha
*/
public abstract class BaseParser
{
-
private static final long OBJECT_NUMBER_THRESHOLD = 10000000000L;
private static final long GENERATION_NUMBER_THRESHOLD = 65535;
-
- /**
- * system property allowing to define size of push back buffer.
- */
- public static final String PROP_PUSHBACK_SIZE = "org.apache.pdfbox.baseParser.pushBackSize";
/**
* Log instance.
@@ -116,12 +109,11 @@ public abstract class BaseParser
private static final byte ASCII_ZERO = 48;
private static final byte ASCII_NINE = 57;
private static final byte ASCII_SPACE = 32;
-
-
+
/**
* This is the stream that will be read from.
*/
- protected RandomAccessRead pdfSource;
+ protected final SequentialSource seqSource;
/**
* This is the document that will be parsed.
@@ -131,30 +123,9 @@ public abstract class BaseParser
/**
* Default constructor.
*/
- public BaseParser()
- {
- }
-
- /**
- * Constructor.
- *
- * @param stream The COS stream to read the data from.
- * @throws IOException If there is an error reading the input stream.
- */
- public BaseParser(COSStream stream) throws IOException
- {
- pdfSource = stream.getUnfilteredRandomAccess();
- }
-
- /**
- * Constructor.
- *
- * @param input The random access read to read the data from.
- * @throws IOException If there is an error reading the input stream.
- */
- public BaseParser(RandomAccessRead input) throws IOException
+ public BaseParser(SequentialSource pdfSource)
{
- pdfSource = input;
+ this.seqSource = pdfSource;
}
private static boolean isHexDigit(char ch)
@@ -173,14 +144,14 @@ public abstract class BaseParser
*/
private COSBase parseCOSDictionaryValue() throws IOException
{
- long numOffset = pdfSource.getPosition();
+ long numOffset = seqSource.getPosition();
COSBase number = parseDirObject();
skipSpaces();
if (!isDigit())
{
return number;
}
- long genOffset = pdfSource.getPosition();
+ long genOffset = seqSource.getPosition();
COSBase generationNumber = parseDirObject();
skipSpaces();
readExpectedChar('R');
@@ -201,7 +172,7 @@ public abstract class BaseParser
{
if (document == null)
{
- throw new IOException("object reference " + key + " at offset " + pdfSource.getPosition()
+ throw new IOException("object reference " + key + " at offset " + seqSource.getPosition()
+ " in content stream");
}
return document.getObjectFromPool(key);
@@ -224,7 +195,7 @@ public abstract class BaseParser
while (!done)
{
skipSpaces();
- char c = (char) pdfSource.peek();
+ char c = (char) seqSource.peek();
if (c == '>')
{
done = true;
@@ -260,23 +231,23 @@ public abstract class BaseParser
*/
private boolean readUntilEndOfCOSDictionary() throws IOException
{
- int c = pdfSource.read();
+ int c = seqSource.read();
while (c != -1 && c != '/' && c != '>')
{
// in addition to stopping when we find / or >, we also want
// to stop when we find endstream or endobj.
if (c == E)
{
- c = pdfSource.read();
+ c = seqSource.read();
if (c == N)
{
- c = pdfSource.read();
+ c = seqSource.read();
if (c == D)
{
- c = pdfSource.read();
- boolean isStream = c == S && pdfSource.read() == T && pdfSource.read() == R
- && pdfSource.read() == E && pdfSource.read() == A && pdfSource.read() == M;
- boolean isObj = !isStream && c == O && pdfSource.read() == B && pdfSource.read() == J;
+ c = seqSource.read();
+ boolean isStream = c == S && seqSource.read() == T && seqSource.read() == R
+ && seqSource.read() == E && seqSource.read() == A && seqSource.read() == M;
+ boolean isObj = !isStream && c == O && seqSource.read() == B && seqSource.read() == J;
if (isStream || isObj)
{
// we're done reading this object!
@@ -285,13 +256,13 @@ public abstract class BaseParser
}
}
}
- c = pdfSource.read();
+ c = seqSource.read();
}
if (c == -1)
{
return true;
}
- pdfSource.rewind(1);
+ seqSource.unread(c);
return false;
}
@@ -300,14 +271,14 @@ public abstract class BaseParser
COSName key = parseCOSName();
COSBase value = parseCOSDictionaryValue();
skipSpaces();
- if (((char) pdfSource.peek()) == 'd')
+ if (((char) seqSource.peek()) == 'd')
{
// if the next string is 'def' then we are parsing a cmap stream
// and want to ignore it, otherwise throw an exception.
String potentialDEF = readString();
if (!potentialDEF.equals(DEF))
{
- pdfSource.rewind(potentialDEF.getBytes(ISO_8859_1).length);
+ seqSource.unread(potentialDEF.getBytes(ISO_8859_1));
}
else
{
@@ -317,7 +288,7 @@ public abstract class BaseParser
if (value == null)
{
- LOG.warn("Bad Dictionary Declaration " + pdfSource);
+ LOG.warn("Bad Dictionary Declaration " + seqSource);
}
else
{
@@ -331,22 +302,22 @@ public abstract class BaseParser
//PDF Ref 3.2.7 A stream must be followed by either
//a CRLF or LF but nothing else.
- int whitespace = pdfSource.read();
+ int whitespace = seqSource.read();
//see brother_scan_cover.pdf, it adds whitespaces
//after the stream but before the start of the
//data, so just read those first
while (ASCII_SPACE == whitespace)
{
- whitespace = pdfSource.read();
+ whitespace = seqSource.read();
}
if (ASCII_CR == whitespace)
{
- whitespace = pdfSource.read();
+ whitespace = seqSource.read();
if (ASCII_LF != whitespace)
{
- pdfSource.rewind(1);
+ seqSource.unread(whitespace);
//The spec says this is invalid but it happens in the real
//world so we must support it.
}
@@ -356,7 +327,7 @@ public abstract class BaseParser
//we are in an error.
//but again we will do a lenient parsing and just assume that everything
//is fine
- pdfSource.rewind(1);
+ seqSource.unread(whitespace);
}
}
@@ -383,7 +354,7 @@ public abstract class BaseParser
{
int braces = bracesParameter;
byte[] nextThreeBytes = new byte[3];
- int amountRead = pdfSource.read(nextThreeBytes);
+ int amountRead = seqSource.read(nextThreeBytes);
//lets handle the special case seen in Bull River Rules and Regulations.pdf
//The dictionary looks like this
@@ -414,7 +385,7 @@ public abstract class BaseParser
}
if (amountRead > 0)
{
- pdfSource.rewind( amountRead );
+ seqSource.unread(Arrays.copyOfRange(nextThreeBytes, 0, amountRead));
}
return braces;
}
@@ -428,7 +399,7 @@ public abstract class BaseParser
*/
protected COSString parseCOSString() throws IOException
{
- char nextChar = (char)pdfSource.read();
+ char nextChar = (char) seqSource.read();
char openBrace;
char closeBrace;
if( nextChar == '(' )
@@ -443,7 +414,7 @@ public abstract class BaseParser
else
{
throw new IOException( "parseCOSString string should start with '(' or '<' and not '" +
- nextChar + "' " + pdfSource );
+ nextChar + "' " + seqSource);
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
@@ -451,7 +422,7 @@ public abstract class BaseParser
//This is the number of braces read
//
int braces = 1;
- int c = pdfSource.read();
+ int c = seqSource.read();
while( braces > 0 && c != -1)
{
char ch = (char)c;
@@ -475,7 +446,7 @@ public abstract class BaseParser
else if( ch == '\\' )
{
//patched by ram
- char next = (char)pdfSource.read();
+ char next = (char) seqSource.read();
switch(next)
{
case 'n':
@@ -512,10 +483,10 @@ public abstract class BaseParser
case ASCII_LF:
case ASCII_CR:
//this is a break in the line so ignore it and the newline and continue
- c = pdfSource.read();
+ c = seqSource.read();
while( isEOL(c) && c != -1)
{
- c = pdfSource.read();
+ c = seqSource.read();
}
nextc = c;
break;
@@ -530,12 +501,12 @@ public abstract class BaseParser
{
StringBuffer octal = new StringBuffer();
octal.append( next );
- c = pdfSource.read();
+ c = seqSource.read();
char digit = (char)c;
if( digit >= '0' && digit <= '7' )
{
octal.append( digit );
- c = pdfSource.read();
+ c = seqSource.read();
digit = (char)c;
if( digit >= '0' && digit <= '7' )
{
@@ -581,12 +552,12 @@ public abstract class BaseParser
}
else
{
- c = pdfSource.read();
+ c = seqSource.read();
}
}
if (c != -1)
{
- pdfSource.rewind(1);
+ seqSource.unread(c);
}
return new COSString(out.toByteArray());
}
@@ -608,7 +579,7 @@ public abstract class BaseParser
final StringBuilder sBuf = new StringBuilder();
while( true )
{
- int c = pdfSource.read();
+ int c = seqSource.read();
if ( isHexDigit((char)c) )
{
sBuf.append( (char) c );
@@ -639,7 +610,7 @@ public abstract class BaseParser
// read till the closing bracket was found
do
{
- c = pdfSource.read();
+ c = seqSource.read();
}
while ( c != '>' && c >= 0 );
@@ -672,7 +643,7 @@ public abstract class BaseParser
COSBase pbo;
skipSpaces();
int i;
- while( ((i = pdfSource.peek()) > 0) && ((char)i != ']') )
+ while( ((i = seqSource.peek()) > 0) && ((char)i != ']') )
{
pbo = parseDirObject();
if( pbo instanceof COSObject )
@@ -705,12 +676,12 @@ public abstract class BaseParser
else
{
//it could be a bad object in the array which is just skipped
- LOG.warn("Corrupt object reference at offset " + pdfSource.getPosition());
+ LOG.warn("Corrupt object reference at offset " + seqSource.getPosition());
// This could also be an "endobj" or "endstream" which means we can assume that
// the array has ended.
String isThisTheEnd = readString();
- pdfSource.rewind(isThisTheEnd.getBytes(ISO_8859_1).length);
+ seqSource.unread(isThisTheEnd.getBytes(ISO_8859_1));
if(ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd))
{
return po;
@@ -719,7 +690,7 @@ public abstract class BaseParser
skipSpaces();
}
// read ']'
- pdfSource.read();
+ seqSource.read();
skipSpaces();
return po;
}
@@ -746,14 +717,14 @@ public abstract class BaseParser
{
readExpectedChar('/');
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
- int c = pdfSource.read();
+ int c = seqSource.read();
while (c != -1)
{
int ch = c;
if (ch == '#')
{
- char ch1 = (char)pdfSource.read();
- char ch2 = (char)pdfSource.read();
+ char ch1 = (char) seqSource.read();
+ char ch2 = (char) seqSource.read();
// Prior to PDF v1.2, the # was not a special character. Also,
// it has been observed that various PDF tools do not follow the
@@ -772,11 +743,11 @@ public abstract class BaseParser
{
throw new IOException("Error: expected hex digit, actual='" + hex + "'", e);
}
- c = pdfSource.read();
+ c = seqSource.read();
}
else
{
- pdfSource.rewind(1);
+ seqSource.unread(ch2);
c = ch1;
buffer.write(ch);
}
@@ -788,12 +759,12 @@ public abstract class BaseParser
else
{
buffer.write(ch);
- c = pdfSource.read();
+ c = seqSource.read();
}
}
if (c != -1)
{
- pdfSource.rewind(1);
+ seqSource.unread(c);
}
String string = new String(buffer.toByteArray(), Charsets.UTF_8);
return COSName.getPDFName(string);
@@ -809,14 +780,14 @@ public abstract class BaseParser
protected COSBoolean parseBoolean() throws IOException
{
COSBoolean retval = null;
- char c = (char)pdfSource.peek();
+ char c = (char) seqSource.peek();
if( c == 't' )
{
- String trueString = new String( pdfSource.readFully( 4 ), ISO_8859_1 );
+ String trueString = new String( seqSource.readFully( 4 ), ISO_8859_1 );
if( !trueString.equals( TRUE ) )
{
throw new IOException( "Error parsing boolean: expected='true' actual='" + trueString
- + "' at offset " + pdfSource.getPosition());
+ + "' at offset " + seqSource.getPosition());
}
else
{
@@ -825,11 +796,11 @@ public abstract class BaseParser
}
else if( c == 'f' )
{
- String falseString = new String( pdfSource.readFully( 5 ), ISO_8859_1 );
+ String falseString = new String( seqSource.readFully( 5 ), ISO_8859_1 );
if( !falseString.equals( FALSE ) )
{
throw new IOException( "Error parsing boolean: expected='true' actual='" + falseString
- + "' at offset " + pdfSource.getPosition());
+ + "' at offset " + seqSource.getPosition());
}
else
{
@@ -839,7 +810,7 @@ public abstract class BaseParser
else
{
throw new IOException( "Error parsing boolean expected='t or f' actual='" + c
- + "' at offset " + pdfSource.getPosition());
+ + "' at offset " + seqSource.getPosition());
}
return retval;
}
@@ -856,17 +827,17 @@ public abstract class BaseParser
COSBase retval = null;
skipSpaces();
- int nextByte = pdfSource.peek();
+ int nextByte = seqSource.peek();
char c = (char)nextByte;
switch(c)
{
case '<':
{
// pull off first left bracket
- int leftBracket = pdfSource.read();
+ int leftBracket = seqSource.read();
// check for second left bracket
- c = (char)pdfSource.peek();
- pdfSource.rewind(1);
+ c = (char) seqSource.peek();
+ seqSource.unread(leftBracket);
if(c == '<')
{
@@ -901,34 +872,34 @@ public abstract class BaseParser
}
case 't':
{
- String trueString = new String( pdfSource.readFully(4), ISO_8859_1 );
+ String trueString = new String( seqSource.readFully(4), ISO_8859_1 );
if( trueString.equals( TRUE ) )
{
retval = COSBoolean.TRUE;
}
else
{
- throw new IOException( "expected true actual='" + trueString + "' " + pdfSource +
- "' at offset " + pdfSource.getPosition());
+ throw new IOException( "expected true actual='" + trueString + "' " + seqSource +
+ "' at offset " + seqSource.getPosition());
}
break;
}
case 'f':
{
- String falseString = new String( pdfSource.readFully(5), ISO_8859_1 );
+ String falseString = new String( seqSource.readFully(5), ISO_8859_1 );
if( falseString.equals( FALSE ) )
{
retval = COSBoolean.FALSE;
}
else
{
- throw new IOException( "expected false actual='" + falseString + "' " + pdfSource +
- "' at offset " + pdfSource.getPosition());
+ throw new IOException( "expected false actual='" + falseString + "' " + seqSource +
+ "' at offset " + seqSource.getPosition());
}
break;
}
case 'R':
- pdfSource.read();
+ seqSource.read();
retval = new COSObject(null);
break;
case (char)-1:
@@ -938,7 +909,7 @@ public abstract class BaseParser
if( Character.isDigit(c) || c == '-' || c == '+' || c == '.')
{
StringBuilder buf = new StringBuilder();
- int ic = pdfSource.read();
+ int ic = seqSource.read();
c = (char)ic;
while( Character.isDigit( c )||
c == '-' ||
@@ -948,12 +919,12 @@ public abstract class BaseParser
c == 'e' )
{
buf.append( c );
- ic = pdfSource.read();
+ ic = seqSource.read();
c = (char)ic;
}
if( ic != -1 )
{
- pdfSource.rewind(1);
+ seqSource.unread(ic);
}
retval = COSNumber.get( buf.toString() );
}
@@ -965,17 +936,17 @@ public abstract class BaseParser
String badString = readString();
if( badString == null || badString.length() == 0 )
{
- int peek = pdfSource.peek();
+ int peek = seqSource.peek();
// we can end up in an infinite loop otherwise
throw new IOException( "Unknown dir object c='" + c +
"' cInt=" + (int)c + " peek='" + (char)peek
- + "' peekInt=" + peek + " " + pdfSource.getPosition() );
+ + "' peekInt=" + peek + " " + seqSource.getPosition() );
}
// if it's an endstream/endobj, we want to put it back so the caller will see it
if(ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString))
{
- pdfSource.rewind(badString.getBytes(ISO_8859_1).length);
+ seqSource.unread(badString.getBytes(ISO_8859_1));
}
}
}
@@ -994,15 +965,15 @@ public abstract class BaseParser
{
skipSpaces();
StringBuilder buffer = new StringBuilder();
- int c = pdfSource.read();
+ int c = seqSource.read();
while( !isEndOfName((char)c) && c != -1 )
{
buffer.append( (char)c );
- c = pdfSource.read();
+ c = seqSource.read();
}
if (c != -1)
{
- pdfSource.rewind(1);
+ seqSource.unread(c);
}
return buffer.toString();
}
@@ -1020,7 +991,7 @@ public abstract class BaseParser
}
/**
- * Reads given pattern from {@link #pdfSource}. Skipping whitespace at start and end if wanted.
+ * Reads the given pattern from {@link #seqSource}, skipping whitespace at start and end if wanted.
*
* @param expectedString pattern to be skipped
* @param skipSpaces if set to true spaces before and after the string will be skipped
@@ -1031,11 +1002,11 @@ public abstract class BaseParser
skipSpaces();
for (char c : expectedString)
{
- if (pdfSource.read() != c)
+ if (seqSource.read() != c)
{
throw new IOException("Expected string '" + new String(expectedString)
+ "' but missed at character '" + c + "' at offset "
- + pdfSource.getPosition());
+ + seqSource.getPosition());
}
}
skipSpaces();
@@ -1050,10 +1021,10 @@ public abstract class BaseParser
*/
protected void readExpectedChar(char ec) throws IOException
{
- char c = (char) pdfSource.read();
+ char c = (char) seqSource.read();
if (c != ec)
{
- throw new IOException("expected='" + ec + "' actual='" + c + "' at offset " + pdfSource.getPosition());
+ throw new IOException("expected='" + ec + "' actual='" + c + "' at offset " + seqSource.getPosition());
}
}
@@ -1070,7 +1041,7 @@ public abstract class BaseParser
{
skipSpaces();
- int c = pdfSource.read();
+ int c = seqSource.read();
//average string size is around 2 and the normal string buffer size is
//about 16 so lets save some space.
@@ -1082,11 +1053,11 @@ public abstract class BaseParser
c != '/' )
{
buffer.append( (char)c );
- c = pdfSource.read();
+ c = seqSource.read();
}
if (c != -1)
{
- pdfSource.rewind(1);
+ seqSource.unread(c);
}
return buffer.toString();
}
@@ -1100,7 +1071,7 @@ public abstract class BaseParser
*/
protected boolean isClosing() throws IOException
{
- return isClosing(pdfSource.peek());
+ return isClosing(seqSource.peek());
}
/**
@@ -1125,7 +1096,7 @@ public abstract class BaseParser
*/
protected String readLine() throws IOException
{
- if (pdfSource.isEOF())
+ if (seqSource.isEOF())
{
throw new IOException( "Error: End-of-File, expected line");
}
@@ -1133,7 +1104,7 @@ public abstract class BaseParser
StringBuilder buffer = new StringBuilder( 11 );
int c;
- while ((c = pdfSource.read()) != -1)
+ while ((c = seqSource.read()) != -1)
{
// CR and LF are valid EOLs
if (isEOL(c))
@@ -1143,9 +1114,9 @@ public abstract class BaseParser
buffer.append( (char)c );
}
// CR+LF is also a valid EOL
- if (isCR(c) && isLF(pdfSource.peek()))
+ if (isCR(c) && isLF(seqSource.peek()))
{
- pdfSource.read();
+ seqSource.read();
}
return buffer.toString();
}
@@ -1159,7 +1130,7 @@ public abstract class BaseParser
*/
protected boolean isEOL() throws IOException
{
- return isEOL(pdfSource.peek());
+ return isEOL(seqSource.peek());
}
/**
@@ -1192,7 +1163,7 @@ public abstract class BaseParser
*/
protected boolean isWhitespace() throws IOException
{
- return isWhitespace( pdfSource.peek() );
+ return isWhitespace(seqSource.peek());
}
/**
@@ -1216,7 +1187,7 @@ public abstract class BaseParser
*/
protected boolean isSpace() throws IOException
{
- return isSpace( pdfSource.peek() );
+ return isSpace(seqSource.peek());
}
/**
@@ -1239,7 +1210,7 @@ public abstract class BaseParser
*/
protected boolean isDigit() throws IOException
{
- return isDigit( pdfSource.peek() );
+ return isDigit(seqSource.peek());
}
/**
@@ -1260,27 +1231,27 @@ public abstract class BaseParser
*/
protected void skipSpaces() throws IOException
{
- int c = pdfSource.read();
+ int c = seqSource.read();
// 37 is the % character, a comment
while( isWhitespace(c) || c == 37)
{
if ( c == 37 )
{
// skip past the comment section
- c = pdfSource.read();
+ c = seqSource.read();
while(!isEOL(c) && c != -1)
{
- c = pdfSource.read();
+ c = seqSource.read();
}
}
else
{
- c = pdfSource.read();
+ c = seqSource.read();
}
}
if (c != -1)
{
- pdfSource.rewind(1);
+ seqSource.unread(c);
}
}
@@ -1338,8 +1309,8 @@ public abstract class BaseParser
}
catch( NumberFormatException e )
{
- pdfSource.rewind(intBuffer.toString().getBytes(ISO_8859_1).length);
- throw new IOException( "Error: Expected an integer type at offset "+pdfSource.getPosition(), e);
+ seqSource.unread(intBuffer.toString().getBytes(ISO_8859_1));
+ throw new IOException( "Error: Expected an integer type at offset "+ seqSource.getPosition(), e);
}
return retval;
}
@@ -1365,9 +1336,9 @@ public abstract class BaseParser
}
catch( NumberFormatException e )
{
- pdfSource.rewind(longBuffer.toString().getBytes(ISO_8859_1).length);
+ seqSource.unread(longBuffer.toString().getBytes(ISO_8859_1));
throw new IOException( "Error: Expected a long type at offset "
- + pdfSource.getPosition() + ", instead got '" + longBuffer + "'", e);
+ + seqSource.getPosition() + ", instead got '" + longBuffer + "'", e);
}
return retval;
}
@@ -1377,13 +1348,13 @@ public abstract class BaseParser
* and the {@linkplain #readLong()} method.
*
* @return the token to parse as integer or long by the calling method.
- * @throws IOException throws by the {@link #pdfSource} methods.
+ * @throws IOException thrown by the {@link #seqSource} methods.
*/
protected final StringBuilder readStringNumber() throws IOException
{
int lastByte = 0;
StringBuilder buffer = new StringBuilder();
- while( (lastByte = pdfSource.read() ) != ASCII_SPACE &&
+ while( (lastByte = seqSource.read() ) != ASCII_SPACE &&
lastByte != ASCII_LF &&
lastByte != ASCII_CR &&
lastByte != 60 && //see sourceforge bug 1714707
@@ -1396,9 +1367,8 @@ public abstract class BaseParser
}
if( lastByte != -1 )
{
- pdfSource.rewind(1);
+ seqSource.unread(lastByte);
}
return buffer;
}
-
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1692033&r1=1692032&r2=1692033&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Mon Jul 20 22:18:00 2015
@@ -16,8 +16,6 @@
*/
package org.apache.pdfbox.pdfparser;
-import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
-
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
@@ -35,7 +33,6 @@ import java.util.Queue;
import java.util.Set;
import java.util.TreeMap;
import java.util.Vector;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSArray;
@@ -48,9 +45,13 @@ import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.encryption.SecurityHandler;
+
+import static org.apache.pdfbox.util.Charsets.ISO_8859_1;
+
/**
* PDF-Parser which first reads startxref and xref tables in order to know valid objects and parse only these objects.
*
@@ -83,6 +84,8 @@ public class COSParser extends BaseParse
private static final int STRMBUFLEN = 2048;
private final byte[] strmBuf = new byte[ STRMBUFLEN ];
+ protected final RandomAccessRead source;
+
/**
* Only parse the PDF file minimally allowing access to basic information.
*/
@@ -156,8 +159,10 @@ public class COSParser extends BaseParse
/**
* Default constructor.
*/
- public COSParser()
+ public COSParser(RandomAccessRead source)
{
+ super(new RandomAccessSource(source));
+ this.source = source;
}
/**
@@ -192,7 +197,7 @@ public class COSParser extends BaseParse
*/
protected COSDictionary parseXref(long startXRefOffset) throws IOException
{
- pdfSource.seek(startXRefOffset);
+ source.seek(startXRefOffset);
long startXrefOffset = Math.max(0, parseStartXref());
// check the startxref offset
long fixedOffset = checkXRefOffset(startXrefOffset);
@@ -206,22 +211,22 @@ public class COSParser extends BaseParse
while (prev > 0)
{
// seek to xref table
- pdfSource.seek(prev);
+ source.seek(prev);
// skip white spaces
skipSpaces();
// -- parse xref
- if (pdfSource.peek() == X)
+ if (source.peek() == X)
{
// xref table and trailer
// use existing parser to parse xref table
parseXrefTable(prev);
// parse the last trailer.
- trailerOffset = pdfSource.getPosition();
+ trailerOffset = source.getPosition();
// PDFBOX-1739 skip extra xref entries in RegisSTAR documents
- while (isLenient && pdfSource.peek() != 't')
+ while (isLenient && source.peek() != 't')
{
- if (pdfSource.getPosition() == trailerOffset)
+ if (source.getPosition() == trailerOffset)
{
// warn only the first time
LOG.warn("Expected trailer object at position " + trailerOffset
@@ -232,7 +237,7 @@ public class COSParser extends BaseParse
if (!parseTrailer())
{
throw new IOException("Expected trailer object at position: "
- + pdfSource.getPosition());
+ + source.getPosition());
}
COSDictionary trailer = xrefTrailerResolver.getCurrentTrailer();
// check for a XRef stream, it may contain some object ids of compressed objects
@@ -248,7 +253,7 @@ public class COSParser extends BaseParse
}
if (streamOffset > 0)
{
- pdfSource.seek(streamOffset);
+ source.seek(streamOffset);
skipSpaces();
parseXrefObjStream(prev, false);
}
@@ -343,12 +348,12 @@ public class COSParser extends BaseParse
final int trailByteCount = (fileLen < readTrailBytes) ? (int) fileLen : readTrailBytes;
buf = new byte[trailByteCount];
skipBytes = fileLen - trailByteCount;
- pdfSource.seek(skipBytes);
+ source.seek(skipBytes);
int off = 0;
int readBytes;
while (off < trailByteCount)
{
- readBytes = pdfSource.read(buf, off, trailByteCount - off);
+ readBytes = source.read(buf, off, trailByteCount - off);
// in order to not get stuck in a loop we check readBytes (this should never happen)
if (readBytes < 1)
{
@@ -361,7 +366,7 @@ public class COSParser extends BaseParse
}
finally
{
- pdfSource.seek(0);
+ source.seek(0);
}
// find last '%%EOF'
int bufOff = lastIndexOf(EOF_MARKER, buf, buf.length);
@@ -705,7 +710,7 @@ public class COSParser extends BaseParse
private void parseFileObject(Long offsetOrObjstmObNr, final COSObjectKey objKey, long objNr, int objGenNr, final COSObject pdfObject) throws IOException
{
// ---- go to object start
- pdfSource.seek(offsetOrObjstmObNr);
+ source.seek(offsetOrObjstmObNr);
// ---- we must have an indirect object
final long readObjNr = readObjectNumber();
@@ -726,7 +731,7 @@ public class COSParser extends BaseParse
if (endObjectKey.equals(STREAM_STRING))
{
- pdfSource.rewind(endObjectKey.getBytes(ISO_8859_1).length);
+ source.rewind(endObjectKey.getBytes(ISO_8859_1).length);
if (pb instanceof COSDictionary)
{
COSStream stream = parseCOSStream((COSDictionary) pb);
@@ -843,10 +848,10 @@ public class COSParser extends BaseParse
if (lengthObj.getObject() == null)
{
// not read so far, keep current stream position
- final long curFileOffset = pdfSource.getPosition();
+ final long curFileOffset = source.getPosition();
parseObjectDynamically(lengthObj, true);
// reset current stream position
- pdfSource.seek(curFileOffset);
+ source.seek(curFileOffset);
if (lengthObj.getObject() == null)
{
throw new IOException("Length object content was not read.");
@@ -909,7 +914,7 @@ public class COSParser extends BaseParse
if (isLenient)
{
LOG.warn("The stream doesn't provide any stream length, using fallback readUntilEnd, at offset "
- + pdfSource.getPosition());
+ + source.getPosition());
}
else
{
@@ -932,22 +937,22 @@ public class COSParser extends BaseParse
if (endStream.equals("endobj") && isLenient)
{
LOG.warn("stream ends with 'endobj' instead of 'endstream' at offset "
- + pdfSource.getPosition());
+ + source.getPosition());
// avoid follow-up warning about missing endobj
- pdfSource.rewind(ENDOBJ.length);
+ source.rewind(ENDOBJ.length);
}
else if (endStream.length() > 9 && isLenient && endStream.substring(0,9).equals(ENDSTREAM_STRING))
{
LOG.warn("stream ends with '" + endStream + "' instead of 'endstream' at offset "
- + pdfSource.getPosition());
+ + source.getPosition());
// unread the "extra" bytes
- pdfSource.rewind(endStream.substring(9).getBytes(ISO_8859_1).length);
+ source.rewind(endStream.substring(9).getBytes(ISO_8859_1).length);
}
else if (!endStream.equals(ENDSTREAM_STRING))
{
throw new IOException(
"Error reading stream, expected='endstream' actual='"
- + endStream + "' at offset " + pdfSource.getPosition());
+ + endStream + "' at offset " + source.getPosition());
}
}
finally
@@ -984,7 +989,7 @@ public class COSParser extends BaseParse
final int quickTestOffset = 5;
// read next chunk into buffer; already matched chars are added to beginning of buffer
- while ( ( bufSize = pdfSource.read( strmBuf, charMatchCount, STRMBUFLEN - charMatchCount ) ) > 0 )
+ while ( ( bufSize = source.read( strmBuf, charMatchCount, STRMBUFLEN - charMatchCount ) ) > 0 )
{
bufSize += charMatchCount;
@@ -1055,7 +1060,7 @@ public class COSParser extends BaseParse
if ( charMatchCount == keyw.length )
{
// keyword matched; unread matched keyword (endstream/endobj) and following buffered content
- pdfSource.rewind( bufSize - contentBytes );
+ source.rewind( bufSize - contentBytes );
break;
}
else
@@ -1074,11 +1079,11 @@ public class COSParser extends BaseParse
while (remainBytes > 0)
{
final int chunk = (remainBytes > STREAMCOPYBUFLEN) ? STREAMCOPYBUFLEN : (int) remainBytes;
- final int readBytes = pdfSource.read(streamCopyBuf, 0, chunk);
+ final int readBytes = source.read(streamCopyBuf, 0, chunk);
if (readBytes <= 0)
{
// shouldn't happen, the stream length has already been validated
- throw new IOException("read error at offset " + pdfSource.getPosition()
+ throw new IOException("read error at offset " + source.getPosition()
+ ": expected " + chunk + " bytes, but read() returns " + readBytes);
}
out.write(streamCopyBuf, 0, readBytes);
@@ -1089,7 +1094,7 @@ public class COSParser extends BaseParse
private boolean validateStreamLength(long streamLength) throws IOException
{
boolean streamLengthIsValid = true;
- long originOffset = pdfSource.getPosition();
+ long originOffset = source.getPosition();
long expectedEndOfStream = originOffset + streamLength;
if (expectedEndOfStream > fileLen)
{
@@ -1100,7 +1105,7 @@ public class COSParser extends BaseParse
}
else
{
- pdfSource.seek(expectedEndOfStream);
+ source.seek(expectedEndOfStream);
skipSpaces();
if (!isString(ENDSTREAM))
{
@@ -1109,7 +1114,7 @@ public class COSParser extends BaseParse
+ "stream start position: " + originOffset + ", length: " + streamLength
+ ", expected end position: " + expectedEndOfStream);
}
- pdfSource.seek(originOffset);
+ source.seek(originOffset);
}
return streamLengthIsValid;
}
@@ -1128,8 +1133,8 @@ public class COSParser extends BaseParse
{
return startXRefOffset;
}
- pdfSource.seek(startXRefOffset);
- if (pdfSource.peek() == X && isString(XREF_TABLE))
+ source.seek(startXRefOffset);
+ if (source.peek() == X && isString(XREF_TABLE))
{
return startXRefOffset;
}
@@ -1161,8 +1166,8 @@ public class COSParser extends BaseParse
return startXRefOffset;
}
// seek to offset-1
- pdfSource.seek(startXRefOffset-1);
- int nextValue = pdfSource.read();
+ source.seek(startXRefOffset-1);
+ int nextValue = source.read();
// the first character has to be a whitespace, and then a digit
if (isWhitespace(nextValue) && isDigit())
{
@@ -1172,14 +1177,14 @@ public class COSParser extends BaseParse
readObjectNumber();
readGenerationNumber();
readExpectedString(OBJ_MARKER, true);
- pdfSource.seek(startXRefOffset);
+ source.seek(startXRefOffset);
return startXRefOffset;
}
catch (IOException exception)
{
// there wasn't an object of a xref stream
// try to repair the offset
- pdfSource.seek(startXRefOffset);
+ source.seek(startXRefOffset);
}
}
// try to find a fixed offset
@@ -1272,15 +1277,15 @@ public class COSParser extends BaseParse
}
long objectNr = objectKey.getNumber();
int objectGen = objectKey.getGeneration();
- long originOffset = pdfSource.getPosition();
- pdfSource.seek(offset);
+ long originOffset = source.getPosition();
+ source.seek(offset);
String objectString = createObjectString(objectNr, objectGen);
try
{
if (isString(objectString.getBytes(ISO_8859_1)))
{
// everything is ok, return origin object key
- pdfSource.seek(originOffset);
+ source.seek(originOffset);
return true;
}
}
@@ -1290,7 +1295,7 @@ public class COSParser extends BaseParse
}
finally
{
- pdfSource.seek(originOffset);
+ source.seek(originOffset);
}
// no valid object number found
return false;
@@ -1317,40 +1322,40 @@ public class COSParser extends BaseParse
if (bfSearchCOSObjectKeyOffsets == null)
{
bfSearchCOSObjectKeyOffsets = new HashMap<COSObjectKey, Long>();
- long originOffset = pdfSource.getPosition();
+ long originOffset = source.getPosition();
long currentOffset = MINIMUM_SEARCH_OFFSET;
String objString = " obj";
char[] string = objString.toCharArray();
do
{
- pdfSource.seek(currentOffset);
+ source.seek(currentOffset);
if (isString(string))
{
long tempOffset = currentOffset - 1;
- pdfSource.seek(tempOffset);
- int genID = pdfSource.peek();
+ source.seek(tempOffset);
+ int genID = source.peek();
// is the next char a digit?
if (isDigit(genID))
{
genID -= 48;
tempOffset--;
- pdfSource.seek(tempOffset);
+ source.seek(tempOffset);
if (isSpace())
{
while (tempOffset > MINIMUM_SEARCH_OFFSET && isSpace())
{
- pdfSource.seek(--tempOffset);
+ source.seek(--tempOffset);
}
int length = 0;
while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit())
{
- pdfSource.seek(--tempOffset);
+ source.seek(--tempOffset);
length++;
}
if (length > 0)
{
- pdfSource.read();
- byte[] objIDBytes = pdfSource.readFully(length);
+ source.read();
+ byte[] objIDBytes = source.readFully(length);
String objIdString = new String(objIDBytes, 0,
objIDBytes.length, ISO_8859_1);
Long objectID;
@@ -1372,9 +1377,9 @@ public class COSParser extends BaseParse
}
currentOffset++;
}
- while (!pdfSource.isEOF());
+ while (!source.isEOF());
// reestablish origin position
- pdfSource.seek(originOffset);
+ source.seek(originOffset);
}
}
@@ -1469,25 +1474,25 @@ public class COSParser extends BaseParse
{
// a pdf may contain more than one xref entry
bfSearchXRefTablesOffsets = new Vector<Long>();
- long originOffset = pdfSource.getPosition();
- pdfSource.seek(MINIMUM_SEARCH_OFFSET);
+ long originOffset = source.getPosition();
+ source.seek(MINIMUM_SEARCH_OFFSET);
// search for xref tables
- while (!pdfSource.isEOF())
+ while (!source.isEOF())
{
if (isString(XREF_TABLE))
{
- long newOffset = pdfSource.getPosition();
- pdfSource.seek(newOffset - 1);
+ long newOffset = source.getPosition();
+ source.seek(newOffset - 1);
// ensure that we don't read "startxref" instead of "xref"
if (isWhitespace())
{
bfSearchXRefTablesOffsets.add(newOffset);
}
- pdfSource.seek(newOffset + 4);
+ source.seek(newOffset + 4);
}
- pdfSource.read();
+ source.read();
}
- pdfSource.seek(originOffset);
+ source.seek(originOffset);
}
}
@@ -1502,51 +1507,51 @@ public class COSParser extends BaseParse
{
// a pdf may contain more than one /XRef entry
bfSearchXRefStreamsOffsets = new Vector<Long>();
- long originOffset = pdfSource.getPosition();
- pdfSource.seek(MINIMUM_SEARCH_OFFSET);
+ long originOffset = source.getPosition();
+ source.seek(MINIMUM_SEARCH_OFFSET);
// search for XRef streams
String objString = " obj";
char[] string = objString.toCharArray();
- while (!pdfSource.isEOF())
+ while (!source.isEOF())
{
if (isString(XREF_STREAM))
{
// search backwards for the beginning of the stream
long newOffset = -1;
- long xrefOffset = pdfSource.getPosition();
+ long xrefOffset = source.getPosition();
boolean objFound = false;
for (int i = 1; i < 30 && !objFound; i++)
{
long currentOffset = xrefOffset - (i * 10);
if (currentOffset > 0)
{
- pdfSource.seek(currentOffset);
+ source.seek(currentOffset);
for (int j = 0; j < 10; j++)
{
if (isString(string))
{
long tempOffset = currentOffset - 1;
- pdfSource.seek(tempOffset);
- int genID = pdfSource.peek();
+ source.seek(tempOffset);
+ int genID = source.peek();
// is the next char a digit?
if (isDigit(genID))
{
genID -= 48;
tempOffset--;
- pdfSource.seek(tempOffset);
+ source.seek(tempOffset);
if (isSpace())
{
int length = 0;
- pdfSource.seek(--tempOffset);
+ source.seek(--tempOffset);
while (tempOffset > MINIMUM_SEARCH_OFFSET && isDigit())
{
- pdfSource.seek(--tempOffset);
+ source.seek(--tempOffset);
length++;
}
if (length > 0)
{
- pdfSource.read();
- newOffset = pdfSource.getPosition();
+ source.read();
+ newOffset = source.getPosition();
}
}
}
@@ -1558,7 +1563,7 @@ public class COSParser extends BaseParse
else
{
currentOffset++;
- pdfSource.read();
+ source.read();
}
}
}
@@ -1567,11 +1572,11 @@ public class COSParser extends BaseParse
{
bfSearchXRefStreamsOffsets.add(newOffset);
}
- pdfSource.seek(xrefOffset + 5);
+ source.seek(xrefOffset + 5);
}
- pdfSource.read();
+ source.read();
}
- pdfSource.seek(originOffset);
+ source.seek(originOffset);
}
}
@@ -1600,7 +1605,7 @@ public class COSParser extends BaseParse
for(COSObjectKey key : bfSearchCOSObjectKeyOffsets.keySet())
{
Long offset = bfSearchCOSObjectKeyOffsets.get(key);
- pdfSource.seek(offset);
+ source.seek(offset);
readObjectNumber();
readGenerationNumber();
readExpectedString(OBJ_MARKER, true);
@@ -1667,14 +1672,14 @@ public class COSParser extends BaseParse
private boolean isString(byte[] string) throws IOException
{
boolean bytesMatching = false;
- if (pdfSource.peek() == string[0])
+ if (source.peek() == string[0])
{
int length = string.length;
byte[] bytesRead = new byte[length];
- int numberOfBytes = pdfSource.read(bytesRead, 0, length);
+ int numberOfBytes = source.read(bytesRead, 0, length);
while (numberOfBytes < length)
{
- int readMore = pdfSource.read(bytesRead, numberOfBytes, length - numberOfBytes);
+ int readMore = source.read(bytesRead, numberOfBytes, length - numberOfBytes);
if (readMore < 0)
{
break;
@@ -1685,7 +1690,7 @@ public class COSParser extends BaseParse
{
bytesMatching = true;
}
- pdfSource.rewind(numberOfBytes);
+ source.rewind(numberOfBytes);
}
return bytesMatching;
}
@@ -1700,15 +1705,15 @@ public class COSParser extends BaseParse
private boolean isString(char[] string) throws IOException
{
boolean bytesMatching = true;
- long originOffset = pdfSource.getPosition();
+ long originOffset = source.getPosition();
for (char c : string)
{
- if (pdfSource.read() != c)
+ if (source.read() != c)
{
bytesMatching = false;
}
}
- pdfSource.seek(originOffset);
+ source.seek(originOffset);
return bytesMatching;
}
@@ -1720,12 +1725,12 @@ public class COSParser extends BaseParse
*/
private boolean parseTrailer() throws IOException
{
- if(pdfSource.peek() != 't')
+ if(source.peek() != 't')
{
return false;
}
//read "trailer"
- long currentOffset = pdfSource.getPosition();
+ long currentOffset = source.getPosition();
String nextLine = readLine();
if( !nextLine.trim().equals( "trailer" ) )
{
@@ -1738,7 +1743,7 @@ public class COSParser extends BaseParse
// we can't just unread a portion of the read data as we don't know if the EOL consist of 1 or 2 bytes
int len = "trailer".length();
// jump back right after "trailer"
- pdfSource.seek(currentOffset + len);
+ source.seek(currentOffset + len);
}
else
{
@@ -1802,7 +1807,7 @@ public class COSParser extends BaseParse
// nothing found
if (!header.contains(headerMarker))
{
- pdfSource.seek(0);
+ source.seek(0);
return false;
}
@@ -1830,7 +1835,7 @@ public class COSParser extends BaseParse
{
String headerGarbage = header.substring(headerMarker.length() + 3, header.length()) + "\n";
header = header.substring(0, headerMarker.length() + 3);
- pdfSource.rewind(headerGarbage.getBytes(ISO_8859_1).length);
+ source.rewind(headerGarbage.getBytes(ISO_8859_1).length);
}
}
float headerVersion = -1;
@@ -1852,7 +1857,7 @@ public class COSParser extends BaseParse
}
document.setVersion(headerVersion);
// rewind
- pdfSource.seek(0);
+ source.seek(0);
return true;
}
@@ -1865,7 +1870,7 @@ public class COSParser extends BaseParse
*/
protected boolean parseXrefTable(long startByteOffset) throws IOException
{
- if(pdfSource.peek() != 'x')
+ if(source.peek() != 'x')
{
return false;
}
@@ -1878,7 +1883,7 @@ public class COSParser extends BaseParse
// check for trailer after xref
String str = readString();
byte[] b = str.getBytes(ISO_8859_1);
- pdfSource.rewind(b.length);
+ source.rewind(b.length);
// signal start of new XRef
xrefTrailerResolver.nextXrefObj( startByteOffset, XRefType.TABLE );
@@ -1901,11 +1906,11 @@ public class COSParser extends BaseParse
skipSpaces();
for(int i = 0; i < count; i++)
{
- if(pdfSource.isEOF() || isEndOfName((char)pdfSource.peek()))
+ if(source.isEOF() || isEndOfName((char)source.peek()))
{
break;
}
- if(pdfSource.peek() == 't')
+ if(source.peek() == 't')
{
break;
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java?rev=1692033&r1=1692032&r2=1692033&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java Mon Jul 20 22:18:00 2015
@@ -56,8 +56,8 @@ public class FDFParser extends COSParser
*/
public FDFParser(File file) throws IOException
{
+ super(new RandomAccessFile(file, "r"));
fileLen = file.length();
- pdfSource = new RandomAccessFile(file, "r");
init();
}
@@ -69,8 +69,8 @@ public class FDFParser extends COSParser
*/
public FDFParser(InputStream input) throws IOException
{
- pdfSource = new RandomAccessBuffer(input);
- fileLen = pdfSource.length();
+ super(new RandomAccessBuffer(input));
+ fileLen = source.length();
init();
}
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java?rev=1692033&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java Mon Jul 20 22:18:00 2015
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+/**
+ * A SequentialSource backed by an InputStream.
+ *
+ * <p>The stream is wrapped in a {@link PushbackInputStream} so that bytes which were already read
+ * can be pushed back, and the offset of the next byte to be read is tracked in {@code position}.
+ */
+final class InputStreamSource implements SequentialSource
+{
+    private final PushbackInputStream input;
+
+    // kept as a long because getPosition() returns a long; an int counter would overflow
+    // once more than Integer.MAX_VALUE bytes have been consumed
+    private long position;
+
+    /**
+     * Constructor.
+     *
+     * @param input The input stream to wrap.
+     */
+    InputStreamSource(InputStream input)
+    {
+        this.input = new PushbackInputStream(input, 32767); // maximum length of a PDF string
+        this.position = 0;
+    }
+
+    /**
+     * Reads a single byte.
+     *
+     * @return the byte read, or -1 at the end of the stream.
+     * @throws IOException if the underlying stream fails.
+     */
+    @Override
+    public int read() throws IOException
+    {
+        int b = input.read();
+        if (b != -1)
+        {
+            // EOF consumes nothing, so the position must not move
+            position++;
+        }
+        return b;
+    }
+
+    /**
+     * Reads up to {@code b.length} bytes into the given buffer.
+     *
+     * @param b the buffer to fill.
+     * @return the number of bytes read, or -1 at the end of the stream.
+     * @throws IOException if the underlying stream fails.
+     */
+    @Override
+    public int read(byte[] b) throws IOException
+    {
+        return advance(input.read(b));
+    }
+
+    /**
+     * Reads up to {@code length} bytes into the given buffer, starting at {@code offset}.
+     *
+     * @param b the buffer to fill.
+     * @param offset index in the buffer at which the first byte is stored.
+     * @param length maximum number of bytes to read.
+     * @return the number of bytes read, or -1 at the end of the stream.
+     * @throws IOException if the underlying stream fails.
+     */
+    @Override
+    public int read(byte[] b, int offset, int length) throws IOException
+    {
+        return advance(input.read(b, offset, length));
+    }
+
+    /**
+     * Returns the offset of the next byte which will be returned by a read method.
+     */
+    @Override
+    public long getPosition() throws IOException
+    {
+        return position;
+    }
+
+    /**
+     * Returns the next byte without consuming it, or -1 at the end of the stream.
+     */
+    @Override
+    public int peek() throws IOException
+    {
+        int b = input.read();
+        if (b != -1)
+        {
+            // push the byte straight back; the position is deliberately left untouched
+            input.unread(b);
+        }
+        return b;
+    }
+
+    /**
+     * Pushes a single byte back so that it is returned by the next read.
+     *
+     * @param b the byte to push back.
+     * @throws IOException if the pushback buffer is full.
+     */
+    @Override
+    public void unread(int b) throws IOException
+    {
+        input.unread(b);
+        position--;
+    }
+
+    /**
+     * Pushes all of the given bytes back so that they are returned by subsequent reads.
+     *
+     * @param bytes the bytes to push back.
+     * @throws IOException if the pushback buffer has insufficient room.
+     */
+    @Override
+    public void unread(byte[] bytes) throws IOException
+    {
+        input.unread(bytes);
+        position -= bytes.length;
+    }
+
+    /**
+     * Reads exactly {@code length} bytes.
+     *
+     * @param length the number of bytes to read.
+     * @return a new array holding the bytes read.
+     * @throws IOException if the stream ends before {@code length} bytes could be read, or if the
+     * underlying stream fails.
+     */
+    @Override
+    public byte[] readFully(int length) throws IOException
+    {
+        byte[] bytes = new byte[length];
+        int off = 0;
+        while (off < length)
+        {
+            // read(byte[], int, int) already advances the position, do not count the bytes twice
+            int n = this.read(bytes, off, length - off);
+            if (n < 0)
+            {
+                throw new IOException("Premature end of stream: expected " + length
+                        + " bytes, but only " + off + " could be read");
+            }
+            off += n;
+        }
+        return bytes;
+    }
+
+    /**
+     * Returns true if no more bytes can be read.
+     */
+    @Override
+    public boolean isEOF() throws IOException
+    {
+        return peek() == -1;
+    }
+
+    /**
+     * Closes the underlying stream.
+     */
+    @Override
+    public void close() throws IOException
+    {
+        input.close();
+    }
+
+    /**
+     * Advances the position by the result of a bulk read, ignoring the -1 end-of-stream marker.
+     *
+     * @param n the value returned by the underlying read.
+     * @return {@code n}, unchanged, so that it can be returned to the caller.
+     */
+    private int advance(int n)
+    {
+        if (n > 0)
+        {
+            position += n;
+        }
+        return n;
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/InputStreamSource.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java?rev=1692033&r1=1692032&r2=1692033&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java Mon Jul 20 22:18:00 2015
@@ -46,15 +46,15 @@ public class PDFObjectStreamParser exten
/**
* Constructor.
*
- * @param strm The stream to parse.
- * @param doc The document for the current parsing.
+ * @param stream The stream to parse.
+ * @param document The document for the current parsing.
* @throws IOException If there is an error initializing the stream.
*/
- public PDFObjectStreamParser(COSStream strm, COSDocument doc) throws IOException
+ public PDFObjectStreamParser(COSStream stream, COSDocument document) throws IOException
{
- super(strm.getUnfilteredRandomAccess());
- document = doc;
- stream = strm;
+ super(new InputStreamSource(stream.getUnfilteredStream()));
+ this.stream = stream;
+ this.document = document;
}
/**
@@ -99,7 +99,7 @@ public class PDFObjectStreamParser exten
// According to the spec objects within an object stream shall not be enclosed
// by obj/endobj tags, but there are some pdfs in the wild using those tags
// skip endobject marker if present
- if (!pdfSource.isEOF() && pdfSource.peek() == 'e')
+ if (!seqSource.isEOF() && seqSource.peek() == 'e')
{
readLine();
}
@@ -108,7 +108,7 @@ public class PDFObjectStreamParser exten
}
finally
{
- pdfSource.close();
+ seqSource.close();
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1692033&r1=1692032&r2=1692033&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Mon Jul 20 22:18:00 2015
@@ -129,7 +129,7 @@ public class PDFParser extends COSParser
public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
String alias, boolean useScratchFiles) throws IOException
{
- pdfSource = source;
+ super(source);
fileLen = source.length();
password = decryptionPassword;
keyStoreInputStream = keyStore;
@@ -152,7 +152,7 @@ public class PDFParser extends COSParser
public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore,
String alias, ScratchFile scratchFile) throws IOException
{
- pdfSource = source;
+ super(source);
fileLen = source.length();
password = decryptionPassword;
keyStoreInputStream = keyStore;
@@ -206,7 +206,7 @@ public class PDFParser extends COSParser
*/
public PDDocument getPDDocument() throws IOException
{
- return new PDDocument( getDocument(), pdfSource, accessPermission );
+ return new PDDocument( getDocument(), source, accessPermission );
}
/**
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1692033&r1=1692032&r2=1692033&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Mon Jul 20 22:18:00 2015
@@ -16,9 +16,11 @@
*/
package org.apache.pdfbox.pdfparser;
+import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -32,8 +34,6 @@ import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.io.RandomAccessBuffer;
-import org.apache.pdfbox.io.RandomAccessRead;
import org.apache.pdfbox.pdmodel.common.PDStream;
/**
@@ -52,49 +52,55 @@ public class PDFStreamParser extends Bas
private static final int MAX_BIN_CHAR_TEST_LENGTH = 10;
private final byte[] binCharTestArr = new byte[MAX_BIN_CHAR_TEST_LENGTH];
-
+
/**
* Constructor.
*
- * @param contentStream The content stream to parse.
+ * @param stream The stream to parse.
* @throws IOException If there is an error initializing the stream.
+ *
+ * @deprecated Use {@link #PDFStreamParser(PDContentStream)} instead.
*/
- public PDFStreamParser(PDContentStream contentStream) throws IOException
+ @Deprecated
+ public PDFStreamParser(PDStream stream) throws IOException
{
- this(new RandomAccessBuffer(contentStream.getContents()));
+ super(new InputStreamSource(stream.createInputStream()));
}
-
+
/**
* Constructor.
*
* @param stream The stream to parse.
* @throws IOException If there is an error initializing the stream.
+ *
+ * @deprecated Use {@link #PDFStreamParser(PDContentStream)} instead.
*/
- public PDFStreamParser( PDStream stream ) throws IOException
+ @Deprecated
+ public PDFStreamParser(COSStream stream) throws IOException
{
- this(stream.getCOSObject());
+ super(new InputStreamSource(stream.getUnfilteredStream()));
}
/**
* Constructor.
*
- * @param stream The stream to parse.
+ * @param contentStream The content stream to parse.
* @throws IOException If there is an error initializing the stream.
*/
- public PDFStreamParser( COSStream stream ) throws IOException
+ public PDFStreamParser(PDContentStream contentStream) throws IOException
{
- super(stream);
+ super(new InputStreamSource(contentStream.getContents()));
}
-
+
/**
* Constructor.
*
- * @param input The random access read to parse.
+ * @param bytes the bytes to parse.
* @throws IOException If there is an error initializing the stream.
*/
- public PDFStreamParser( RandomAccessRead input ) throws IOException
+ public PDFStreamParser(byte[] bytes) throws IOException
{
- super( input );
+ super(new InputStreamSource(new ByteArrayInputStream(bytes)));
}
/**
@@ -134,24 +140,24 @@ public class PDFStreamParser extends Bas
Object retval;
skipSpaces();
- int nextByte = pdfSource.peek();
+ int nextByte = seqSource.peek();
if( ((byte)nextByte) == -1 )
{
return null;
}
char c = (char)nextByte;
- switch(c)
+ switch (c)
{
case '<':
{
// pull off first left bracket
- int leftBracket = pdfSource.read();
+ int leftBracket = seqSource.read();
// check for second left bracket
- c = (char) pdfSource.peek();
+ c = (char) seqSource.peek();
// put back first bracket
- pdfSource.rewind(1);
+ seqSource.unread(leftBracket);
if (c == '<')
{
@@ -241,13 +247,13 @@ public class PDFStreamParser extends Bas
* allow 1 "." and "-" and "+" at start of number. */
StringBuffer buf = new StringBuffer();
buf.append( c );
- pdfSource.read();
+ seqSource.read();
boolean dotNotRead = c != '.';
- while( Character.isDigit(c = (char)pdfSource.peek()) || dotNotRead && c == '.')
+ while( Character.isDigit(c = (char) seqSource.peek()) || dotNotRead && c == '.')
{
buf.append( c );
- pdfSource.read();
+ seqSource.read();
if (dotNotRead && c == '.')
{
@@ -281,7 +287,7 @@ public class PDFStreamParser extends Bas
case 'I':
{
//Special case for ID operator
- String id = "" + (char)pdfSource.read() + (char)pdfSource.read();
+ String id = "" + (char) seqSource.read() + (char) seqSource.read();
if( !id.equals( "ID" ) )
{
throw new IOException( "Error: Expected operator 'ID' actual='" + id + "'" );
@@ -290,10 +296,10 @@ public class PDFStreamParser extends Bas
if( isWhitespace() )
{
//pull off the whitespace character
- pdfSource.read();
+ seqSource.read();
}
- int lastByte = pdfSource.read();
- int currentByte = pdfSource.read();
+ int lastByte = seqSource.read();
+ int currentByte = seqSource.read();
// PDF spec is kinda unclear about this. Should a whitespace
// always appear before EI? Not sure, so that we just read
// until EI<whitespace>.
@@ -301,12 +307,12 @@ public class PDFStreamParser extends Bas
while( !(lastByte == 'E' &&
currentByte == 'I' &&
hasNextSpaceOrReturn() &&
- hasNoFollowingBinData( pdfSource )) &&
- !pdfSource.isEOF() )
+ hasNoFollowingBinData(seqSource)) &&
+ !seqSource.isEOF() )
{
imageData.write( lastByte );
lastByte = currentByte;
- currentByte = pdfSource.read();
+ currentByte = seqSource.read();
}
// the EI operator isn't unread, as it won't be processed anyway
retval = Operator.getOperator("ID");
@@ -318,7 +324,7 @@ public class PDFStreamParser extends Bas
{
// some ']' around without its previous '['
// this means a PDF is somewhat corrupt but we will continue to parse.
- pdfSource.read();
+ seqSource.read();
// must be a better solution than null...
retval = COSNull.NULL;
@@ -350,8 +356,7 @@ public class PDFStreamParser extends Bas
* @return <code>true</code> if next bytes are probably printable ASCII
* characters starting with a PDF operator, otherwise <code>false</code>
*/
- private boolean hasNoFollowingBinData(final RandomAccessRead pdfSource)
- throws IOException
+ private boolean hasNoFollowingBinData(SequentialSource pdfSource) throws IOException
{
// as suggested in PDFBOX-1164
final int readBytes = pdfSource.read(binCharTestArr, 0, MAX_BIN_CHAR_TEST_LENGTH);
@@ -395,7 +400,7 @@ public class PDFStreamParser extends Bas
noBinData = false;
}
}
- pdfSource.rewind(readBytes);
+ pdfSource.unread(Arrays.copyOfRange(binCharTestArr, 0, readBytes));
}
if (!noBinData)
{
@@ -418,7 +423,7 @@ public class PDFStreamParser extends Bas
//average string size is around 2 and the normal string buffer size is
//about 16 so lets save some space.
StringBuffer buffer = new StringBuffer(4);
- int nextChar = pdfSource.peek();
+ int nextChar = seqSource.peek();
while(
nextChar != -1 && // EOF
!isWhitespace(nextChar) &&
@@ -430,14 +435,14 @@ public class PDFStreamParser extends Bas
(nextChar < '0' ||
nextChar > '9' ) )
{
- char currentChar = (char)pdfSource.read();
- nextChar = pdfSource.peek();
+ char currentChar = (char) seqSource.read();
+ nextChar = seqSource.peek();
buffer.append( currentChar );
// Type3 Glyph description has operators with a number in the name
if (currentChar == 'd' && (nextChar == '0' || nextChar == '1') )
{
- buffer.append( (char)pdfSource.read() );
- nextChar = pdfSource.peek();
+ buffer.append( (char) seqSource.read() );
+ nextChar = seqSource.peek();
}
}
return buffer.toString();
@@ -457,6 +462,6 @@ public class PDFStreamParser extends Bas
*/
private boolean hasNextSpaceOrReturn() throws IOException
{
- return isSpaceOrReturn( pdfSource.peek() );
+ return isSpaceOrReturn( seqSource.peek() );
}
}
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=1692033&r1=1692032&r2=1692033&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Mon Jul 20 22:18:00 2015
@@ -44,18 +44,18 @@ public class PDFXrefStreamParser extends
/**
* Constructor.
*
- * @param strm The stream to parse.
- * @param doc The document for the current parsing.
+ * @param stream The stream to parse.
+ * @param document The document for the current parsing.
* @param resolver resolver to read the xref/trailer information
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFXrefStreamParser(COSStream strm, COSDocument doc, XrefTrailerResolver resolver )
+ public PDFXrefStreamParser(COSStream stream, COSDocument document, XrefTrailerResolver resolver)
throws IOException
{
- super(strm.getUnfilteredRandomAccess());
- document = doc;
- stream = strm;
+ super(new InputStreamSource(stream.getUnfilteredStream()));
+ this.stream = stream;
+ this.document = document;
this.xrefTrailerResolver = resolver;
}
@@ -107,10 +107,10 @@ public class PDFXrefStreamParser extends
int w2 = xrefFormat.getInt(2);
int lineSize = w0 + w1 + w2;
- while(pdfSource.available() > 0 && objIter.hasNext())
+ while(!seqSource.isEOF() && objIter.hasNext())
{
byte[] currLine = new byte[lineSize];
- pdfSource.read(currLine);
+ seqSource.read(currLine);
int type = 0;
/*
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java?rev=1692033&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java Mon Jul 20 22:18:00 2015
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import java.io.IOException;
+import org.apache.pdfbox.io.RandomAccessRead;
+
+/**
+ * A SequentialSource backed by a RandomAccessRead.
+ *
+ * <p>Every operation is forwarded to the wrapped reader; "unreading" is implemented by rewinding
+ * the reader rather than by buffering the bytes handed back.
+ */
+final class RandomAccessSource implements SequentialSource
+{
+    private final RandomAccessRead delegate;
+
+    /**
+     * Constructor.
+     *
+     * @param reader The random access reader to wrap.
+     */
+    RandomAccessSource(RandomAccessRead reader)
+    {
+        delegate = reader;
+    }
+
+    /** Forwards to the wrapped reader's single-byte read. */
+    @Override
+    public int read() throws IOException
+    {
+        return this.delegate.read();
+    }
+
+    /** Forwards to the wrapped reader's whole-buffer read. */
+    @Override
+    public int read(byte[] b) throws IOException
+    {
+        return this.delegate.read(b);
+    }
+
+    /** Forwards to the wrapped reader's ranged buffer read. */
+    @Override
+    public int read(byte[] b, int offset, int length) throws IOException
+    {
+        return this.delegate.read(b, offset, length);
+    }
+
+    /** Returns the position reported by the wrapped reader. */
+    @Override
+    public long getPosition() throws IOException
+    {
+        return this.delegate.getPosition();
+    }
+
+    /** Forwards to the wrapped reader's peek. */
+    @Override
+    public int peek() throws IOException
+    {
+        return this.delegate.peek();
+    }
+
+    /**
+     * Steps the wrapped reader back by one byte. The value of {@code b} is not used.
+     */
+    @Override
+    public void unread(int b) throws IOException
+    {
+        this.delegate.rewind(1);
+    }
+
+    /**
+     * Steps the wrapped reader back by {@code bytes.length} bytes. The array content is not used.
+     */
+    @Override
+    public void unread(byte[] bytes) throws IOException
+    {
+        final int count = bytes.length;
+        this.delegate.rewind(count);
+    }
+
+    /** Forwards to the wrapped reader's readFully. */
+    @Override
+    public byte[] readFully(int length) throws IOException
+    {
+        return this.delegate.readFully(length);
+    }
+
+    /** Returns the end-of-file state reported by the wrapped reader. */
+    @Override
+    public boolean isEOF() throws IOException
+    {
+        return this.delegate.isEOF();
+    }
+
+    /** Closes the wrapped reader. */
+    @Override
+    public void close() throws IOException
+    {
+        this.delegate.close();
+    }
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/RandomAccessSource.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java?rev=1692033&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java Mon Jul 20 22:18:00 2015
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * A SequentialSource provides sequential access to data for parsing, with peek and push-back.
+ */
+interface SequentialSource extends Closeable
+{
+ /**
+ * Read a single byte of data.
+ *
+ * @return The byte of data that is being read.
+ * @throws IOException If there is an error while reading the data.
+ */
+ int read() throws IOException;
+
+ /**
+ * Read a buffer of data.
+ *
+ * @param b The buffer to write the data to.
+ * @return The number of bytes that were actually read.
+ * @throws IOException If there was an error while reading the data.
+ */
+ int read(byte[] b) throws IOException;
+
+ /**
+ * Read a buffer of data.
+ *
+ * @param b The buffer to write the data to.
+ * @param offset Offset into the buffer to start writing.
+ * @param length The amount of data to attempt to read.
+ * @return The number of bytes that were actually read.
+ * @throws IOException If there was an error while reading the data.
+ */
+ int read(byte[] b, int offset, int length) throws IOException;
+
+ /**
+ * Returns offset of next byte to be returned by a read method.
+ *
+ * @return offset of next byte which will be returned with next {@link #read()} (if no more
+ * bytes are left it returns a value {@code >=} length of source).
+ * @throws IOException If there was an error while reading the data.
+ */
+ long getPosition() throws IOException;
+
+ /**
+ * This will peek at the next byte.
+ *
+ * @return The next byte on the stream, leaving it as available to read.
+ * @throws IOException If there is an error reading the next byte.
+ */
+ int peek() throws IOException;
+
+ /**
+ * Unreads a single byte.
+ *
+ * @param b the byte to push back
+ * @throws IOException If there is an error while seeking
+ */
+ void unread(int b) throws IOException;
+
+ /**
+ * Unreads an array of bytes.
+ *
+ * @param bytes byte array to push back
+ * @throws IOException If there is an error while seeking
+ */
+ void unread(byte[] bytes) throws IOException;
+
+ /**
+ * Reads a given number of bytes in its entirety.
+ *
+ * @param length the number of bytes to be read
+ * @return a byte array containing the bytes just read
+ * @throws IOException if an I/O error occurs while reading data
+ */
+ byte[] readFully(int length) throws IOException;
+
+ /**
+ * Returns true if the end of the data source has been reached.
+ *
+ * @return true if we are at the end of the data.
+ * @throws IOException If there is an error reading the next byte.
+ */
+ boolean isEOF() throws IOException;
+}
Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/SequentialSource.java
------------------------------------------------------------------------------
svn:eol-style = native