You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2009/07/12 17:33:11 UTC
svn commit: r793364 - in /incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox: io/PushBackInputStream.java pdfparser/BaseParser.java
Author: lehmi
Date: Sun Jul 12 15:33:09 2009
New Revision: 793364
URL: http://svn.apache.org/viewvc?rev=793364&view=rev
Log:
PDFBOX-462: fixed parsing of boolean values. Patch by Jeremias Maerki (jeremias at apache dot org)
Modified:
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java?rev=793364&r1=793363&r2=793364&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java Sun Jul 12 15:33:09 2009
@@ -18,6 +18,7 @@
import java.io.InputStream;
import java.io.IOException;
+import java.io.EOFException;
/**
* A simple subclass that adds a few convience methods.
@@ -67,43 +68,52 @@
}
/**
- * Returns the current byte offset in the file
+ * Returns the current byte offset in the file.
* @return the int byte offset
*/
- public int getOffset(){
+ public int getOffset()
+ {
return offset;
}
/**
* {@inheritDoc}
*/
- public int read() throws IOException{
+ public int read() throws IOException
+ {
int retval = super.read();
if (retval != -1)
+ {
offset++;
+ }
return retval;
}
/**
* {@inheritDoc}
*/
- public int read(byte[] b) throws IOException{
+ public int read(byte[] b) throws IOException
+ {
return this.read(b, 0, b.length);
}
/**
* {@inheritDoc}
*/
- public int read(byte[] b, int off, int len) throws IOException{
+ public int read(byte[] b, int off, int len) throws IOException
+ {
int retval = super.read(b, off, len);
if (retval != -1)
+ {
offset += retval;
+ }
return retval;
}
/**
* {@inheritDoc}
*/
- public void unread(int b) throws IOException{
+ public void unread(int b) throws IOException
+ {
offset--;
super.unread(b);
}
@@ -111,19 +121,21 @@
/**
* {@inheritDoc}
*/
- public void unread(byte[] b) throws IOException{
+ public void unread(byte[] b) throws IOException
+ {
this.unread(b, 0, b.length);
}
/**
* {@inheritDoc}
*/
- public void unread(byte[] b, int off, int len) throws IOException{
- if (len == 0)
- return;
-
- offset -= len;
- super.unread(b, off, len);
+ public void unread(byte[] b, int off, int len) throws IOException
+ {
+ if (len > 0)
+ {
+ offset -= len;
+ super.unread(b, off, len);
+ }
}
/**
@@ -163,4 +175,27 @@
}
this.unread( tmpBuffer, 0, totalAmountRead );
}
+
+ /**
+ * Reads a given number of bytes from the underlying stream.
+ * @param length the number of bytes to be read
+ * @return a byte array containing the bytes just read
+ * @throws IOException if an I/O error occurs while reading data
+ */
+ public byte[] readFully(int length) throws IOException
+ {
+ byte[] data = new byte[length];
+ int pos = 0;
+ while (pos < length)
+ {
+ int amountRead = read( data, pos, length - pos );
+ if (amountRead < 0)
+ {
+ throw new EOFException("Premature end of file");
+ }
+ pos += amountRead;
+ }
+ return data;
+ }
+
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=793364&r1=793363&r2=793364&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Sun Jul 12 15:33:09 2009
@@ -41,7 +41,6 @@
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.persistence.util.COSObjectKey;
-import org.apache.pdfbox.exceptions.LoggingObject;
/**
* This class is used to contain parsing logic that will be used by both the
* PDFParser and the COSStreamParser.
@@ -57,6 +56,9 @@
public static final byte[] ENDSTREAM =
new byte[] {101,110,100,115,116,114,101,97,109};//"endstream".getBytes( "ISO-8859-1" );
+ /**
+ * This is a byte array that will be used for comparisons.
+ */
public static final byte[] ENDOBJ =
new byte[] {101,110,100,111,98,106};//"endobj".getBytes( "ISO-8859-1" );
/**
@@ -67,9 +69,11 @@
/**
* This is the stream that will be read from.
*/
- //protected PushBackByteArrayStream pdfSource;
protected PushBackInputStream pdfSource;
+ /**
+ * This is the document that will be parsed.
+ */
protected COSDocument document;
/**
@@ -292,13 +296,15 @@
skipSpaces();
endStream = readString();
- if (!endStream.equals("endstream")){
+ if (!endStream.equals("endstream"))
+ {
/*
* Sometimes stream objects don't have an endstream tag so readUntilEndStream(out)
* also can stop on endobj tags. If that's the case we need to make sure to unread
* the endobj so parseObject() can handle that case normally.
*/
- if (endStream.startsWith("endobj")){
+ if (endStream.startsWith("endobj"))
+ {
byte[] endobjarray = endStream.getBytes();
pdfSource.unread(endobjarray);
}
@@ -308,20 +314,23 @@
* and not part of the endstream keyword. Ex. Some files would have "endstream8"
* instead of "endstream"
*/
- else if(endStream.startsWith("endstream")){
+ else if(endStream.startsWith("endstream"))
+ {
String extra = endStream.substring(9, endStream.length());
endStream = endStream.substring(0, 9);
byte[] array = extra.getBytes();
pdfSource.unread(array);
}
- else{
+ else
+ {
/*
* If for some reason we get something else here, Read until we find the next
* "endstream"
*/
readUntilEndStream( out );
endStream = readString();
- if( !endStream.equals( "endstream" ) ){
+ if( !endStream.equals( "endstream" ) )
+ {
throw new IOException("expected='endstream' actual='" + endStream + "' " + pdfSource);
}
}
@@ -351,10 +360,13 @@
byte[] buffer = new byte[ENDSTREAM.length];
int nextIdx = pdfSource.read(buffer) % buffer.length;
if (nextIdx == -1)
+ {
return;
-
- while(byteRead != -1 ) {
- if (cmpCircularBuffer( buffer, (nextIdx-ENDSTREAM.length + buffer.length)%buffer.length, ENDSTREAM )) {
+ }
+ while(byteRead != -1 )
+ {
+ if (cmpCircularBuffer( buffer, (nextIdx-ENDSTREAM.length + buffer.length)%buffer.length, ENDSTREAM ))
+ {
pdfSource.unread( ENDSTREAM );
return;
}
@@ -363,10 +375,12 @@
* the object with an endobj tag so we want to stop there as well.
*/
int endObjStart = (nextIdx-ENDOBJ.length+ buffer.length)%buffer.length;
- if (cmpCircularBuffer( buffer, endObjStart, ENDOBJ )) {
+ if (cmpCircularBuffer( buffer, endObjStart, ENDOBJ ))
+ {
// data is written to out only when it is going to be overwritten.
// write out the rest of the data in the buffer since ENDOBJ is smaller then the buffer
- for (int i = nextIdx; i < buffer.length && i < endObjStart; i++ ) {
+ for (int i = nextIdx; i < buffer.length && i < endObjStart; i++ )
+ {
out.write(buffer[i]);
}
pdfSource.unread( ENDOBJ );
@@ -378,7 +392,8 @@
byteRead = pdfSource.read();
buffer[nextIdx] = (byte)byteRead;
- if (++nextIdx == buffer.length) {
+ if (++nextIdx == buffer.length)
+ {
nextIdx = 0;
}
}
@@ -396,7 +411,8 @@
int cmpLen = compareTo.length;
int buflen = buffer.length;
boolean match = true;
- for( int i=0; match && i<cmpLen; ++i ) {
+ for( int i=0; match && i<cmpLen; ++i )
+ {
match = buffer[(currentIndex+i)%buflen] == compareTo[i];
}
return match;
@@ -475,7 +491,8 @@
braces = 0;
}
}
- if (amountRead > 0) {
+ if (amountRead > 0)
+ {
pdfSource.unread( nextThreeBytes, 0, amountRead );
}
if( braces != 0 )
@@ -637,19 +654,25 @@
if( pbo instanceof COSObject )
{
// We have to check if the expected values are there or not PDFBOX-385
- if (po.get(po.size()-1) instanceof COSInteger) {
+ if (po.get(po.size()-1) instanceof COSInteger)
+ {
COSInteger genNumber = (COSInteger)po.remove( po.size() -1 );
- if (po.get(po.size()-1) instanceof COSInteger) {
+ if (po.get(po.size()-1) instanceof COSInteger)
+ {
COSInteger number = (COSInteger)po.remove( po.size() -1 );
COSObjectKey key = new COSObjectKey(number.intValue(), genNumber.intValue());
pbo = document.getObjectFromPool(key);
}
- else
+ else
+ {
// the object reference is somehow wrong
pbo = null;
+ }
}
- else
+ else
+ {
pbo = null;
+ }
}
if( pbo != null )
{
@@ -765,9 +788,7 @@
char c = (char)pdfSource.peek();
if( c == 't' )
{
- byte[] trueArray = new byte[ 4 ];
- int amountRead = pdfSource.read( trueArray, 0, 4 );
- String trueString = new String( trueArray, 0, amountRead );
+ String trueString = new String( pdfSource.readFully( 4 ) );
if( !trueString.equals( "true" ) )
{
throw new IOException( "Error parsing boolean: expected='true' actual='" + trueString + "'" );
@@ -779,9 +800,7 @@
}
else if( c == 'f' )
{
- byte[] falseArray = new byte[ 5 ];
- int amountRead = pdfSource.read( falseArray, 0, 5 );
- String falseString = new String( falseArray, 0, amountRead );
+ String falseString = new String( pdfSource.readFully( 5 ) );
if( !falseString.equals( "false" ) )
{
throw new IOException( "Error parsing boolean: expected='true' actual='" + falseString + "'" );
@@ -854,9 +873,7 @@
}
case 't':
{
- byte[] trueBytes = new byte[4];
- int amountRead = pdfSource.read( trueBytes, 0, 4 );
- String trueString = new String( trueBytes, 0, amountRead );
+ String trueString = new String( pdfSource.readFully(4) );
if( trueString.equals( "true" ) )
{
retval = COSBoolean.TRUE;
@@ -869,9 +886,7 @@
}
case 'f':
{
- byte[] falseBytes = new byte[5];
- int amountRead = pdfSource.read( falseBytes, 0, 5 );
- String falseString = new String( falseBytes, 0, amountRead );
+ String falseString = new String( pdfSource.readFully(5) );
if( falseString.equals( "false" ) )
{
retval = COSBoolean.FALSE;
@@ -1069,13 +1084,17 @@
*
* @throws IOException If there is an error reading from the stream.
*/
- protected String readLine() throws IOException {
+ protected String readLine() throws IOException
+ {
StringBuffer buffer = new StringBuffer( 11 );
int c;
- while ((c = pdfSource.read()) != -1) {
- if (isEOL(c))
+ while ((c = pdfSource.read()) != -1)
+ {
+ if (isEOL(c))
+ {
break;
+ }
buffer.append( (char)c );
}
return buffer.toString();