You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2009/06/10 07:48:30 UTC
svn commit: r783210 -
/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Author: lehmi
Date: Wed Jun 10 05:48:30 2009
New Revision: 783210
URL: http://svn.apache.org/viewvc?rev=783210&view=rev
Log:
PDFBOX-464: Fixed some parser issues concerning PDFs generated with Crystal Reports. Thanks to Sean Bridges (sean dot bridges at gmail dot com).
Modified:
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=783210&r1=783209&r2=783210&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Wed Jun 10 05:48:30 2009
@@ -31,8 +31,6 @@
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSStream;
-import org.apache.pdfbox.exceptions.LoggingObject;
import org.apache.pdfbox.exceptions.WrappedIOException;
import org.apache.pdfbox.io.RandomAccess;
@@ -569,11 +567,13 @@
* @throws IOException If an IO error occurs.
*/
private boolean parseStartXref() throws IOException{
- if(pdfSource.peek() != 's'){
+ if(pdfSource.peek() != 's')
+ {
return false;
}
- String nextLine = readLine();
- if( !nextLine.equals( "startxref" ) ) {
+ String startXRef = readString();
+ if( !startXRef.trim().equals( "startxref" ) )
+ {
return false;
}
skipSpaces();
@@ -593,49 +593,59 @@
* @throws IOException If an IO error occurs.
*/
private boolean parseXrefTable() throws IOException{
- if(pdfSource.peek() != 'x'){
+ if(pdfSource.peek() != 'x')
+ {
return false;
}
- String nextLine = readLine();
- if( !nextLine.equals( "xref" ) ) {
+ String xref = readString();
+ if( !xref.trim().equals( "xref" ) )
+ {
return false;
}
/*
* Xref tables can have multiple sections.
* Each starts with a starting object id and a count.
*/
- while(true){
+ while(true)
+ {
int currObjID = readInt(); // first obj id
int count = readInt(); // the number of objects in the xref table
skipSpaces();
for(int i = 0; i < count; i++){
- if(pdfSource.isEOF() || isEndOfName((char)pdfSource.peek())){
+ if(pdfSource.isEOF() || isEndOfName((char)pdfSource.peek()))
+ {
break;
}
- if(pdfSource.peek() == 't'){
+ if(pdfSource.peek() == 't')
+ {
break;
}
//Ignore table contents
String currentLine = readLine();
String[] splitString = currentLine.split(" ");
- if (splitString.length < 3) {
+ if (splitString.length < 3)
+ {
logger().warning("invalid xref line: " + currentLine);
break;
}
/* This supports the corrupt table as reported in
* PDFBOX-474 (XXXX XXX XX n) */
- if(splitString[splitString.length-1].equals("n")){
- try{
+ if(splitString[splitString.length-1].equals("n"))
+ {
+ try
+ {
int currOffset = Integer.parseInt(splitString[0]);
int currGenID = Integer.parseInt(splitString[1]);
COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
document.setXRef(objKey, currOffset);
}
- catch(NumberFormatException e){
+ catch(NumberFormatException e)
+ {
throw new IOException(e.getMessage());
}
}
- else if(!splitString[2].equals("f")){
+ else if(!splitString[2].equals("f"))
+ {
throw new IOException("Corrupt XRefTable Entry - ObjID:" + currObjID);
}
currObjID++;
@@ -658,22 +668,26 @@
*/
private boolean parseTrailer() throws IOException
{
- if(pdfSource.peek() != 't'){
+ if(pdfSource.peek() != 't')
+ {
return false;
}
//read "trailer"
String nextLine = readLine();
- if( !nextLine.equals( "trailer" ) ) {
+ if( !nextLine.trim().equals( "trailer" ) )
+ {
// in some cases the EOL is missing and the trailer immediately continues with "<<" or with a blank character
// even if this does not comply with PDF reference we want to support as many PDFs as possible
// Acrobat reader can also deal with this.
- if (nextLine.startsWith("trailer")) {
+ if (nextLine.startsWith("trailer"))
+ {
byte[] b = nextLine.getBytes();
int len = "trailer".length();
pdfSource.unread('\n');
pdfSource.unread(b, len, b.length-len);
}
- else {
+ else
+ {
return false;
}
}