You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2011/12/18 17:29:46 UTC
svn commit: r1220449 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Author: lehmi
Date: Sun Dec 18 16:29:46 2011
New Revision: 1220449
URL: http://svn.apache.org/viewvc?rev=1220449&view=rev
Log:
PDFBOX-569: fixed the handling of conflicting objects as proposed by LynX, added some reformatting
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1220449&r1=1220448&r2=1220449&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Dec 18 16:29:46 2011
@@ -51,7 +51,7 @@ public class PDFParser extends BaseParse
/**
* Log instance.
*/
- private static final Log log = LogFactory.getLog(PDFParser.class);
+ private static final Log LOG = LogFactory.getLog(PDFParser.class);
private static final int SPACE_BYTE = 32;
@@ -64,7 +64,8 @@ public class PDFParser extends BaseParse
private List<ConflictObj> conflictList = new ArrayList<ConflictObj>();
/** Collects all Xref/trailer objects and resolves them into single
- * object using startxref reference */
+ * object using startxref reference.
+ */
private XrefTrailerResolver xrefTrailerResolver = new XrefTrailerResolver();
/**
@@ -81,7 +82,8 @@ public class PDFParser extends BaseParse
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFParser( InputStream input ) throws IOException {
+ public PDFParser( InputStream input ) throws IOException
+ {
this(input, null, FORCE_PARSING);
}
@@ -92,8 +94,8 @@ public class PDFParser extends BaseParse
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFParser(InputStream input, RandomAccess rafi)
- throws IOException {
+ public PDFParser(InputStream input, RandomAccess rafi) throws IOException
+ {
this(input, rafi, FORCE_PARSING);
}
@@ -107,8 +109,8 @@ public class PDFParser extends BaseParse
*
* @throws IOException If there is an error initializing the stream.
*/
- public PDFParser(InputStream input, RandomAccess rafi, boolean force)
- throws IOException {
+ public PDFParser(InputStream input, RandomAccess rafi, boolean force) throws IOException
+ {
super(input, force);
this.raf = rafi;
}
@@ -132,6 +134,7 @@ public class PDFParser extends BaseParse
* parsing when the number of exceptions thrown exceeds a certain number).
*
* @param e The exception if available. Can be null if there is no exception available
+ * @return true if parsing could be continued, otherwise false
*/
protected boolean isContinueOnError(Exception e)
{
@@ -199,7 +202,7 @@ public class PDFParser extends BaseParse
* Warning is sent to the PDFBox.log and to the Console that
* we skipped over an object
*/
- log.warn("Parsing Error, Skipping Object", e);
+ LOG.warn("Parsing Error, Skipping Object", e);
skipToNextObj();
}
else
@@ -439,6 +442,7 @@ public class PDFParser extends BaseParse
//just read them and move on.
readString();
skipSpaces();
+ currentObjByteOffset = pdfSource.getOffset();
peekedChar = (char)pdfSource.peek();
}
if( pdfSource.isEOF())
@@ -475,16 +479,21 @@ public class PDFParser extends BaseParse
}
// verify that EOF exists (see PDFBOX-979 for documentation on special cases)
- if(!"%%EOF".equals(eof)) {
- if(eof.startsWith("%%EOF")) {
+ if(!"%%EOF".equals(eof))
+ {
+ if(eof.startsWith("%%EOF"))
+ {
// content after marker -> unread with first space byte for read newline
pdfSource.unread(SPACE_BYTE); // we read a whole line; add space as newline replacement
pdfSource.unread(eof.substring(5).getBytes("ISO-8859-1"));
- } else {
+ }
+ else
+ {
// PDF does not conform to spec, we should warn someone
- log.warn("expected='%%EOF' actual='" + eof + "'");
+ LOG.warn("expected='%%EOF' actual='" + eof + "'");
// if we're not at the end of a file, just put it back and move on
- if(!pdfSource.isEOF()) {
+ if(!pdfSource.isEOF())
+ {
pdfSource.unread( SPACE_BYTE ); // we read a whole line; add space as newline replacement
pdfSource.unread(eof.getBytes("ISO-8859-1"));
}
@@ -530,7 +539,8 @@ public class PDFParser extends BaseParse
//" genNumber=" + genNum + " key='" + objectKey + "'" );
if( !objectKey.equals( "obj" ) )
{
- if (!isContinueOnError(null) || !objectKey.equals("o")) {
+ if (!isContinueOnError(null) || !objectKey.equals("o"))
+ {
throw new IOException("expected='obj' actual='" + objectKey + "' " + pdfSource);
}
//assume that "o" was meant to be "obj" (this is a workaround for
@@ -609,7 +619,7 @@ public class PDFParser extends BaseParse
* I found which was created by Exstream Dialogue Version 5.0.039)
* in which case we ignore the data before endobj and just move on
*/
- log.warn("expected='endobj' actual='" + endObjectKey + "' ");
+ LOG.warn("expected='endobj' actual='" + endObjectKey + "' ");
}
else if( !pdfSource.isEOF() )
{
@@ -715,7 +725,7 @@ public class PDFParser extends BaseParse
String[] splitString = currentLine.split(" ");
if (splitString.length < 3)
{
- log.warn("invalid xref line: " + currentLine);
+ LOG.warn("invalid xref line: " + currentLine);
break;
}
/* This supports the corrupt table as reported in