You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2014/01/24 19:57:48 UTC
svn commit: r1561111 - in /pdfbox/trunk:
pdfbox/src/main/java/org/apache/pdfbox/pdfparser/
preflight/src/main/java/org/apache/pdfbox/preflight/parser/
Author: lehmi
Date: Fri Jan 24 18:57:47 2014
New Revision: 1561111
URL: http://svn.apache.org/r1561111
Log:
PDFBOX-1822: determine the correct type of the trailer as proposed by Timo Boehme
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Fri Jan 24 18:57:47 2014
@@ -56,6 +56,7 @@ import org.apache.pdfbox.io.PushBackInpu
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.io.RandomAccessBuffer;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
@@ -353,7 +354,6 @@ public class NonSequentialPDFParser exte
// -- parse xref
if (pdfSource.peek() == X)
{
- document.setIsXRefStream(false);
// xref table and trailer
// use existing parser to parse xref table
parseXrefTable(prev);
@@ -377,7 +377,6 @@ public class NonSequentialPDFParser exte
}
else
{
- document.setIsXRefStream(true);
// parse xref stream
prev = parseXrefObjStream(prev);
if (isLenient && prev > -1)
@@ -398,7 +397,7 @@ public class NonSequentialPDFParser exte
xrefTrailerResolver.setStartxref(startXrefOffset);
COSDictionary trailer = xrefTrailerResolver.getTrailer();
document.setTrailer(trailer);
-
+ document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType());
// check the offsets of all referenced objects
if (isLenient) {
checkXrefOffsets();
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Fri Jan 24 18:57:47 2014
@@ -36,6 +36,7 @@ import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.exceptions.WrappedIOException;
import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
import org.apache.pdfbox.persistence.util.COSObjectKey;
@@ -237,6 +238,7 @@ public class PDFParser extends BaseParse
// get resolved xref table + trailer
document.setTrailer( xrefTrailerResolver.getTrailer() );
+ document.setIsXRefStream(XRefType.STREAM == xrefTrailerResolver.getXrefType());
document.addXRefTable( xrefTrailerResolver.getXrefTable() );
if( !document.isEncrypted() )
@@ -738,7 +740,7 @@ public class PDFParser extends BaseParse
}
// signal start of new XRef
- xrefTrailerResolver.nextXrefObj( startByteOffset );
+ xrefTrailerResolver.nextXrefObj( startByteOffset, XRefType.TABLE );
/*
* Xref tables can have multiple sections.
@@ -880,7 +882,7 @@ public class PDFParser extends BaseParse
*/
public void parseXrefStream( COSStream stream, long objByteOffset ) throws IOException
{
- xrefTrailerResolver.nextXrefObj( objByteOffset );
+ xrefTrailerResolver.nextXrefObj( objByteOffset, XRefType.STREAM );
xrefTrailerResolver.setTrailer( stream );
PDFXrefStreamParser parser =
new PDFXrefStreamParser( stream, document, forceParsing, xrefTrailerResolver );
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java Fri Jan 24 18:57:47 2014
@@ -31,7 +31,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.persistence.util.COSObjectKey;
/**
@@ -63,6 +62,9 @@ public class XrefTrailerResolver
private class XrefTrailerObj
{
protected COSDictionary trailer = null;
+
+ private XRefType xrefType;
+
private final Map<COSObjectKey, Long> xrefTable = new HashMap<COSObjectKey, Long>();
/**
@@ -77,18 +79,33 @@ public class XrefTrailerResolver
*/
public void clearResources()
{
- if (trailer != null)
- {
- trailer.clear();
- trailer = null;
- }
- if (xrefTable != null)
- {
- xrefTable.clear();
- }
+ if (trailer != null)
+ {
+ trailer.clear();
+ trailer = null;
+ }
+ if (xrefTable != null)
+ {
+ xrefTable.clear();
+ }
}
}
+ /**
+ * The XRefType of a trailer.
+ */
+ public enum XRefType
+ {
+ /**
+ * XRef table type.
+ */
+ TABLE,
+ /**
+ * XRef stream type.
+ */
+ STREAM;
+ }
+
private final Map<Long, XrefTrailerObj> bytePosToXrefMap = new HashMap<Long, XrefTrailerObj>();
private XrefTrailerObj curXrefTrailerObj = null;
private XrefTrailerObj resolvedXrefTrailer = null;
@@ -96,33 +113,60 @@ public class XrefTrailerResolver
/** Log instance. */
private static final Log LOG = LogFactory.getLog( XrefTrailerResolver.class );
- public final COSDictionary getFirstTrailer() {
- if (bytePosToXrefMap.isEmpty()) return null;
-
- Set<Long> offsets = bytePosToXrefMap.keySet();
- SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
- return bytePosToXrefMap.get(sortedOffset.first()).trailer;
+ /**
+ * Returns the first trailer if at least one exists.
+ *
+ * @return the first trailer or null
+ */
+ public final COSDictionary getFirstTrailer()
+ {
+ if (bytePosToXrefMap.isEmpty())
+ {
+ return null;
+ }
+ Set<Long> offsets = bytePosToXrefMap.keySet();
+ SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
+ return bytePosToXrefMap.get(sortedOffset.first()).trailer;
}
- public final COSDictionary getLastTrailer() {
- if (bytePosToXrefMap.isEmpty()) return null;
-
- Set<Long> offsets = bytePosToXrefMap.keySet();
- SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
- return bytePosToXrefMap.get(sortedOffset.last()).trailer;
+ /**
+ * Returns the last trailer if at least one exists.
+ *
+ * @return the last trailer or null
+ */
+ public final COSDictionary getLastTrailer()
+ {
+ if (bytePosToXrefMap.isEmpty())
+ {
+ return null;
+ }
+ Set<Long> offsets = bytePosToXrefMap.keySet();
+ SortedSet<Long> sortedOffset = new TreeSet<Long>(offsets);
+ return bytePosToXrefMap.get(sortedOffset.last()).trailer;
}
/**
* Signals that a new XRef object (table or stream) starts.
* @param startBytePos the offset to start at
- *
+ * @param type the type of the Xref object
*/
- public void nextXrefObj( final long startBytePos )
+ public void nextXrefObj( final long startBytePos, XRefType type )
{
bytePosToXrefMap.put( startBytePos, curXrefTrailerObj = new XrefTrailerObj() );
+ curXrefTrailerObj.xrefType = type;
}
/**
+ * Returns the XRefType of the resolved trailer.
+ *
+ * @return the XRefType or null.
+ */
+ public XRefType getXrefType()
+ {
+ return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefType;
+ }
+
+ /**
* Populate XRef HashMap of current XRef object.
* Will add an Xreftable entry that maps ObjectKeys to byte offsets in the file.
* @param objKey The objkey, with id and gen numbers
@@ -306,20 +350,20 @@ public class XrefTrailerResolver
*/
public void clearResources()
{
- if (curXrefTrailerObj != null)
- {
- curXrefTrailerObj.clearResources();
- curXrefTrailerObj = null;
- }
- if (resolvedXrefTrailer != null)
- {
- resolvedXrefTrailer.clearResources();
- resolvedXrefTrailer = null;
- }
- if (bytePosToXrefMap != null)
- {
- bytePosToXrefMap.clear();
- }
+ if (curXrefTrailerObj != null)
+ {
+ curXrefTrailerObj.clearResources();
+ curXrefTrailerObj = null;
+ }
+ if (resolvedXrefTrailer != null)
+ {
+ resolvedXrefTrailer.clearResources();
+ resolvedXrefTrailer = null;
+ }
+ if (bytePosToXrefMap != null)
+ {
+ bytePosToXrefMap.clear();
+ }
}
}
Modified: pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java?rev=1561111&r1=1561110&r2=1561111&view=diff
==============================================================================
--- pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java (original)
+++ pdfbox/trunk/preflight/src/main/java/org/apache/pdfbox/preflight/parser/PreflightParser.java Fri Jan 24 18:57:47 2014
@@ -76,6 +76,7 @@ import org.apache.pdfbox.pdfparser.BaseP
import org.apache.pdfbox.pdfparser.NonSequentialPDFParser;
import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
import org.apache.pdfbox.pdfparser.PDFParser;
+import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.persistence.util.COSObjectKey;
import org.apache.pdfbox.preflight.Format;
@@ -346,7 +347,7 @@ public class PreflightParser extends Non
}
// signal start of new XRef
- xrefTrailerResolver.nextXrefObj(startByteOffset);
+ xrefTrailerResolver.nextXrefObj(startByteOffset,XRefType.TABLE);
/*
* Xref tables can have multiple sections. Each starts with a starting object id and a count.