You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2015/05/02 17:26:17 UTC
svn commit: r1677325 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Author: tilman
Date: Sat May 2 15:26:17 2015
New Revision: 1677325
URL: http://svn.apache.org/r1677325
Log:
PDFBOX-2576: split long method; use isDigit() where possible; partial reformat
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1677325&r1=1677324&r2=1677325&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Sat May 2 15:26:17 2015
@@ -511,12 +511,12 @@ public class COSParser extends BaseParse
}
/**
- * Will parse every object necessary to load a single page from the pdf document. We try our best to order objects
- * according to offset in file before reading to minimize seek operations.
- *
+ * Will parse every object necessary to load a single page from the pdf document. We try our
+ * best to order objects according to offset in file before reading to minimize seek operations.
+ *
* @param dict the COSObject from the parent pages.
* @param excludeObjects dictionary object reference entries with these names will not be parsed
- *
+ *
* @throws IOException if something went wrong
*/
protected void parseDictObjects(COSDictionary dict, COSName... excludeObjects) throws IOException
@@ -602,7 +602,7 @@ public class COSParser extends BaseParse
}
}
- // ---- read first COSObject with smallest offset;
+ // ---- read first COSObject with smallest offset
// resulting object will be added to toBeParsedList
if (objToBeParsed.isEmpty())
{
@@ -697,119 +697,125 @@ public class COSParser extends BaseParse
else if (offsetOrObjstmObNr > 0)
{
// offset of indirect object in file
- // ---- go to object start
- pdfSource.seek(offsetOrObjstmObNr);
+ parseFileObject(offsetOrObjstmObNr, objKey, objNr, objGenNr, pdfObject);
+ }
+ else
+ {
+ // xref value is object nr of object stream containing object to be parsed
+ // since our object was not found it means object stream was not parsed so far
+ parseObjectStream((int) -offsetOrObjstmObNr);
+ }
+ }
+ return pdfObject.getObject();
+ }
- // ---- we must have an indirect object
- final long readObjNr = readObjectNumber();
- final int readObjGen = readGenerationNumber();
- readExpectedString(OBJ_MARKER, true);
+ private void parseFileObject(Long offsetOrObjstmObNr, final COSObjectKey objKey, long objNr, int objGenNr, final COSObject pdfObject) throws IOException
+ {
+ // ---- go to object start
+ pdfSource.seek(offsetOrObjstmObNr);
- // ---- consistency check
- if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration()))
- {
- throw new IOException("XREF for " + objKey.getNumber() + ":"
- + objKey.getGeneration() + " points to wrong object: " + readObjNr
- + ":" + readObjGen);
- }
+ // ---- we must have an indirect object
+ final long readObjNr = readObjectNumber();
+ final int readObjGen = readGenerationNumber();
+ readExpectedString(OBJ_MARKER, true);
- skipSpaces();
- COSBase pb = parseDirObject();
- String endObjectKey = readString();
+ // ---- consistency check
+ if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration()))
+ {
+ throw new IOException("XREF for " + objKey.getNumber() + ":"
+ + objKey.getGeneration() + " points to wrong object: " + readObjNr
+ + ":" + readObjGen);
+ }
- if (endObjectKey.equals(STREAM_STRING))
- {
- pdfSource.unread(endObjectKey.getBytes(ISO_8859_1));
- pdfSource.unread(' ');
- if (pb instanceof COSDictionary)
- {
- COSStream stream = parseCOSStream((COSDictionary) pb);
+ skipSpaces();
+ COSBase pb = parseDirObject();
+ String endObjectKey = readString();
- if (securityHandler != null)
- {
- securityHandler.decryptStream(stream, objNr, objGenNr);
- }
- pb = stream;
- }
- else
- {
- // this is not legal
- // the combination of a dict and the stream/endstream
- // forms a complete stream object
- throw new IOException("Stream not preceded by dictionary (offset: "
- + offsetOrObjstmObNr + ").");
- }
- skipSpaces();
- endObjectKey = readLine();
+ if (endObjectKey.equals(STREAM_STRING))
+ {
+ pdfSource.unread(endObjectKey.getBytes(ISO_8859_1));
+ pdfSource.unread(' ');
+ if (pb instanceof COSDictionary)
+ {
+ COSStream stream = parseCOSStream((COSDictionary) pb);
- // we have case with a second 'endstream' before endobj
- if (!endObjectKey.startsWith(ENDOBJ_STRING) && endObjectKey.startsWith(ENDSTREAM_STRING))
- {
- endObjectKey = endObjectKey.substring(9).trim();
- if (endObjectKey.length() == 0)
- {
- // no other characters in extra endstream line
- // read next line
- endObjectKey = readLine();
- }
- }
- }
- else if (securityHandler != null)
+ if (securityHandler != null)
{
- securityHandler.decrypt(pb, objNr, objGenNr);
+ securityHandler.decryptStream(stream, objNr, objGenNr);
}
+ pb = stream;
+ }
+ else
+ {
+ // this is not legal
+ // the combination of a dict and the stream/endstream
+ // forms a complete stream object
+ throw new IOException("Stream not preceded by dictionary (offset: "
+ + offsetOrObjstmObNr + ").");
+ }
+ skipSpaces();
+ endObjectKey = readLine();
- pdfObject.setObject(pb);
-
- if (!endObjectKey.startsWith(ENDOBJ_STRING))
+ // we have case with a second 'endstream' before endobj
+ if (!endObjectKey.startsWith(ENDOBJ_STRING) && endObjectKey.startsWith(ENDSTREAM_STRING))
+ {
+ endObjectKey = endObjectKey.substring(9).trim();
+ if (endObjectKey.length() == 0)
{
- if (isLenient)
- {
- LOG.warn("Object (" + readObjNr + ":" + readObjGen + ") at offset "
- + offsetOrObjstmObNr + " does not end with 'endobj' but with '"
- + endObjectKey + "'");
- }
- else
- {
- throw new IOException("Object (" + readObjNr + ":" + readObjGen
- + ") at offset " + offsetOrObjstmObNr
- + " does not end with 'endobj' but with '" + endObjectKey + "'");
- }
+ // no other characters in extra endstream line
+ // read next line
+ endObjectKey = readLine();
}
}
+ }
+ else if (securityHandler != null)
+ {
+ securityHandler.decrypt(pb, objNr, objGenNr);
+ }
+
+ pdfObject.setObject(pb);
+
+ if (!endObjectKey.startsWith(ENDOBJ_STRING))
+ {
+ if (isLenient)
+ {
+ LOG.warn("Object (" + readObjNr + ":" + readObjGen + ") at offset "
+ + offsetOrObjstmObNr + " does not end with 'endobj' but with '"
+ + endObjectKey + "'");
+ }
else
{
- // xref value is object nr of object stream containing object to
- // be parsed;
- // since our object was not found it means object stream was not
- // parsed so far
- final int objstmObjNr = (int) (-offsetOrObjstmObNr);
- final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
- if (objstmBaseObj instanceof COSStream)
- {
- // parse object stream
- PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
- parser.parse();
- parser.close();
- // get set of object numbers referenced for this object
- // stream
- final Set<Long> refObjNrs = xrefTrailerResolver.getContainedObjectNumbers(objstmObjNr);
-
- // register all objects which are referenced to be contained
- // in object stream
- for (COSObject next : parser.getObjects())
- {
- COSObjectKey stmObjKey = new COSObjectKey(next);
- if (refObjNrs.contains(stmObjKey.getNumber()))
- {
- COSObject stmObj = document.getObjectFromPool(stmObjKey);
- stmObj.setObject(next.getObject());
- }
- }
+ throw new IOException("Object (" + readObjNr + ":" + readObjGen
+ + ") at offset " + offsetOrObjstmObNr
+ + " does not end with 'endobj' but with '" + endObjectKey + "'");
+ }
+ }
+ }
+
+ private void parseObjectStream(int objstmObjNr) throws IOException
+ {
+ final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
+ if (objstmBaseObj instanceof COSStream)
+ {
+ // parse object stream
+ PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
+ parser.parse();
+ parser.close();
+
+ // get set of object numbers referenced for this object stream
+ final Set<Long> refObjNrs = xrefTrailerResolver.getContainedObjectNumbers(objstmObjNr);
+
+ // register all objects which are referenced to be contained in object stream
+ for (COSObject next : parser.getObjects())
+ {
+ COSObjectKey stmObjKey = new COSObjectKey(next);
+ if (refObjNrs.contains(stmObjKey.getNumber()))
+ {
+ COSObject stmObj = document.getObjectFromPool(stmObjKey);
+ stmObj.setObject(next.getObject());
}
}
}
- return pdfObject.getObject();
}
private boolean inGetLength = false;
@@ -879,17 +885,18 @@ public class COSParser extends BaseParse
private final byte[] streamCopyBuf = new byte[STREAMCOPYBUFLEN];
/**
- * This will read a COSStream from the input stream using length attribute within dictionary. If length attribute is
- * a indirect reference it is first resolved to get the stream length. This means we copy stream data without
- * testing for 'endstream' or 'endobj' and thus it is no problem if these keywords occur within stream. We require
- * 'endstream' to be found after stream data is read.
- *
+ * This will read a COSStream from the input stream using length attribute within dictionary. If
+ * length attribute is a indirect reference it is first resolved to get the stream length. This
+ * means we copy stream data without testing for 'endstream' or 'endobj' and thus it is no
+ * problem if these keywords occur within stream. We require 'endstream' to be found after
+ * stream data is read.
+ *
* @param dic dictionary that goes with this stream.
- *
+ *
* @return parsed pdf stream.
- *
- * @throws IOException if an error occurred reading the stream, like problems with reading length attribute, stream
- * does not end with 'endstream' after data read, stream too short etc.
+ *
+ * @throws IOException if an error occurred reading the stream, like problems with reading
+ * length attribute, stream does not end with 'endstream' after data read, stream too short etc.
*/
protected COSStream parseCOSStream(COSDictionary dic) throws IOException
{
@@ -1058,9 +1065,8 @@ public class COSParser extends BaseParse
// the first character has to be a whitespace
if (isWhitespace(nextValue))
{
- nextValue = pdfSource.peek();
// is the next character a digit?
- if (nextValue > 47 && nextValue < 58)
+ if (isDigit())
{
try
{
@@ -1227,7 +1233,7 @@ public class COSParser extends BaseParse
pdfSource.seek(tempOffset);
int genID = pdfSource.peek();
// is the next char a digit?
- if (genID > 47 && genID < 58)
+ if (isDigit(genID))
{
genID -= 48;
tempOffset--;