You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/10/03 17:38:42 UTC
svn commit: r1811016 -
/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Author: lehmi
Date: Tue Oct 3 17:38:41 2017
New Revision: 1811016
URL: http://svn.apache.org/viewvc?rev=1811016&view=rev
Log:
PDFBOX-3948: eliminate newlines in object streams and add handling for object streams to rebuildTrailer
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1811016&r1=1811015&r2=1811016&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Tue Oct 3 17:38:41 2017
@@ -1727,6 +1727,7 @@ public class COSParser extends BaseParse
}
String numbersStr = new String(numbersBytes, start, numbersBytes.length - start,
"ISO-8859-1");
+ numbersStr = numbersStr.replaceAll("\n", " ").replaceAll("  ", " ");
String[] numbers = numbersStr.split(" ");
for (int i = 0; i < nrOfObjects; i++)
{
@@ -1887,47 +1888,66 @@ public class COSParser extends BaseParse
for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
{
Long offset = entry.getValue();
- // skip compressed objects
+ COSDictionary dictionary = null;
+ // handle compressed objects
if (offset < 0)
{
- continue;
+ parseObjectStream((int) -offset);
+ COSObject compressedObject = document.getObjectFromPool(entry.getKey());
+ if (compressedObject != null)
+ {
+ COSBase baseObject = compressedObject.getObject();
+ if (baseObject instanceof COSDictionary)
+ {
+ dictionary = (COSDictionary) baseObject;
+ }
+ else
+ {
+ continue;
+ }
+ }
}
- source.seek(offset);
- readObjectNumber();
- readGenerationNumber();
- readExpectedString(OBJ_MARKER, true);
- try
+ else
{
+ source.seek(offset);
+ readObjectNumber();
+ readGenerationNumber();
+ readExpectedString(OBJ_MARKER, true);
if (source.peek() != '<')
{
continue;
}
- COSDictionary dictionary = parseCOSDictionary();
- // document catalog
- if (isCatalog(dictionary))
- {
- trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
- }
- // info dictionary
- else if (!dictionary.containsKey(COSName.PARENT) &&
- (dictionary.containsKey(COSName.MOD_DATE)
- || dictionary.containsKey(COSName.TITLE)
- || dictionary.containsKey(COSName.AUTHOR)
- || dictionary.containsKey(COSName.SUBJECT)
- || dictionary.containsKey(COSName.KEYWORDS)
- || dictionary.containsKey(COSName.CREATOR)
- || dictionary.containsKey(COSName.PRODUCER)
- || dictionary.containsKey(COSName.CREATION_DATE)))
+ try
{
- trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
+ dictionary = parseCOSDictionary();
}
- // encryption dictionary, if existing, is lost
- // We can't run "Algorithm 2" from PDF specification because of missing ID
+ catch (IOException exception)
+ {
+ LOG.debug("Skipped object " + entry.getKey()
+ + ", either it's corrupt or not a dictionary");
+ continue;
+ }
+ }
+ // document catalog
+ if (isCatalog(dictionary))
+ {
+ trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
}
- catch(IOException exception)
+ // info dictionary
+ else if (!dictionary.containsKey(COSName.PARENT)
+ && (dictionary.containsKey(COSName.MOD_DATE)
+ || dictionary.containsKey(COSName.TITLE)
+ || dictionary.containsKey(COSName.AUTHOR)
+ || dictionary.containsKey(COSName.SUBJECT)
+ || dictionary.containsKey(COSName.KEYWORDS)
+ || dictionary.containsKey(COSName.CREATOR)
+ || dictionary.containsKey(COSName.PRODUCER)
+ || dictionary.containsKey(COSName.CREATION_DATE)))
{
- LOG.debug("Skipped object " + entry.getKey() + ", either it's corrupt or not a dictionary");
+ trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
}
+ // encryption dictionary, if existing, is lost
+ // We can't run "Algorithm 2" from PDF specification because of missing ID
}
}
return trailer;