You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/10/03 17:38:42 UTC

svn commit: r1811016 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Author: lehmi
Date: Tue Oct  3 17:38:41 2017
New Revision: 1811016

URL: http://svn.apache.org/viewvc?rev=1811016&view=rev
Log:
PDFBOX-3948: eliminate newlines in object streams and add handling for object streams to rebuildTrailer

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1811016&r1=1811015&r2=1811016&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Tue Oct  3 17:38:41 2017
@@ -1727,6 +1727,7 @@ public class COSParser extends BaseParse
                 }
                 String numbersStr = new String(numbersBytes, start, numbersBytes.length - start,
                         "ISO-8859-1");
+                numbersStr = numbersStr.replaceAll("\n", " ").replaceAll("  ", " ");
                 String[] numbers = numbersStr.split(" ");
                 for (int i = 0; i < nrOfObjects; i++)
                 {
@@ -1887,47 +1888,66 @@ public class COSParser extends BaseParse
             for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
             {
                 Long offset = entry.getValue();
-                // skip compressed objects
+                COSDictionary dictionary = null;
+                // handle compressed objects
                 if (offset < 0)
                 {
-                    continue;
+                    parseObjectStream((int) -offset);
+                    COSObject compressedObject = document.getObjectFromPool(entry.getKey());
+                    if (compressedObject != null)
+                    {
+                        COSBase baseObject = compressedObject.getObject();
+                        if (baseObject instanceof COSDictionary)
+                        {
+                            dictionary = (COSDictionary) baseObject;
+                        }
+                        else
+                        {
+                            continue;
+                        }
+                    }
                 }
-                source.seek(offset);
-                readObjectNumber();
-                readGenerationNumber();
-                readExpectedString(OBJ_MARKER, true);
-                try
+                else
                 {
+                    source.seek(offset);
+                    readObjectNumber();
+                    readGenerationNumber();
+                    readExpectedString(OBJ_MARKER, true);
                     if (source.peek() != '<')
                     {
                         continue;
                     }
-                    COSDictionary dictionary = parseCOSDictionary();
-                    // document catalog
-                    if (isCatalog(dictionary))
-                    {
-                        trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
-                    }
-                    // info dictionary
-                    else if (!dictionary.containsKey(COSName.PARENT) && 
-                              (dictionary.containsKey(COSName.MOD_DATE)
-                            || dictionary.containsKey(COSName.TITLE)
-                            || dictionary.containsKey(COSName.AUTHOR)
-                            || dictionary.containsKey(COSName.SUBJECT)
-                            || dictionary.containsKey(COSName.KEYWORDS)
-                            || dictionary.containsKey(COSName.CREATOR)
-                            || dictionary.containsKey(COSName.PRODUCER)
-                            || dictionary.containsKey(COSName.CREATION_DATE)))
+                    try
                     {
-                        trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
+                        dictionary = parseCOSDictionary();
                     }
-                    // encryption dictionary, if existing, is lost
-                    // We can't run "Algorithm 2" from PDF specification because of missing ID
+                    catch (IOException exception)
+                    {
+                        LOG.debug("Skipped object " + entry.getKey()
+                                + ", either it's corrupt or not a dictionary");
+                        continue;
+                    }
+                }
+                // document catalog
+                if (isCatalog(dictionary))
+                {
+                    trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
                 }
-                catch(IOException exception)
+                // info dictionary
+                else if (!dictionary.containsKey(COSName.PARENT)
+                        && (dictionary.containsKey(COSName.MOD_DATE)
+                                || dictionary.containsKey(COSName.TITLE)
+                                || dictionary.containsKey(COSName.AUTHOR)
+                                || dictionary.containsKey(COSName.SUBJECT)
+                                || dictionary.containsKey(COSName.KEYWORDS)
+                                || dictionary.containsKey(COSName.CREATOR)
+                                || dictionary.containsKey(COSName.PRODUCER)
+                                || dictionary.containsKey(COSName.CREATION_DATE)))
                 {
-                    LOG.debug("Skipped object " + entry.getKey() + ", either it's corrupt or not a dictionary");
+                    trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
                 }
+                // encryption dictionary, if existing, is lost
+                // We can't run "Algorithm 2" from PDF specification because of missing ID
             }
         }
         return trailer;