You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/10/03 17:44:39 UTC

svn commit: r1811018 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Author: lehmi
Date: Tue Oct  3 17:44:38 2017
New Revision: 1811018

URL: http://svn.apache.org/viewvc?rev=1811018&view=rev
Log:
PDFBOX-3948: eliminate newlines in object streams and add handling for object streams to rebuildTrailer

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1811018&r1=1811017&r2=1811018&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Tue Oct  3 17:44:38 2017
@@ -931,7 +931,7 @@ public class COSParser extends BaseParse
             for (COSObject next : parser.getObjects())
             {
                 COSObjectKey stmObjKey = new COSObjectKey(next);
-                Long offset = document.getXrefTable().get(stmObjKey);
+                Long offset = xrefTrailerResolver.getXrefTable().get(stmObjKey);
                 if (offset != null && offset == -objstmObjNr)
                 {
                     COSObject stmObj = document.getObjectFromPool(stmObjKey);
@@ -1767,6 +1767,7 @@ public class COSParser extends BaseParse
                 }
                 String numbersStr = new String(numbersBytes, start, numbersBytes.length - start,
                         "ISO-8859-1");
+                numbersStr = numbersStr.replaceAll("\n", " ").replaceAll("  ", " ");
                 String[] numbers = numbersStr.split(" ");
                 for (int i = 0; i < nrOfObjects; i++)
                 {
@@ -1925,48 +1926,66 @@ public class COSParser extends BaseParse
         for (Entry<COSObjectKey, Long> entry : bfCOSObjectKeyOffsets.entrySet())
         {
             Long offset = entry.getValue();
-            // skip compressed objects
+            COSDictionary dictionary = null;
+            // handle compressed objects
             if (offset < 0)
             {
-                continue;
+                parseObjectStream((int) -offset);
+                COSObject compressedObject = document.getObjectFromPool(entry.getKey());
+                if (compressedObject != null)
+                {
+                    COSBase baseObject = compressedObject.getObject();
+                    if (baseObject instanceof COSDictionary)
+                    {
+                        dictionary = (COSDictionary) baseObject;
+                    }
+                    else
+                    {
+                        continue;
+                    }
+                }
             }
-            source.seek(offset);
-            readObjectNumber();
-            readGenerationNumber();
-            readExpectedString(OBJ_MARKER, true);
-            try
+            else
             {
+                source.seek(offset);
+                readObjectNumber();
+                readGenerationNumber();
+                readExpectedString(OBJ_MARKER, true);
                 if (source.peek() != '<')
                 {
                     continue;
                 }
-                COSDictionary dictionary = parseCOSDictionary();
-                // document catalog
-                if (isCatalog(dictionary))
+                try
                 {
-                    trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
+                    dictionary = parseCOSDictionary();
                 }
-                // info dictionary
-                else if (!dictionary.containsKey(COSName.PARENT) && 
-                                  (dictionary.containsKey(COSName.MOD_DATE)
-                                || dictionary.containsKey(COSName.TITLE)
-                                || dictionary.containsKey(COSName.AUTHOR)
-                                || dictionary.containsKey(COSName.SUBJECT)
-                                || dictionary.containsKey(COSName.KEYWORDS)
-                                || dictionary.containsKey(COSName.CREATOR)
-                                || dictionary.containsKey(COSName.PRODUCER)
-                                || dictionary.containsKey(COSName.CREATION_DATE)))
+                catch (IOException exception)
                 {
-                    trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
+                    LOG.debug("Skipped object " + entry.getKey()
+                            + ", either it's corrupt or not a dictionary");
+                    continue;
                 }
-                // encryption dictionary, if existing, is lost
-                // We can't run "Algorithm 2" from PDF specification because of missing ID
             }
-            catch (IOException exception)
+            // document catalog
+            if (isCatalog(dictionary))
+            {
+                trailer.setItem(COSName.ROOT, document.getObjectFromPool(entry.getKey()));
+            }
+            // info dictionary
+            else if (!dictionary.containsKey(COSName.PARENT)
+                    && (dictionary.containsKey(COSName.MOD_DATE)
+                            || dictionary.containsKey(COSName.TITLE)
+                            || dictionary.containsKey(COSName.AUTHOR)
+                            || dictionary.containsKey(COSName.SUBJECT)
+                            || dictionary.containsKey(COSName.KEYWORDS)
+                            || dictionary.containsKey(COSName.CREATOR)
+                            || dictionary.containsKey(COSName.PRODUCER)
+                            || dictionary.containsKey(COSName.CREATION_DATE)))
             {
-                LOG.debug("Skipped object " + entry.getKey()
-                        + ", either it's corrupt or not a dictionary");
+                trailer.setItem(COSName.INFO, document.getObjectFromPool(entry.getKey()));
             }
+            // encryption dictionary, if existing, is lost
+            // We can't run "Algorithm 2" from PDF specification because of missing ID
         }
         return trailer;
     }