You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2017/05/09 17:57:58 UTC

svn commit: r1794621 - in /pdfbox/branches/2.0: ./ pdfbox/ pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Author: lehmi
Date: Tue May  9 17:57:58 2017
New Revision: 1794621

URL: http://svn.apache.org/viewvc?rev=1794621&view=rev
Log:
PDFBOX-3783: revert changes from PDFBOX-3318 and introduce a brute force search for the last startxref entry instead

Modified:
    pdfbox/branches/2.0/   (props changed)
    pdfbox/branches/2.0/pdfbox/   (props changed)
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java   (contents, props changed)

Propchange: pdfbox/branches/2.0/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  9 17:57:58 2017
@@ -1,3 +1,3 @@
 /pdfbox/branches/no-awt:1618517-1621410
 /pdfbox/no-awt:1618514-1618516
-/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1790745,1794073,1794090
+/pdfbox/trunk:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1766088,1766213,1767585,1768061,1770985,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1790745,1794073,1794090,1794620

Propchange: pdfbox/branches/2.0/pdfbox/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  9 17:57:58 2017
@@ -1,3 +1,3 @@
 /pdfbox/branches/no-awt/pdfbox:1618517-1621410
 /pdfbox/no-awt/pdfbox:1618514-1618516
-/pdfbox/trunk/pdfbox:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1757165,1758817,1767585,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1790745,1794073,1794090
+/pdfbox/trunk/pdfbox:1736223,1736227,1736615,1737043,1737130,1737599-1737600,1738755,1740160,1742437,1742442,1743248,1745595,1745606,1745772,1745774,1745776,1745779,1746032,1746151,1749162,1749165,1749432,1757165,1758817,1767585,1770988,1772528,1778172,1782679,1786586,1786603,1787546,1790745,1794073,1794090,1794620

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1794621&r1=1794620&r2=1794621&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Tue May  9 17:57:58 2017
@@ -400,8 +400,8 @@ public class COSParser extends BaseParse
         {
             if (isLenient) 
             {
-                LOG.debug("Can't find offset for startxref");
-                return -1;
+                LOG.debug("Performing brute force search for last startxref entry");
+                return bfSearchForLastStartxrefEntry();
             }
             else
             {
@@ -1633,6 +1633,30 @@ public class COSParser extends BaseParse
     }
 
     /**
+     * Brute force search for the last startxref entry.
+     * 
+     * @throws IOException if something went wrong
+     */
+    private long bfSearchForLastStartxrefEntry() throws IOException
+    {
+        long originOffset = source.getPosition();
+        long lastStartxref = -1;
+        source.seek(MINIMUM_SEARCH_OFFSET);
+        // search for startxref
+        while (!source.isEOF())
+        {
+            if (isString(STARTXREF))
+            {
+                lastStartxref = source.getPosition();
+                source.seek(lastStartxref + 9);
+            }
+            source.read();
+        }
+        source.seek(originOffset);
+        return lastStartxref;
+    }
+
+    /**
      * Brute force search for all /XRef entries (streams).
      * 
      * @throws IOException if something went wrong
@@ -1731,19 +1755,14 @@ public class COSParser extends BaseParse
         {
             // reset trailer resolver
             xrefTrailerResolver.reset();
-            // search for an Xref stream
-            trailer = searchForXrefStream();
-            if (trailer == null)
-            {
-                // no xref stream found -> use the found objects to rebuild the trailer resolver
-                xrefTrailerResolver.nextXrefObj(0, XRefType.TABLE);
-                for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
-                {
-                    xrefTrailerResolver.setXRef(entry.getKey(), entry.getValue());
-                }
-                xrefTrailerResolver.setStartxref(0);
-                trailer = xrefTrailerResolver.getTrailer();
+            // no xref stream found -> use the found objects to rebuild the trailer resolver
+            xrefTrailerResolver.nextXrefObj(0, XRefType.TABLE);
+            for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
+            {
+                xrefTrailerResolver.setXRef(entry.getKey(), entry.getValue());
             }
+            xrefTrailerResolver.setStartxref(0);
+            trailer = xrefTrailerResolver.getTrailer();
             getDocument().setTrailer(trailer);
             // search for the different parts of the trailer dictionary
             for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
@@ -1787,39 +1806,6 @@ public class COSParser extends BaseParse
         return trailer;
     }
     
-    private COSDictionary searchForXrefStream() throws IOException
-    {
-        COSDictionary trailer = null;
-        for (Entry<COSObjectKey, Long> entry : bfSearchCOSObjectKeyOffsets.entrySet())
-        {
-            Long offset = entry.getValue();
-            source.seek(offset);
-            readObjectNumber();
-            readGenerationNumber();
-            readExpectedString(OBJ_MARKER, true);
-            try
-            {
-                COSDictionary dictionary = parseCOSDictionary();
-                if (dictionary != null && COSName.XREF.equals(dictionary.getCOSName(COSName.TYPE)))
-                {
-                    COSStream xrefStream = parseCOSStream(dictionary);
-                    parseXrefStream(xrefStream, offset, true);
-                    xrefStream.close();
-                    xrefTrailerResolver.setStartxref(offset);
-                    break;
-                }
-            }
-            catch (IOException exception)
-            {
-                LOG.debug("Skipped object " + entry.getKey()
-                        + ", either it's corrupt or not a dictionary");
-            }
-        }
-        if (validateXrefOffsets(xrefTrailerResolver.getXrefTable()))
-            trailer = xrefTrailerResolver.getTrailer();
-        return trailer;
-    }
-
     /**
      * This will parse the startxref section from the stream.
      * The startxref value is ignored.

Propchange: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue May  9 17:57:58 2017
@@ -1,4 +1,4 @@
 /pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1641458
 /pdfbox/branches/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618517-1621410
 /pdfbox/no-awt/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java:1618514-1618516
-/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1737043,1737130,1738755,1778172,1782679,1786586,1787546
+/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java:1737043,1737130,1738755,1778172,1782679,1786586,1787546,1794620