You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2021/03/02 07:11:42 UTC

svn commit: r1887078 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java

Author: lehmi
Date: Tue Mar  2 07:11:42 2021
New Revision: 1887078

URL: http://svn.apache.org/viewvc?rev=1887078&view=rev
Log:
PDFBOX-5112: added another check for the W and the Index array, reduce memory footprint

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=1887078&r1=1887077&r2=1887078&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Tue Mar  2 07:11:42 2021
@@ -18,10 +18,8 @@ package org.apache.pdfbox.pdfparser;
 
 import java.io.IOException;
 
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
-import java.util.List;
 
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
@@ -40,7 +38,7 @@ import org.apache.pdfbox.cos.COSObjectKe
 public class PDFXrefStreamParser extends BaseParser
 {
     private final int[] w = new int[3];
-    private final List<Long> objNums = new ArrayList<>();
+    private ObjectNumbers objectNumbers = null;
 
     /**
      * Constructor.
@@ -72,6 +70,11 @@ public class PDFXrefStreamParser extends
         {
             throw new IOException("/W array is missing in Xref stream");
         }
+        if (wArray.size() != 3)
+        {
+            throw new IOException(
+                    "Wrong number of values for /W array in XRef: " + Arrays.toString(w));
+        }
         for (int i = 0; i < 3; i++)
         {
             w[i] = wArray.getInt(i, 0);
@@ -89,34 +92,13 @@ public class PDFXrefStreamParser extends
             indexArray.add(COSInteger.ZERO);
             indexArray.add(COSInteger.get(stream.getInt(COSName.SIZE, 0)));
         }
-
-        /*
-         * Populates objNums with all object numbers available
-         */
-        Iterator<COSBase> indexIter = indexArray.iterator();
-        while (indexIter.hasNext())
+        if (indexArray.size() % 2 == 1)
         {
-            COSBase base = indexIter.next();
-            if (!(base instanceof COSInteger))
-            {
-                throw new IOException("Xref stream must have integer in /Index array");
-            }
-            long objID = ((COSInteger) base).longValue();
-            if (!indexIter.hasNext())
-            {
-                break;
-            }
-            base = indexIter.next();
-            if (!(base instanceof COSInteger))
-            {
-                throw new IOException("Xref stream must have integer in /Index array");
-            }
-            int size = ((COSInteger) base).intValue();
-            for (int i = 0; i < size; i++)
-            {
-                objNums.add(objID + i);
-            }
+            throw new IOException(
+                    "Wrong number of values for /Index array in XRef: " + Arrays.toString(w));
         }
+        // create an Iterator for all object numbers using the index array
+        objectNumbers = new ObjectNumbers(indexArray);
     }
 
     private void close() throws IOException
@@ -126,7 +108,7 @@ public class PDFXrefStreamParser extends
             source.close();
         }
         document = null;
-        objNums.clear();
+        objectNumbers = null;
     }
 
     /**
@@ -138,14 +120,11 @@ public class PDFXrefStreamParser extends
     public void parse(XrefTrailerResolver resolver) throws IOException
     {
         byte[] currLine = new byte[w[0] + w[1] + w[2]];
-        Iterator<Long> objIter = objNums.iterator();
-        while (!isEOF() && objIter.hasNext())
+        while (!isEOF() && objectNumbers.hasNext())
         {
             source.read(currLine);
-
             // get the current objID
-            Long objID = objIter.next();
-
+            long objID = objectNumbers.next();
             // default value is 1 if w[0] == 0, otherwise parse first field
             int type = w[0] == 0 ? 1 : (int) parseValue(currLine, 0, w[0]);
             // Skip free objects (type 0) and invalid types
@@ -182,4 +161,65 @@ public class PDFXrefStreamParser extends
         }
         return value;
     }
+
+    private static class ObjectNumbers implements Iterator<Long>
+    {
+        private final long[] start;
+        private final long[] end;
+        private int currentRange = 0;
+        private long currentEnd = 0;
+        private long currentNumber = 0;
+        private long maxValue = 0;
+
+        private ObjectNumbers(COSArray indexArray) throws IOException
+        {
+            start = new long[indexArray.size() / 2];
+            end = new long[start.length];
+            int counter = 0;
+            Iterator<COSBase> indexIter = indexArray.iterator();
+            while (indexIter.hasNext())
+            {
+                COSBase base = indexIter.next();
+                if (!(base instanceof COSInteger))
+                {
+                    throw new IOException("Xref stream must have integer in /Index array");
+                }
+                long startValue = ((COSInteger) base).longValue();
+                if (!indexIter.hasNext())
+                {
+                    break;
+                }
+                base = indexIter.next();
+                if (!(base instanceof COSInteger))
+                {
+                    throw new IOException("Xref stream must have integer in /Index array");
+                }
+                long sizeValue = ((COSInteger) base).longValue();
+                start[counter] = startValue;
+                end[counter++] = startValue + sizeValue;
+            }
+            currentNumber = start[0];
+            currentEnd = end[0];
+            maxValue = end[counter - 1];
+        }
+
+        @Override
+        public boolean hasNext()
+        {
+            return currentNumber < maxValue;
+        }
+
+        @Override
+        public Long next()
+        {
+            if (currentNumber < currentEnd)
+            {
+                return currentNumber++;
+            }
+            currentNumber = start[++currentRange];
+            currentEnd = end[currentRange];
+            return currentNumber++;
+        }
+    }
+
 }