You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2014/12/21 01:46:02 UTC

svn commit: r1647070 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: BaseParser.java PDFParser.java VisualSignatureParser.java

Author: tilman
Date: Sun Dec 21 00:46:02 2014
New Revision: 1647070

URL: http://svn.apache.org/r1647070
Log:
PDFBOX-2576: refactor double code and move to base class, including Andersens_Fairy_Tales workaround

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1647070&r1=1647069&r2=1647070&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Sun Dec 21 00:46:02 2014
@@ -1704,4 +1704,60 @@ public abstract class BaseParser
             pdfSource = null;
         }
     }
+
+    /**
+     * Reads the head of an object: object number, generation number and the
+     * keyword that follows them, and wraps both numbers in a key.
+     * <p>
+     * If the next character is {@code '<'}, no numbers are read and the returned
+     * key holds -1 for both the object number and the generation number.
+     *
+     * @param continueOnError if true, a keyword of "o" is accepted in place of
+     * "obj"; if false, every keyword other than "obj" is rejected.
+     * @return a new object key.
+     * @throws IOException if reading fails or the keyword is not accepted.
+     */
+    protected COSObjectKey parseObjectKey(boolean continueOnError) throws IOException
+    {
+        //we are going to parse a normal object
+        long objNumber = -1;
+        boolean numberAbsent = false;
+        try
+        {
+            if ((char) pdfSource.peek() == '<')
+            {
+                //the object number is missing
+                numberAbsent = true;
+            }
+            else
+            {
+                objNumber = readObjectNumber();
+            }
+        }
+        catch (IOException e)
+        {
+            //ok for some reason "GNU Ghostscript 5.10" puts two endobj
+            //statements after an object, of course this is nonsense
+            //but because we want to support as many PDFs as possible
+            //we will simply try again
+            objNumber = readObjectNumber();
+        }
+        if (numberAbsent)
+        {
+            //no numbers were read, so both parts of the key are -1
+            return new COSObjectKey(-1L, -1);
+        }
+        skipSpaces();
+        int generation = readGenerationNumber();
+        String keyword = readString(3);
+        //assume that "o" was meant to be "obj" (this is a workaround for
+        // PDFBOX-773 attached PDF Andersens_Fairy_Tales.pdf), but only
+        // when the caller asked to continue on error
+        boolean tolerated = continueOnError && keyword.equals("o");
+        if (!keyword.equals("obj") && !tolerated)
+        {
+            throw new IOException("expected='obj' actual='" + keyword + "' " + pdfSource);
+        }
+        return new COSObjectKey(objNumber, generation);
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1647070&r1=1647069&r2=1647070&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Dec 21 00:46:02 2014
@@ -551,51 +551,7 @@ public class PDFParser extends BaseParse
         else
         {
             //we are going to parse a normal object
-            long number = -1;
-            int genNum;
-            boolean missingObjectNumber = false;
-            try
-            {
-                char peeked = (char) pdfSource.peek();
-                if (peeked == '<')
-                {
-                    missingObjectNumber = true;
-                }
-                else
-                {
-                    number = readObjectNumber();
-                }
-            }
-            catch (IOException e)
-            {
-                //ok for some reason "GNU Ghostscript 5.10" puts two endobj
-                //statements after an object, of course this is nonsense
-                //but because we want to support as many PDFs as possible
-                //we will simply try again
-                number = readObjectNumber();
-            }
-            if (!missingObjectNumber)
-            {
-                skipSpaces();
-                genNum = readGenerationNumber();
-
-                String objectKey = readString(3);
-                if (!objectKey.equals("obj"))
-                {
-                    if (!isContinueOnError(null) || !objectKey.equals("o"))
-                    {
-                        throw new IOException("expected='obj' actual='" + objectKey + "' " + pdfSource);
-                    }
-                    //assume that "o" was meant to be "obj" (this is a workaround for
-                    // PDFBOX-773 attached PDF Andersens_Fairy_Tales.pdf).
-                }
-            }
-            else
-            {
-                number = -1;
-                genNum = -1;
-            }
-
+            COSObjectKey key = parseObjectKey(isContinueOnError(null));
             skipSpaces();
             COSBase pb = parseDirObject();
             String endObjectKey = readString();
@@ -640,7 +596,6 @@ public class PDFParser extends BaseParse
                 endObjectKey = readLine();
             }
 
-            COSObjectKey key = new COSObjectKey( number, genNum );
             COSObject pdfObject = document.getObjectFromPool( key );
             if(pdfObject.getObject() == null)
             {

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java?rev=1647070&r1=1647069&r2=1647070&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/VisualSignatureParser.java Sun Dec 21 00:46:02 2014
@@ -187,46 +187,7 @@ public class VisualSignatureParser exten
         else 
         {
             //we are going to parse a normal object
-            long number = -1;
-            int genNum;
-            boolean missingObjectNumber = false;
-            try
-            {
-                char peeked = (char) pdfSource.peek();
-                if (peeked == '<')
-                {
-                    missingObjectNumber = true;
-                }
-                else
-                {
-                    number = readObjectNumber();
-                }
-            }
-            catch (IOException e)
-            {
-                //ok for some reason "GNU Ghostscript 5.10" puts two endobj
-                //statements after an object, of course this is nonsense
-                //but because we want to support as many PDFs as possible
-                //we will simply try again
-                number = readObjectNumber();
-            }
-            if (!missingObjectNumber)
-            {
-                skipSpaces();
-                genNum = readGenerationNumber();
-
-                String objectKey = readString(3);
-                if (!objectKey.equals("obj"))
-                {
-                    throw new IOException("expected='obj' actual='" + objectKey + "' " + pdfSource);
-                }
-            }
-            else
-            {
-                number = -1;
-                genNum = -1;
-            }
-
+            COSObjectKey key = parseObjectKey(false);
             skipSpaces();
             COSBase pb = parseDirObject();
             String endObjectKey = readString();
@@ -238,7 +199,6 @@ public class VisualSignatureParser exten
                 if (pb instanceof COSDictionary)
                 {
                     pb = parseCOSStream((COSDictionary) pb);
-
                 }
                 else
                 {
@@ -248,8 +208,6 @@ public class VisualSignatureParser exten
                 }
                 endObjectKey = readString();
             }
-
-            COSObjectKey key = new COSObjectKey(number, genNum);
             COSObject pdfObject = document.getObjectFromPool(key);
             pb.setNeedToBeUpdate(true);
             pdfObject.setObject(pb);