You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@pdfbox.apache.org by ad...@apache.org on 2011/07/02 00:28:24 UTC

svn commit: r1142109 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/ main/java/org/apache/pdfbox/pdmodel/common/ test/java/org/apache/pdfbox/pdfparser/ test/jav...

Author: adam
Date: Fri Jul  1 22:28:23 2011
New Revision: 1142109

URL: http://svn.apache.org/viewvc?rev=1142109&view=rev
Log:
PDFBOX-1000: Conforming parser.  Initial commit to make it easier for others to test & contribute.

Added:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
    pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/
    pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf   (with props)
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1142109&r1=1142108&r2=1142109&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java Fri Jul  1 22:28:23 2011
@@ -43,7 +43,7 @@ public class COSDictionary extends COSBa
      * The name-value pairs of this dictionary. The pairs are kept in the
      * order they were added to the dictionary.
      */
-    private final Map<COSName, COSBase> items =
+    protected final Map<COSName, COSBase> items =
         new LinkedHashMap<COSName, COSBase>();
 
     /**
@@ -1410,12 +1410,18 @@ public class COSDictionary extends COSBa
     /**
      * {@inheritDoc}
      */
-    public String toString()
-    {
+    @Override
+    public String toString() {
         StringBuilder retVal = new StringBuilder("COSDictionary{");
-        for( COSName key : items.keySet() )
-        {
-            retVal.append("(" + key + ":" + getDictionaryObject(key).toString() + ") ");
+        for(COSName key : items.keySet()) {
+            retVal.append("(");
+            retVal.append(key);
+            retVal.append(":");
+            if(getDictionaryObject(key) != null)
+                retVal.append(getDictionaryObject(key).toString());
+            else
+                retVal.append("<null>");
+            retVal.append(") ");
         }
         retVal.append("}");
         return retVal.toString();

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java?rev=1142109&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java Fri Jul  1 22:28:23 2011
@@ -0,0 +1,61 @@
+/*
+ *  Copyright 2011 adam.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.pdfparser.ConformingPDFParser;
+
+/**
+ *
+ * @author adam
+ */
+public class COSDictionaryLateBinding extends COSDictionary {
+    public static final Log log = LogFactory.getLog(COSDictionaryLateBinding.class);
+    ConformingPDFParser parser;
+
+    public COSDictionaryLateBinding(ConformingPDFParser parser) {
+        super();
+        this.parser = parser;
+    }
+
+    /**
+     * This will get an object from this dictionary.  If the object is a reference then it will
+     * dereference it and get it from the document.  If the object is COSNull then
+     * null will be returned.
+     * @param key The key to the object that we are getting.
+     * @return The object that matches the key.
+     */
+    @Override
+    public COSBase getDictionaryObject(COSName key) {
+        COSBase retval = items.get(key);
+        if(retval instanceof COSObject) {
+            int objectNumber = ((COSObject)retval).getObjectNumber().intValue();
+            int generation = ((COSObject)retval).getGenerationNumber().intValue();
+            try {
+                retval = parser.getObject(objectNumber, generation);
+            } catch(Exception e) {
+                log.warn("Unable to read information for object " + objectNumber);
+            }
+        }
+        if(retval instanceof COSNull) {
+            retval = null;
+        }
+        return retval;
+    }
+}

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java?rev=1142109&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java Fri Jul  1 22:28:23 2011
@@ -0,0 +1,100 @@
+/*
+ *  Copyright 2011 adam.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.cos;
+
+import org.apache.pdfbox.exceptions.COSVisitorException;
+import org.apache.pdfbox.pdfparser.ConformingPDFParser;
+
+/**
+ *
+ * @author adam
+ */
+public class COSUnread extends COSBase {
+    private long objectNumber;
+    private long generation;
+    private ConformingPDFParser parser;
+
+    public COSUnread() {
+        super();
+    }
+
+    public COSUnread(long objectNumber, long generation) {
+        this();
+        this.objectNumber = objectNumber;
+        this.generation = generation;
+    }
+
+    public COSUnread(long objectNumber, long generation, ConformingPDFParser parser) {
+        this(objectNumber, generation);
+        this.parser = parser;
+    }
+
+    @Override
+    public Object accept(ICOSVisitor visitor) throws COSVisitorException {
+        // TODO: read the object using the parser (if available) and visit that object
+        throw new UnsupportedOperationException("COSUnread can not be written/visited.");
+    }
+
+    @Override
+    public String toString() {
+        return "COSUnread{" + objectNumber + "," + generation + "}";
+    }
+
+    /**
+     * @return the objectNumber
+     */
+    public long getObjectNumber() {
+        return objectNumber;
+    }
+
+    /**
+     * @param objectNumber the objectNumber to set
+     */
+    public void setObjectNumber(long objectNumber) {
+        this.objectNumber = objectNumber;
+    }
+
+    /**
+     * @return the generation
+     */
+    public long getGeneration() {
+        return generation;
+    }
+
+    /**
+     * @param generation the generation to set
+     */
+    public void setGeneration(long generation) {
+        this.generation = generation;
+    }
+
+    /**
+     * @return the parser
+     */
+    public ConformingPDFParser getParser() {
+        return parser;
+    }
+
+    /**
+     * @param parser the parser to set
+     */
+    public void setParser(ConformingPDFParser parser) {
+        this.parser = parser;
+    }
+
+}

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1142109&r1=1142108&r2=1142109&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Fri Jul  1 22:28:23 2011
@@ -110,6 +110,10 @@ public abstract class BaseParser
      */
     protected final boolean forceParsing;
 
+    public BaseParser() {
+        this.forceParsing = FORCE_PARSING;
+    }
+
     /**
      * Constructor.
      *
@@ -876,7 +880,7 @@ public abstract class BaseParser
             throw new IOException("expected='/' actual='" + (char)c + "'-" + c + " " + pdfSource );
         }
         // costruisce il nome
-        StringBuffer buffer = new StringBuffer();
+        StringBuilder buffer = new StringBuilder();
         c = pdfSource.read();
         while( c != -1 )
         {
@@ -1063,7 +1067,7 @@ public abstract class BaseParser
         {
             if( Character.isDigit(c) || c == '-' || c == '+' || c == '.')
             {
-                StringBuffer buf = new StringBuffer();
+                StringBuilder buf = new StringBuilder();
                 int ic = pdfSource.read();
                 c = (char)ic;
                 while( Character.isDigit( c )||
@@ -1118,7 +1122,7 @@ public abstract class BaseParser
     protected String readString() throws IOException
     {
         skipSpaces();
-        StringBuffer buffer = new StringBuffer();
+        StringBuilder buffer = new StringBuilder();
         int c = pdfSource.read();
         while( !isEndOfName((char)c) && !isClosing(c) && c != -1 )
         {
@@ -1148,7 +1152,7 @@ public abstract class BaseParser
         {
             c = pdfSource.read();
         }
-        StringBuffer buffer = new StringBuffer( theString.length() );
+        StringBuilder buffer = new StringBuilder( theString.length() );
         int charsRead = 0;
         while( !isEOL(c) && c != -1 && charsRead < theString.length() )
         {
@@ -1194,7 +1198,7 @@ public abstract class BaseParser
 
         //average string size is around 2 and the normal string buffer size is
         //about 16 so lets save some space.
-        StringBuffer buffer = new StringBuffer(length);
+        StringBuilder buffer = new StringBuilder(length);
         while( !isWhitespace(c) && !isClosing(c) && c != -1 && buffer.length() < length &&
                 c != '[' &&
                 c != '<' &&
@@ -1250,7 +1254,7 @@ public abstract class BaseParser
             throw new IOException( "Error: End-of-File, expected line");
         }
 
-        StringBuffer buffer = new StringBuffer( 11 );
+        StringBuilder buffer = new StringBuilder( 11 );
        
         int c;
         while ((c = pdfSource.read()) != -1) 
@@ -1300,10 +1304,9 @@ public abstract class BaseParser
     }
 
     /**
-     * This will tell if the next byte is whitespace or not.
-     *
+     * This will tell if the next byte is whitespace or not.  These values are
+     * specified in table 1 (page 12) of ISO 32000-1:2008.
      * @param c The character to check against whitespace
-     *
      * @return true if the next byte in the stream is a whitespace character.
      */
     protected boolean isWhitespace( int c )

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1142109&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java Fri Jul  1 22:28:23 2011
@@ -0,0 +1,696 @@
+/*
+ *  Copyright 2010 adam.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSFloat;
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.cos.COSObject;
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.cos.COSUnread;
+import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.io.RandomAccessFile;
+import org.apache.pdfbox.pdmodel.ConformingPDDocument;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.common.XrefEntry;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/**
+ * 
+ * @author <a href="adam@apache.org">Adam Nichols</a>
+ */
+public class ConformingPDFParser extends BaseParser {
+    protected RandomAccess inputFile;
+    List<XrefEntry> xrefEntries;
+    private long currentOffset;
+    private ConformingPDDocument doc = null;
+    private boolean throwNonConformingException = true;
+    private boolean recursivlyRead = true;
+
+    /**
+     * Constructor.
+     *
+     * @param inputFile The file that contains the PDF document.
+     *
+     * @throws IOException If there is an error initializing the stream.
+     */
+    public ConformingPDFParser(File inputFile) throws IOException {
+        this.inputFile = new RandomAccessFile(inputFile, "r");
+    }
+
+    /**
+     * This will parse the stream and populate the COSDocument object.  This will close
+     * the stream when it is done parsing.
+     *
+     * @throws IOException If there is an error reading from the stream or corrupt data
+     * is found.
+     */
+    public void parse() throws IOException {
+        document = new COSDocument();
+        doc = new ConformingPDDocument(document);
+        currentOffset = inputFile.length()-1;
+        long xRefTableLocation = parseTrailerInformation();
+        currentOffset = xRefTableLocation;
+        parseXrefTable();
+        // now that we read the xref table and put null references in the doc,
+        // we can dereference those objects now.
+        boolean oldValue = recursivlyRead;
+        recursivlyRead = false;
+        List<COSObjectKey> keys = doc.getObjectKeysFromPool();
+        for(COSObjectKey key : keys) {
+            // getObject will put it into the document's object pool for us
+            getObject(key.getNumber(), key.getGeneration());
+        }
+        recursivlyRead = oldValue;
+    }
+
+    /**
+     * This will get the document that was parsed.  parse() must be called before this is called.
+     * When you are done with this document you must call close() on it to release
+     * resources.
+     *
+     * @return The document that was parsed.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public COSDocument getDocument() throws IOException {
+        if( document == null ) {
+            throw new IOException( "You must call parse() before calling getDocument()" );
+        }
+        return document;
+    }
+
+    /**
+     * This will get the PD document that was parsed.  When you are done with
+     * this document you must call close() on it to release resources.
+     *
+     * @return The document at the PD layer.
+     *
+     * @throws IOException If there is an error getting the document.
+     */
+    public PDDocument getPDDocument() throws IOException {
+        return doc;
+    }
+    
+    private boolean parseXrefTable() throws IOException {
+        String currentLine = readLine();
+        if(throwNonConformingException) {
+            if(!"xref".equals(currentLine))
+                throw new AssertionError("xref table not found.\nExpected: xref\nFound: "+currentLine);
+        }
+
+        int objectNumber = readInt();
+        int entries = readInt();
+        xrefEntries = new ArrayList<XrefEntry>(entries);
+        for(int i=0; i<entries; i++)
+            xrefEntries.add(new XrefEntry(objectNumber++, readInt(), readInt(), readLine()));
+        
+        return true;
+    }
+
+    protected long parseTrailerInformation() throws IOException, NumberFormatException {
+        long xrefLocation = -1;
+        consumeWhitespaceBackwards();
+        String currentLine = readLineBackwards();
+        if(throwNonConformingException) {
+            if(!"%%EOF".equals(currentLine))
+                throw new AssertionError("Invalid EOF marker.\nExpected: %%EOF\nFound: "+currentLine);
+        }
+
+        xrefLocation = readLongBackwards();
+        currentLine = readLineBackwards();
+        if(throwNonConformingException) {
+            if(!"startxref".equals(currentLine))
+                throw new AssertionError("Invalid trailer.\nExpected: startxref\nFound: "+currentLine);
+        }
+
+        document.setTrailer(readDictionaryBackwards());
+        consumeWhitespaceBackwards();
+        currentLine = readLineBackwards();
+        if(throwNonConformingException) {
+            if(!"trailer".equals(currentLine))
+                throw new AssertionError("Invalid trailer.\nExpected: trailer\nFound: "+currentLine);
+        }
+
+        return xrefLocation;
+    }
+    
+    protected byte readByteBackwards() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        currentOffset--;
+        return singleByte;
+    }
+
+    protected byte readByte() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        currentOffset++;
+        return singleByte;
+    }
+
+    protected String readBackwardUntilWhitespace() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        byte singleByte = readByteBackwards();
+        while(!isWhitespace(singleByte)) {
+            sb.insert(0, (char)singleByte);
+            singleByte = readByteBackwards();
+        }
+        return sb.toString();
+    }
+
+    /**
+     * This will read all bytes (backwards) until a non-whitespace character is
+     * found.  To save you an extra read, the non-whitespace character is
+     * returned.  If the current character is not whitespace, this method will
+     * just return the current char.
+     * @return the first non-whitespace character found
+     * @throws IOException if there is an error reading from the file
+     */
+    protected byte consumeWhitespaceBackwards() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        if(!isWhitespace(singleByte))
+            return singleByte;
+
+        // we have some whitespace, let's consume it
+        while(isWhitespace(singleByte)) {
+            singleByte = readByteBackwards();
+        }
+        // readByteBackwards will decrement the currentOffset to point to the byte
+        // before the one just read, so we increment it back to the current byte
+        currentOffset++;
+        return singleByte;
+    }
+
+    /**
+     * This will read all bytes until a non-whitespace character is
+     * found.  To save you an extra read, the non-whitespace character is
+     * returned.  If the current character is not whitespace, this method will
+     * just return the current char.
+     * @return the first non-whitespace character found
+     * @throws IOException if there is an error reading from the file
+     */
+    protected byte consumeWhitespace() throws IOException {
+        inputFile.seek(currentOffset);
+        byte singleByte = (byte)inputFile.read();
+        if(!isWhitespace(singleByte))
+            return singleByte;
+
+        // we have some whitespace, let's consume it
+        while(isWhitespace(singleByte)) {
+            singleByte = readByte();
+        }
+        // readByte() will increment the currentOffset to point to the byte
+        // after the one just read, so we decrement it back to the current byte
+        currentOffset--;
+        return singleByte;
+    }
+
+    /**
+     * This will consume any whitespace, read in bytes until whitespace is found
+     * again and then parse the characters which have been read as a long.  The
+     * current offset will then point at the first whitespace character which
+     * precedes the number.
+     * @return the parsed number
+     * @throws IOException if there is an error reading from the file
+     * @throws NumberFormatException if the bytes read can not be converted to a number
+     */
+    protected long readLongBackwards() throws IOException, NumberFormatException {
+        StringBuilder sb = new StringBuilder();
+        consumeWhitespaceBackwards();
+        byte singleByte = readByteBackwards();
+        while(!isWhitespace(singleByte)) {
+            sb.insert(0, (char)singleByte);
+            singleByte = readByteBackwards();
+        }
+        if(sb.length() == 0)
+            throw new AssertionError("Number not found.  Expected number at offset: " + currentOffset);
+        return Long.parseLong(sb.toString());
+    }
+
+    @Override
+    protected int readInt() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        consumeWhitespace();
+        byte singleByte = readByte();
+        while(!isWhitespace(singleByte)) {
+            sb.append((char)singleByte);
+            singleByte = readByte();
+        }
+        if(sb.length() == 0)
+            throw new AssertionError("Number not found.  Expected number at offset: " + currentOffset);
+        return Integer.parseInt(sb.toString());
+    }
+
+    /**
+     * This will read in a number and return the COS version of the number (be
+     * it a COSInteger or a COSFloat).
+     * @return the COSNumber which was read/parsed
+     * @throws IOException if there is an error reading from the file
+     */
+    protected COSNumber readNumber() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        consumeWhitespace();
+        byte singleByte = readByte();
+        while(!isWhitespace(singleByte)) {
+            sb.append((char)singleByte);
+            singleByte = readByte();
+        }
+        if(sb.length() == 0)
+            throw new AssertionError("Number not found.  Expected number at offset: " + currentOffset);
+        return parseNumber(sb.toString());
+    }
+
+    protected COSNumber parseNumber(String number) throws IOException {
+        if(number.matches("^[0-9]+$"))
+            return COSInteger.get(number);
+        return new COSFloat(Float.parseFloat(number));
+    }
+
+    protected COSBase processCosObject(String string) throws IOException {
+        if(string != null && string.endsWith(">")) {
+            // string of hex codes
+            return COSString.createFromHexString(string.replaceAll("^<", "").replaceAll(">$", ""));
+        }
+        return null;
+    }
+
+    protected COSBase readObjectBackwards() throws IOException {
+        COSBase obj = null;
+        consumeWhitespaceBackwards();
+        String lastSection = readBackwardUntilWhitespace();
+        if("R".equals(lastSection)) {
+            // indirect reference
+            long gen = readLongBackwards();
+            long number = readLongBackwards();
+            // We just put a placeholder in the pool for now, we'll read the data later
+            doc.putObjectInPool(new COSUnread(), number, gen);
+            obj = new COSUnread(number, gen, this);
+        } else if(">>".equals(lastSection)) {
+            // dictionary
+            throw new RuntimeException("Not yet implemented");
+        } else if(lastSection != null && lastSection.endsWith("]")) {
+            // array
+            COSArray array = new COSArray();
+            lastSection = lastSection.replaceAll("]$", "");
+            while(!lastSection.startsWith("[")) {
+                if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
+                    array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
+                lastSection = readBackwardUntilWhitespace();
+            }
+            lastSection = lastSection.replaceAll("^\\[", "");
+            if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
+                array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
+            obj = array;
+        } else if(lastSection != null && lastSection.endsWith(">")) {
+            // string of hex codes
+            obj = processCosObject(lastSection);
+        } else {
+            // try a number, otherwise fall back on a string
+            try {
+                Long.parseLong(lastSection);
+                obj = COSNumber.get(lastSection);
+            } catch(NumberFormatException e) {
+                throw new RuntimeException("Not yet implemented");
+            }
+        }
+
+        return obj;
+    }
+
+    protected COSName readNameBackwards() throws IOException {
+        String name = readBackwardUntilWhitespace();
+        name = name.replaceAll("^/", "");
+        return COSName.getPDFName(name);
+    }
+
+    public COSBase getObject(long objectNumber, long generation) throws IOException {
+        // we could optionally check to see if parse() has been called &
+        // throw an exception here, but I don't think that's really necessary
+        XrefEntry entry = xrefEntries.get((int)objectNumber);
+        currentOffset = entry.getByteOffset();
+        return readObject(objectNumber, generation);
+    }
+
+    /**
+     * This will read an object from the inputFile at whatever our currentOffset
+     * is.  If the object and generation are not the expected values and this
+     * object is set to throw an exception for non-conforming documents, then an
+     * exception will be thrown.
+     * @param objectNumber the object number you expect to read
+     * @param generation the generation you expect this object to be
+     * @return the object that was read, or the pooled copy when one is already cached
+     */
+    public COSBase readObject(long objectNumber, long generation) throws IOException {
+        // when not recursively reading, we always pull the object from the filesystem
+        if(document != null && recursivlyRead) {
+            // check to see if it is in the document cache before hitting the filesystem
+            COSBase obj = doc.getObjectFromPool(objectNumber, generation);
+            if(obj != null)
+                return obj;
+        }
+
+        int actualObjectNumber = readInt();
+        if(objectNumber != actualObjectNumber)
+            if(throwNonConformingException)
+                throw new AssertionError("Object numer expected was " +
+                        objectNumber + " but actual was " + actualObjectNumber);
+        consumeWhitespace();
+
+        int actualGeneration = readInt();
+        if(generation != actualGeneration)
+            if(throwNonConformingException)
+                throw new AssertionError("Generation expected was " +
+                        generation + " but actual was " + actualGeneration);
+        consumeWhitespace();
+
+        String obj = readWord();
+        if(!"obj".equals(obj))
+            if(throwNonConformingException)
+                throw new AssertionError("Expected keyword 'obj' but found " + obj);
+        
+        // put placeholder object in doc to prevent infinite recursion
+        // e.g. read Root -> dereference object -> read object which has /Parent -> GOTO read Root
+        doc.putObjectInPool(new COSObject(null), objectNumber, generation);
+        COSBase object = readObject();
+        doc.putObjectInPool(object, objectNumber, generation);
+        return object;
+    }
+
+    /**
+     * This actually reads the object data.
+     * @return the object which is read
+     * @throws IOException if there is an error reading from the file
+     */
+    protected COSBase readObject() throws IOException {
+        consumeWhitespace();
+        String string = readWord();
+        if(string.startsWith("<<")) {
+            // this is a dictionary
+            COSDictionary dictionary = new COSDictionary();
+            boolean atEndOfDictionary = false;
+            // remove the marker for the beginning of the dictionary
+            string = string.replaceAll("^<<", "");
+            
+            if("".equals(string) || string.matches("^\\w$"))
+                string = readWord().trim();
+            while(!atEndOfDictionary) {
+                COSName name = COSName.getPDFName(string);
+                COSBase object = readObject();
+                dictionary.setItem(name, object);
+
+                byte singleByte = consumeWhitespace();
+                if(singleByte == '>') {
+                    readByte(); // get rid of the second '>'
+                    atEndOfDictionary = true;
+                }
+                if(!atEndOfDictionary)
+                    string = readWord().trim();
+            }
+            return dictionary;
+        } else if(string.startsWith("/")) {
+            // it's a dictionary label. i.e. /Type or /Pages or something similar
+            COSBase name = COSName.getPDFName(string);
+            return name;
+        } else if(string.startsWith("-")) {
+            // it's a negative number
+            return parseNumber(string);
+        } else if(string.charAt(0) >= '0' && string.charAt(0) <= '9' ) {
+            // it's a COSInt or COSFloat, or a weak reference (i.e. "3 0 R")
+            // we'll have to peek ahead a little to see if it's a reference or not
+            long tempOffset = this.currentOffset;
+            consumeWhitespace();
+            String tempString = readWord();
+            if(tempString.matches("^[0-9]+$")) {
+                // it is an int, might be a weak reference...
+                tempString = readWord();
+                if(!"R".equals(tempString)) {
+                    // it's just a number, not a weak reference
+                    this.currentOffset = tempOffset;
+                    return parseNumber(string);
+                }
+            } else {
+                // it's just a number, not a weak reference
+                this.currentOffset = tempOffset;
+                return parseNumber(string);
+            }
+
+            // it wasn't a number, so we need to parse the weak-reference
+            this.currentOffset = tempOffset;
+            int number = Integer.parseInt(string);
+            int gen = readInt();
+            String r = readWord();
+
+            if(!"R".equals(r))
+                if(throwNonConformingException)
+                    throw new AssertionError("Expected keyword 'R' but found " + r);
+
+            if(recursivlyRead) {
+                // seek to the object, read it, seek back to current location
+                long tempLocation = this.currentOffset;
+                this.currentOffset = this.xrefEntries.get(number).getByteOffset();
+                COSBase returnValue = readObject(number, gen);
+                this.currentOffset = tempLocation;
+                return returnValue;
+            } else {
+                // Put a COSUnknown there as a placeholder
+                COSObject obj = new COSObject(new COSUnread());
+                obj.setObjectNumber(COSInteger.get(number));
+                obj.setGenerationNumber(COSInteger.get(gen));
+                return obj;
+            }
+        } else if(string.startsWith("]")) {
+            // end of an array, just return null
+            if("]".equals(string))
+                return null;
+            int oldLength = string.length();
+            this.currentOffset -= oldLength;
+            return null;
+        } else if(string.startsWith("[")) {
+            // array of values
+            // we'll just pay attention to the first part (this is in case there
+            // is no whitespace between the "[" and the first element)
+            int oldLength = string.length();
+            string = "[";
+            this.currentOffset -= (oldLength - string.length() + 1);
+
+            COSArray array = new COSArray();
+            COSBase object = readObject();
+            while(object != null) {
+                array.add(object);
+                object = readObject();
+            }
+            return array;
+        } else if(string.startsWith("(")) {
+            // this is a string (not hex encoded), strip off the '(' and read until ')'
+            StringBuilder sb = new StringBuilder(string.substring(1));
+            byte singleByte = readByte();
+            while(singleByte != ')') {
+                sb.append((char)singleByte);
+                singleByte = readByte();
+            }
+            return new COSString(sb.toString());
+        } else {
+            throw new RuntimeException("Not yet implemented: " + string
+                    + " loation=" + this.currentOffset);
+        }
+    }
+
+    /**
+     * Reads the next token from {@code pdfSource}.  After calling
+     * {@code consumeWhitespace()}, characters are accumulated until one is
+     * reported by {@code isEndOfName} or {@code isClosing}, or until the stream
+     * is exhausted.  A terminating character is pushed back onto the stream so
+     * that the caller can still see it.
+     * @return The characters read before the terminator (possibly empty).
+     * @throws IOException If there is an error reading from the stream.
+     */
+    @Override
+    protected String readString() throws IOException {
+        consumeWhitespace();
+        StringBuilder token = new StringBuilder();
+        int next;
+        for(next = pdfSource.read();
+                !isEndOfName((char)next) && !isClosing(next) && next != -1;
+                next = pdfSource.read()) {
+            token.append( (char)next );
+        }
+        // only a real character can be pushed back, never the end-of-stream marker
+        if (next != -1) {
+            pdfSource.unread(next);
+        }
+        return token.toString();
+    }
+
+    /**
+     * Reads a dictionary from its end towards its beginning: the closing
+     * "&gt;&gt;" is consumed first, then value/name pairs are read backwards
+     * until the opening "&lt;&lt;" is reached, which is consumed as well.
+     * The returned dictionary has the entries re-inserted in the reverse of
+     * the order in which they were read, i.e. in the order they appear in the
+     * file.
+     *
+     * @return the dictionary that was read
+     * @throws IOException if there was an error reading data from the file
+     * @throws AssertionError if {@code throwNonConformingException} is set and
+     *         the two bytes read first are not both '&gt;'
+     */
+    protected COSDictionary readDictionaryBackwards() throws IOException {
+        // consume the last two '>' chars which signify the end of the dictionary
+        consumeWhitespaceBackwards();
+        byte singleByte = readByteBackwards();
+        if(throwNonConformingException && singleByte != '>')
+            throw new AssertionError("Expected '>' at the end of a dictionary but found '"
+                    + (char)singleByte + "'");
+        singleByte = readByteBackwards();
+        if(throwNonConformingException && singleByte != '>')
+            throw new AssertionError("Expected '>>' at the end of a dictionary but found '"
+                    + (char)singleByte + ">'");
+
+        // check to see if we're already at the opening "<<" (empty dictionary)
+        boolean atEndOfDictionary = false;
+        singleByte = consumeWhitespaceBackwards();
+        if(singleByte == '<') {
+            // a single '<' is not enough, the byte in front of it must be '<' too
+            inputFile.seek(currentOffset-1);
+            atEndOfDictionary =  ((byte)inputFile.read()) == '<';
+        }
+
+        COSDictionary backwardsDictionary = new COSDictionary();
+        // while we're not at the end of the dictionary, read in entries
+        while(!atEndOfDictionary) {
+            // going backwards the value comes before its name
+            COSBase object = readObjectBackwards();
+            COSName name = readNameBackwards();
+            backwardsDictionary.setItem(name, object);
+
+            singleByte = consumeWhitespaceBackwards();
+            if(singleByte == '<') {
+                inputFile.seek(currentOffset-1);
+                atEndOfDictionary =  ((byte)inputFile.read()) == '<';
+            }
+        }
+
+        // the dictionaries preserve the order keys were added, as such we shall
+        // add them in the proper order, not the reverse order.  The key set is
+        // copied into an array once instead of once per lookup.
+        Set<COSName> backwardsKeys = backwardsDictionary.keySet();
+        COSName[] keys = backwardsKeys.toArray(new COSName[backwardsKeys.size()]);
+        COSDictionary dict = new COSDictionary();
+        for(int i = keys.length-1; i >= 0; i--)
+            dict.setItem(keys[i], backwardsDictionary.getItem(keys[i]));
+
+        // consume the last two '<' chars
+        readByteBackwards();
+        readByteBackwards();
+
+        return dict;
+    }
+
+    /**
+     * This will read a line going backwards, one byte at a time via
+     * {@code readByteBackwards()}, until it finds a newline ('\n' or '\r').
+     * The bytes are prepended to the result, so the returned text is in normal
+     * reading order and does not contain the newline itself.  This should only
+     * be used if we are certain that the data will only be text, and not
+     * binary data.
+     * <p>
+     * The method takes no argument; reading presumably starts at the position
+     * tracked by {@code currentOffset} - confirm against
+     * {@code readByteBackwards()}.
+     * NOTE(review): there is no visible guard for reaching the start of the
+     * file before a newline is found.
+     *
+     * @return the string which was read
+     * @throws IOException if there was an error reading data from the file
+     */
+    protected String readLineBackwards() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        // set once a '\n' or '\r' has been read; ends the loop
+        boolean endOfObject = false;
+        
+        do {
+            // read the next byte, moving towards the start of the file
+            byte singleByte = readByteBackwards();
+            if(singleByte == '\n') {
+                // if there's a preceding \r, we'll eat that as well; the byte at
+                // currentOffset is presumably the one just in front of the '\n'
+                inputFile.seek(currentOffset);
+                if((byte)inputFile.read() == '\r')
+                    currentOffset--;
+                endOfObject = true;
+            } else if(singleByte == '\r') {
+                endOfObject = true;
+            } else {
+                // we are reading backwards, so every byte goes to the front
+                sb.insert(0, (char)singleByte);
+            }
+        } while(!endOfObject);
+        
+        return sb.toString();
+    }
+
+    /**
+     * This will read a line going forward, one byte at a time via
+     * {@code readByte()}, until it finds an end-of-line marker.  The marker
+     * itself is consumed but not returned.  This should only be used if we
+     * are certain that the data will only be text, and not binary data.
+     * <p>
+     * A carriage return immediately followed by a line feed is treated as a
+     * single end-of-line marker, so a CRLF terminated line no longer causes
+     * the next call to return a spurious empty line.  The handling of a line
+     * feed immediately followed by a carriage return is kept unchanged.
+     * <p>
+     * The method takes no argument; reading presumably starts at the position
+     * tracked by {@code currentOffset} - confirm against {@code readByte()}.
+     *
+     * @return the string which was read, without the end-of-line marker
+     * @throws IOException if there was an error reading data from the file
+     */
+    @Override
+    protected String readLine() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        boolean endOfLine = false;
+
+        do {
+            byte singleByte = readByte();
+            if(singleByte == '\n') {
+                // kept from the original implementation: a \r directly after
+                // the \n is eaten as well
+                inputFile.seek(currentOffset);
+                if((byte)inputFile.read() == '\r')
+                    currentOffset++;
+                endOfLine = true;
+            } else if(singleByte == '\r') {
+                // CR LF is one end-of-line marker: peek at the following byte
+                // (same seek/read pattern as above) and skip it if it is the
+                // \n; at the end of the file read() yields -1 and nothing is skipped
+                inputFile.seek(currentOffset);
+                if((byte)inputFile.read() == '\n')
+                    currentOffset++;
+                endOfLine = true;
+            } else {
+                sb.append((char)singleByte);
+            }
+        } while(!endOfLine);
+
+        return sb.toString();
+    }
+
+    /**
+     * Reads one word, byte by byte via {@code readByte()}.  Reading stops at
+     * the first whitespace byte (as reported by {@code isWhitespace}), which is
+     * consumed but not returned.  Once at least one character has been
+     * collected, reading also stops at '/', '[' or ']', and at '&gt;' unless
+     * the word collected so far is exactly "&gt;" (so "&gt;&gt;" is returned
+     * as one word); in that case {@code currentOffset} is decremented so the
+     * delimiter is read again by the next call.
+     * If the very first byte is whitespace an empty string is returned.
+     *
+     * @return the word which was read, possibly empty
+     * @throws IOException if there was an error reading data from the file
+     */
+    protected String readWord() throws IOException {
+        StringBuilder sb = new StringBuilder();
+        boolean stop = true;
+        do {
+            byte singleByte = readByte();
+            stop = this.isWhitespace(singleByte);
+
+            // there are some additional characters which indicate the next element/word has begun
+            // ignore the first char we read, b/c the first char is the beginning of this object, not the next one
+            if(!stop && sb.length() > 0) {
+                stop = singleByte == '/' || singleByte == '['
+                        || singleByte == ']'
+                        || (singleByte == '>' && !">".equals(sb.toString()));
+                if(stop) // we're stopping on a non-whitespace char, decrement the
+                    this.currentOffset--; // counter so we don't miss this character
+            }
+            if(!stop)
+                sb.append((char)singleByte);
+        } while(!stop);
+
+        return sb.toString();
+    }
+
+    /**
+     * Tells how indirect references ("n g R") are handled while reading
+     * objects: when {@code true} the parser seeks to the referenced object and
+     * reads it immediately, when {@code false} a COSObject wrapping a
+     * COSUnread placeholder is returned instead.
+     * @return the recursivlyRead
+     */
+    public boolean isRecursivlyRead() {
+        return recursivlyRead;
+    }
+
+    /**
+     * Chooses how indirect references ("n g R") are handled while reading
+     * objects.
+     * @param recursivlyRead {@code true} to read a referenced object as soon as
+     * the reference is encountered, {@code false} to leave a COSUnread
+     * placeholder in its place
+     */
+    public void setRecursivlyRead(boolean recursivlyRead) {
+        this.recursivlyRead = recursivlyRead;
+    }
+}

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?rev=1142109&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java Fri Jul  1 22:28:23 2011
@@ -0,0 +1,115 @@
+/*
+ *  Copyright 2011 adam.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdmodel;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.pdfparser.ConformingPDFParser;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/**
+ * A {@link PDDocument} that additionally keeps a pool of objects keyed by
+ * object number and generation, plus a reference to the
+ * {@link ConformingPDFParser} associated with it.
+ *
+ * @author adam
+ */
+public class ConformingPDDocument extends PDDocument {
+    /**
+     * Maps ObjectKeys to a COSObject. Note that references to these objects
+     * are also stored in COSDictionary objects that map a name to a specific object.
+     */
+    private final Map<COSObjectKey, COSBase> objectPool =
+        new HashMap<COSObjectKey, COSBase>();
+    private ConformingPDFParser parser = null;
+
+    /**
+     * Creates a document through the no-argument {@link PDDocument} constructor.
+     * @throws IOException If the superclass constructor fails.
+     */
+    public ConformingPDDocument() throws IOException {
+        super();
+    }
+
+    /**
+     * Creates a document on top of an existing COS document.
+     * @param doc The COS document handed to the {@link PDDocument} constructor.
+     * @throws IOException If the superclass constructor fails.
+     */
+    public ConformingPDDocument(COSDocument doc) throws IOException {
+        super(doc);
+    }
+
+    /**
+     * This will load a document from a file using a {@link ConformingPDFParser}.
+     * @param input The File which contains the document.
+     * @return The document that was loaded.
+     * @throws IOException If there is an error reading from the file.
+     */
+    public static PDDocument load(File input) throws IOException {
+        ConformingPDFParser parser = new ConformingPDFParser(input);
+        parser.parse();
+        return parser.getPDDocument();
+    }
+
+    /**
+     * This will get an object from the pool.
+     * @param key The object key.
+     * @return The object stored in the pool under the key, or {@code null} if
+     * the pool holds no object for it.
+     * @throws IOException Declared for callers; this lookup does not perform I/O itself.
+     */
+    public COSBase getObjectFromPool(COSObjectKey key) throws IOException {
+        return objectPool.get(key);
+    }
+
+    /**
+     * This will get the keys of all objects currently in the pool.
+     * @return A new list holding a snapshot of the keys; changing the list does
+     * not change the pool.
+     * @throws IOException Declared for callers; this lookup does not perform I/O itself.
+     */
+    public List<COSObjectKey> getObjectKeysFromPool() throws IOException {
+        return new ArrayList<COSObjectKey>(objectPool.keySet());
+    }
+
+    /**
+     * This will get an object from the pool.
+     * @param number the object number
+     * @param generation the generation of this object you wish to load
+     * @return The object stored in the pool, or {@code null} if the pool holds
+     * no object for this number and generation.
+     * @throws IOException Declared for callers; this lookup does not perform I/O itself.
+     */
+    public COSBase getObjectFromPool(long number, long generation) throws IOException {
+        return objectPool.get(new COSObjectKey(number, generation));
+    }
+
+    /**
+     * This will store an object in the pool, replacing any object previously
+     * stored for the same number and generation.
+     * @param object the object to store
+     * @param number the object number
+     * @param generation the generation of the object
+     */
+    public void putObjectInPool(COSBase object, long number, long generation) {
+        objectPool.put(new COSObjectKey(number, generation), object);
+    }
+
+    /**
+     * @return the parser, or {@code null} if none has been set
+     */
+    public ConformingPDFParser getParser() {
+        return parser;
+    }
+
+    /**
+     * @param parser the parser to set
+     */
+    public void setParser(ConformingPDFParser parser) {
+        this.parser = parser;
+    }
+}

Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java?rev=1142109&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java Fri Jul  1 22:28:23 2011
@@ -0,0 +1,43 @@
+/*
+ *  Copyright 2011 adam.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdmodel.common;
+
+/**
+ * Value holder for an object number, a byte offset, a generation and an
+ * in-use flag (presumably one line of a PDF cross-reference table - confirm
+ * against the parser that creates these).  Only the byte offset can be read
+ * back.
+ *
+ * @author adam
+ */
+public class XrefEntry {
+    private int objectNumber;
+    private int byteOffset;
+    private int generation;
+    private boolean inUse;
+
+    /**
+     * Creates an entry whose object number, byte offset and generation are 0
+     * and which is flagged as in use.
+     */
+    public XrefEntry() {
+        this(0, 0, 0, "n");
+    }
+
+    /**
+     * @param objectNumber the object number of the entry
+     * @param byteOffset the byte offset of the entry
+     * @param generation the generation of the entry
+     * @param inUse the entry is flagged as in use only if this equals "n"
+     */
+    public XrefEntry(int objectNumber, int byteOffset, int generation, String inUse) {
+        this.inUse = "n".equals(inUse);
+        this.generation = generation;
+        this.byteOffset = byteOffset;
+        this.objectNumber = objectNumber;
+    }
+
+    /**
+     * @return the byte offset this entry was created with
+     */
+    public int getByteOffset() {
+        return this.byteOffset;
+    }
+}

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java?rev=1142109&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java Fri Jul  1 22:28:23 2011
@@ -0,0 +1,73 @@
+/*
+ *  Copyright 2010 adam.
+ * 
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ * 
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *  under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import java.io.File;
+import java.net.URL;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Test for {@link ConformingPDFParser}: parses the bundled gdb-refcard.pdf and
+ * inspects the trailer of the resulting document.
+ *
+ * @author adam
+ */
+public class ConformingPDFParserTest {
+
+    public ConformingPDFParserTest() {
+    }
+
+    /** Nothing to prepare before the tests of this class run. */
+    @BeforeClass
+    public static void setUpClass() throws Exception {
+    }
+
+    /** Nothing to clean up after the tests of this class ran. */
+    @AfterClass
+    public static void tearDownClass() throws Exception {
+    }
+
+    /** Nothing to prepare before a single test. */
+    @Before
+    public void setUp() {
+    }
+
+    /** Nothing to clean up after a single test. */
+    @After
+    public void tearDown() {
+    }
+
+    /**
+     * Test of parse method, of class ConformingPDFParser: the trailer must
+     * exist and hold exactly the three entries Root, Info and Size.
+     */
+    @Test
+    public void testParse() throws Exception {
+        URL pdfLocation = ConformingPDFParser.class.getResource("gdb-refcard.pdf");
+        ConformingPDFParser parser = new ConformingPDFParser(new File(pdfLocation.toURI()));
+        parser.parse();
+
+        COSDictionary trailer = parser.getDocument().getTrailer();
+        assertNotNull(trailer);
+        System.out.println("Trailer: " + parser.getDocument().getTrailer().toString());
+        assertEquals(3, trailer.size());
+        for(String expectedKey : new String[] {"Root", "Info", "Size"})
+            assertNotNull(trailer.getDictionaryObject(expectedKey));
+    }
+}
\ No newline at end of file

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java?rev=1142109&r1=1142108&r2=1142109&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java Fri Jul  1 22:28:23 2011
@@ -16,7 +16,6 @@
  */
 package org.apache.pdfbox.pdmodel;
 
-import java.io.File;
 import junit.framework.TestCase;
 
 public class TestPDDocumentCatalog extends TestCase {
@@ -62,13 +61,29 @@ public class TestPDDocumentCatalog exten
             doc = PDDocument.load(TestPDDocumentCatalog.class.getResourceAsStream("page_label.pdf"));
             PDDocumentCatalog cat = doc.getDocumentCatalog();
             // getLabelsByPageIndices() should not throw an exception
-            String[] labels = cat.getPageLabels().getLabelsByPageIndices();
+            cat.getPageLabels().getLabelsByPageIndices();
         } catch(Exception e) {
-            e.printStackTrace();
             fail("Threw exception!");
         } finally {
             if(doc != null)
                 doc.close();
         }
     }
+
+    /**
+     * Test case for
+     * <a href="https://issues.apache.org/jira/browse/PDFBOX-911"
+     *   >PDFBOX-911</a> - Method PDDocument.getNumberOfPages() returns wrong
+     * number of pages
+     */
+    public void testGetNumberOfPages() throws Exception {
+        PDDocument doc = null;
+        try {
+            doc = PDDocument.load(TestPDDocumentCatalog.class.getResource("test.unc.pdf"));
+            assertEquals(4, doc.getNumberOfPages());
+        } finally {
+            if(doc != null)
+                doc.close();
+        }
+    }
 }

Added: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf?rev=1142109&view=auto
==============================================================================
Binary file - no diff available.

Propchange: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Re: svn commit: r1142109 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/ main/java/org/apache/pdfbox/pdmodel/common/ test/java/org/apache/pdfbox/pdfparser/ test/jav...

Posted by Andreas Lehmkuehler <an...@lehmi.de>.

Hi,

Thanks!!

BR
Andreas Lehmkühler

Am 08.04.2012 05:01, schrieb Adam Nichols:
> Headers should all be fixed as of revision 1310946.  I updated all the
> headers which were non-conforming (pdmodel/common/XrefEntry.java
> pdmodel/ConformingPDDocument.java cos/COSDictionaryLateBinding.java
> cos/COSUnread.java).
>
> If I missed any, let me know and I'll take care of it.
>
> Thanks,
> Adam
>
> On 04/06/2012 08:45 AM, Andreas Lehmkuehler wrote:
>> Hi,
>>
>> I just realized that the headers of all new files aren't o.k., e.g. see [1]
>>
>> @Adam
>> Do you have the time to fix this. If not, do you give me the permission
>> to change the headers in question?
>>
>> BR
>> Andreas Lehmkühler
>>
>> [1]
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?view=markup&pathrev=1142109
>>
>>
>>
>> Am 02.07.2011 00:28, schrieb adam@apache.org:
>>> Author: adam
>>> Date: Fri Jul  1 22:28:23 2011
>>> New Revision: 1142109
>>>
>>> URL: http://svn.apache.org/viewvc?rev=1142109&view=rev
>>> Log:
>>> PDFBOX-1000: Conforming parser.  Initial commit to make it easier for
>>> others to test&   contribute.
>>>
>>> Added:
>>>
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>>
>>>
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>>>
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>>
>>>
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>>
>>>
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>>
>>>
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>>
>>>       pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/
>>>
>>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>> (with props)
>>> Modified:
>>>
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>>
>>>
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>>
>>>
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>>
>>>
>>> Modified:
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>> (original)
>>> +++
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -43,7 +43,7 @@ public class COSDictionary extends COSBa
>>>         * The name-value pairs of this dictionary. The pairs are kept
>>> in the
>>>         * order they were added to the dictionary.
>>>         */
>>> -    private final Map<COSName, COSBase>   items =
>>> +    protected final Map<COSName, COSBase>   items =
>>>            new LinkedHashMap<COSName, COSBase>();
>>>
>>>        /**
>>> @@ -1410,12 +1410,18 @@ public class COSDictionary extends COSBa
>>>        /**
>>>         * {@inheritDoc}
>>>         */
>>> -    public String toString()
>>> -    {
>>> +    @Override
>>> +    public String toString() {
>>>            StringBuilder retVal = new StringBuilder("COSDictionary{");
>>> -        for( COSName key : items.keySet() )
>>> -        {
>>> -            retVal.append("(" + key + ":" +
>>> getDictionaryObject(key).toString() + ") ");
>>> +        for(COSName key : items.keySet()) {
>>> +            retVal.append("(");
>>> +            retVal.append(key);
>>> +            retVal.append(":");
>>> +            if(getDictionaryObject(key) != null)
>>> +                retVal.append(getDictionaryObject(key).toString());
>>> +            else
>>> +                retVal.append("<null>");
>>> +            retVal.append(") ");
>>>            }
>>>            retVal.append("}");
>>>            return retVal.toString();
>>>
>>> Added:
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java?rev=1142109&view=auto
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>> (added)
>>> +++
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -0,0 +1,61 @@
>>> +/*
>>> + *  Copyright 2011 adam.
>>> + *
>>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>>> + *  you may not use this file except in compliance with the License.
>>> + *  You may obtain a copy of the License at
>>> + *
>>> + *       http://www.apache.org/licenses/LICENSE-2.0
>>> + *
>>> + *  Unless required by applicable law or agreed to in writing, software
>>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + *  See the License for the specific language governing permissions and
>>> + *  limitations under the License.
>>> + *  under the License.
>>> + */
>>> +
>>> +package org.apache.pdfbox.cos;
>>> +
>>> +import org.apache.commons.logging.Log;
>>> +import org.apache.commons.logging.LogFactory;
>>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>>> +
>>> +/**
>>> + *
>>> + * @author adam
>>> + */
>>> +public class COSDictionaryLateBinding extends COSDictionary {
>>> +    public static final Log log =
>>> LogFactory.getLog(COSDictionaryLateBinding.class);
>>> +    ConformingPDFParser parser;
>>> +
>>> +    public COSDictionaryLateBinding(ConformingPDFParser parser) {
>>> +        super();
>>> +        this.parser = parser;
>>> +    }
>>> +
>>> +    /**
>>> +     * This will get an object from this dictionary.  If the object
>>> is a reference then it will
>>> +     * dereference it and get it from the document.  If the object is
>>> COSNull then
>>> +     * null will be returned.
>>> +     * @param key The key to the object that we are getting.
>>> +     * @return The object that matches the key.
>>> +     */
>>> +    @Override
>>> +    public COSBase getDictionaryObject(COSName key) {
>>> +        COSBase retval = items.get(key);
>>> +        if(retval instanceof COSObject) {
>>> +            int objectNumber =
>>> ((COSObject)retval).getObjectNumber().intValue();
>>> +            int generation =
>>> ((COSObject)retval).getGenerationNumber().intValue();
>>> +            try {
>>> +                retval = parser.getObject(objectNumber, generation);
>>> +            } catch(Exception e) {
>>> +                log.warn("Unable to read information for object " +
>>> objectNumber);
>>> +            }
>>> +        }
>>> +        if(retval instanceof COSNull) {
>>> +            retval = null;
>>> +        }
>>> +        return retval;
>>> +    }
>>> +}
>>>
>>> Added:
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java?rev=1142109&view=auto
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>>> (added)
>>> +++
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -0,0 +1,100 @@
>>> +/*
>>> + *  Copyright 2011 adam.
>>> + *
>>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>>> + *  you may not use this file except in compliance with the License.
>>> + *  You may obtain a copy of the License at
>>> + *
>>> + *       http://www.apache.org/licenses/LICENSE-2.0
>>> + *
>>> + *  Unless required by applicable law or agreed to in writing, software
>>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + *  See the License for the specific language governing permissions and
>>> + *  limitations under the License.
>>> + *  under the License.
>>> + */
>>> +
>>> +package org.apache.pdfbox.cos;
>>> +
>>> +import org.apache.pdfbox.exceptions.COSVisitorException;
>>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>>> +
>>> +/**
>>> + *
>>> + * @author adam
>>> + */
>>> +public class COSUnread extends COSBase {
>>> +    private long objectNumber;
>>> +    private long generation;
>>> +    private ConformingPDFParser parser;
>>> +
>>> +    public COSUnread() {
>>> +        super();
>>> +    }
>>> +
>>> +    public COSUnread(long objectNumber, long generation) {
>>> +        this();
>>> +        this.objectNumber = objectNumber;
>>> +        this.generation = generation;
>>> +    }
>>> +
>>> +    public COSUnread(long objectNumber, long generation,
>>> ConformingPDFParser parser) {
>>> +        this(objectNumber, generation);
>>> +        this.parser = parser;
>>> +    }
>>> +
>>> +    @Override
>>> +    public Object accept(ICOSVisitor visitor) throws
>>> COSVisitorException {
>>> +        // TODO: read the object using the parser (if available) and
>>> visit that object
>>> +        throw new UnsupportedOperationException("COSUnread can not be
>>> written/visited.");
>>> +    }
>>> +
>>> +    @Override
>>> +    public String toString() {
>>> +        return "COSUnread{" + objectNumber + "," + generation + "}";
>>> +    }
>>> +
>>> +    /**
>>> +     * @return the objectNumber
>>> +     */
>>> +    public long getObjectNumber() {
>>> +        return objectNumber;
>>> +    }
>>> +
>>> +    /**
>>> +     * @param objectNumber the objectNumber to set
>>> +     */
>>> +    public void setObjectNumber(long objectNumber) {
>>> +        this.objectNumber = objectNumber;
>>> +    }
>>> +
>>> +    /**
>>> +     * @return the generation
>>> +     */
>>> +    public long getGeneration() {
>>> +        return generation;
>>> +    }
>>> +
>>> +    /**
>>> +     * @param generation the generation to set
>>> +     */
>>> +    public void setGeneration(long generation) {
>>> +        this.generation = generation;
>>> +    }
>>> +
>>> +    /**
>>> +     * @return the parser
>>> +     */
>>> +    public ConformingPDFParser getParser() {
>>> +        return parser;
>>> +    }
>>> +
>>> +    /**
>>> +     * @param parser the parser to set
>>> +     */
>>> +    public void setParser(ConformingPDFParser parser) {
>>> +        this.parser = parser;
>>> +    }
>>> +
>>> +}
>>>
>>> Modified:
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>> (original)
>>> +++
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -110,6 +110,10 @@ public abstract class BaseParser
>>>         */
>>>        protected final boolean forceParsing;
>>>
>>> +    public BaseParser() {
>>> +        this.forceParsing = FORCE_PARSING;
>>> +    }
>>> +
>>>        /**
>>>         * Constructor.
>>>         *
>>> @@ -876,7 +880,7 @@ public abstract class BaseParser
>>>                throw new IOException("expected='/' actual='" + (char)c
>>> + "'-" + c + " " + pdfSource );
>>>            }
>>>            // costruisce il nome
>>> -        StringBuffer buffer = new StringBuffer();
>>> +        StringBuilder buffer = new StringBuilder();
>>>            c = pdfSource.read();
>>>            while( c != -1 )
>>>            {
>>> @@ -1063,7 +1067,7 @@ public abstract class BaseParser
>>>            {
>>>                if( Character.isDigit(c) || c == '-' || c == '+' || c ==
>>> '.')
>>>                {
>>> -                StringBuffer buf = new StringBuffer();
>>> +                StringBuilder buf = new StringBuilder();
>>>                    int ic = pdfSource.read();
>>>                    c = (char)ic;
>>>                    while( Character.isDigit( c )||
>>> @@ -1118,7 +1122,7 @@ public abstract class BaseParser
>>>        protected String readString() throws IOException
>>>        {
>>>            skipSpaces();
>>> -        StringBuffer buffer = new StringBuffer();
>>> +        StringBuilder buffer = new StringBuilder();
>>>            int c = pdfSource.read();
>>>            while( !isEndOfName((char)c)&&   !isClosing(c)&&   c != -1 )
>>>            {
>>> @@ -1148,7 +1152,7 @@ public abstract class BaseParser
>>>            {
>>>                c = pdfSource.read();
>>>            }
>>> -        StringBuffer buffer = new StringBuffer( theString.length() );
>>> +        StringBuilder buffer = new StringBuilder( theString.length() );
>>>            int charsRead = 0;
>>>            while( !isEOL(c)&&   c != -1&&   charsRead<   theString.length() )
>>>            {
>>> @@ -1194,7 +1198,7 @@ public abstract class BaseParser
>>>
>>>            //average string size is around 2 and the normal string
>>> buffer size is
>>>            //about 16 so lets save some space.
>>> -        StringBuffer buffer = new StringBuffer(length);
>>> +        StringBuilder buffer = new StringBuilder(length);
>>>            while( !isWhitespace(c)&&   !isClosing(c)&&   c != -1&&
>>> buffer.length()<   length&&
>>>                    c != '['&&
>>>                    c != '<'&&
>>> @@ -1250,7 +1254,7 @@ public abstract class BaseParser
>>>                throw new IOException( "Error: End-of-File, expected
>>> line");
>>>            }
>>>
>>> -        StringBuffer buffer = new StringBuffer( 11 );
>>> +        StringBuilder buffer = new StringBuilder( 11 );
>>>
>>>            int c;
>>>            while ((c = pdfSource.read()) != -1)
>>> @@ -1300,10 +1304,9 @@ public abstract class BaseParser
>>>        }
>>>
>>>        /**
>>> -     * This will tell if the next byte is whitespace or not.
>>> -     *
>>> +     * This will tell if the next byte is whitespace or not.  These
>>> values are
>>> +     * specified in table 1 (page 12) of ISO 32000-1:2008.
>>>         * @param c The character to check against whitespace
>>> -     *
>>>         * @return true if the next byte in the stream is a whitespace
>>> character.
>>>         */
>>>        protected boolean isWhitespace( int c )
>>>
>>> Added:
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1142109&view=auto
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>> (added)
>>> +++
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -0,0 +1,696 @@
>>> +/*
>>> + *  Copyright 2010 adam.
>>> + *
>>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>>> + *  you may not use this file except in compliance with the License.
>>> + *  You may obtain a copy of the License at
>>> + *
>>> + *       http://www.apache.org/licenses/LICENSE-2.0
>>> + *
>>> + *  Unless required by applicable law or agreed to in writing, software
>>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + *  See the License for the specific language governing permissions and
>>> + *  limitations under the License.
>>> + *  under the License.
>>> + */
>>> +
>>> +package org.apache.pdfbox.pdfparser;
>>> +
>>> +import java.io.File;
>>> +import java.io.IOException;
>>> +import java.util.ArrayList;
>>> +import java.util.List;
>>> +import java.util.Set;
>>> +import org.apache.pdfbox.cos.COSArray;
>>> +import org.apache.pdfbox.cos.COSBase;
>>> +import org.apache.pdfbox.cos.COSDictionary;
>>> +import org.apache.pdfbox.cos.COSDocument;
>>> +import org.apache.pdfbox.cos.COSFloat;
>>> +import org.apache.pdfbox.cos.COSInteger;
>>> +import org.apache.pdfbox.cos.COSName;
>>> +import org.apache.pdfbox.cos.COSNumber;
>>> +import org.apache.pdfbox.cos.COSObject;
>>> +import org.apache.pdfbox.cos.COSString;
>>> +import org.apache.pdfbox.cos.COSUnread;
>>> +import org.apache.pdfbox.io.RandomAccess;
>>> +import org.apache.pdfbox.io.RandomAccessFile;
>>> +import org.apache.pdfbox.pdmodel.ConformingPDDocument;
>>> +import org.apache.pdfbox.pdmodel.PDDocument;
>>> +import org.apache.pdfbox.pdmodel.common.XrefEntry;
>>> +import org.apache.pdfbox.persistence.util.COSObjectKey;
>>> +
>>> +/**
>>> + *
>>> + * @author <a href="adam@apache.org">Adam Nichols</a>
>>> + */
>>> +public class ConformingPDFParser extends BaseParser {
>>> +    protected RandomAccess inputFile;
>>> +    List<XrefEntry>   xrefEntries;
>>> +    private long currentOffset;
>>> +    private ConformingPDDocument doc = null;
>>> +    private boolean throwNonConformingException = true;
>>> +    private boolean recursivlyRead = true;
>>> +
>>> +    /**
>>> +     * Constructor.
>>> +     *
>>> +     * @param inputFile The file that contains the PDF document.
>>> +     *
>>> +     * @throws IOException If there is an error initializing the stream.
>>> +     */
>>> +    public ConformingPDFParser(File inputFile) throws IOException {
>>> +        this.inputFile = new RandomAccessFile(inputFile, "r");
>>> +    }
>>> +
>>> +    /**
>>> +     * This will parse the stream and populate the COSDocument
>>> object.  This will close
>>> +     * the stream when it is done parsing.
>>> +     *
>>> +     * @throws IOException If there is an error reading from the
>>> stream or corrupt data
>>> +     * is found.
>>> +     */
>>> +    public void parse() throws IOException {
>>> +        document = new COSDocument();
>>> +        doc = new ConformingPDDocument(document);
>>> +        currentOffset = inputFile.length()-1;
>>> +        long xRefTableLocation = parseTrailerInformation();
>>> +        currentOffset = xRefTableLocation;
>>> +        parseXrefTable();
>>> +        // now that we read the xref table and put null references in
>>> the doc,
>>> +        // we can dereference those objects now.
>>> +        boolean oldValue = recursivlyRead;
>>> +        recursivlyRead = false;
>>> +        List<COSObjectKey>   keys = doc.getObjectKeysFromPool();
>>> +        for(COSObjectKey key : keys) {
>>> +            // getObject will put it into the document's object pool
>>> for us
>>> +            getObject(key.getNumber(), key.getGeneration());
>>> +        }
>>> +        recursivlyRead = oldValue;
>>> +    }
>>> +
>>> +    /**
>>> +     * This will get the document that was parsed.  parse() must be
>>> called before this is called.
>>> +     * When you are done with this document you must call close() on
>>> it to release
>>> +     * resources.
>>> +     *
>>> +     * @return The document that was parsed.
>>> +     *
>>> +     * @throws IOException If there is an error getting the document.
>>> +     */
>>> +    public COSDocument getDocument() throws IOException {
>>> +        if( document == null ) {
>>> +            throw new IOException( "You must call parse() before
>>> calling getDocument()" );
>>> +        }
>>> +        return document;
>>> +    }
>>> +
>>> +    /**
>>> +     * This will get the PD document that was parsed.  When you are
>>> done with
>>> +     * this document you must call close() on it to release resources.
>>> +     *
>>> +     * @return The document at the PD layer.
>>> +     *
>>> +     * @throws IOException If there is an error getting the document.
>>> +     */
>>> +    public PDDocument getPDDocument() throws IOException {
>>> +        return doc;
>>> +    }
>>> +
>>> +    private boolean parseXrefTable() throws IOException {
>>> +        String currentLine = readLine();
>>> +        if(throwNonConformingException) {
>>> +            if(!"xref".equals(currentLine))
>>> +                throw new AssertionError("xref table not
>>> found.\nExpected: xref\nFound: "+currentLine);
>>> +        }
>>> +
>>> +        int objectNumber = readInt();
>>> +        int entries = readInt();
>>> +        xrefEntries = new ArrayList<XrefEntry>(entries);
>>> +        for(int i=0; i<entries; i++)
>>> +            xrefEntries.add(new XrefEntry(objectNumber++, readInt(),
>>> readInt(), readLine()));
>>> +
>>> +        return true;
>>> +    }
>>> +
>>> +    protected long parseTrailerInformation() throws IOException,
>>> NumberFormatException {
>>> +        long xrefLocation = -1;
>>> +        consumeWhitespaceBackwards();
>>> +        String currentLine = readLineBackwards();
>>> +        if(throwNonConformingException) {
>>> +            if(!"%%EOF".equals(currentLine))
>>> +                throw new AssertionError("Invalid EOF
>>> marker.\nExpected: %%EOF\nFound: "+currentLine);
>>> +        }
>>> +
>>> +        xrefLocation = readLongBackwards();
>>> +        currentLine = readLineBackwards();
>>> +        if(throwNonConformingException) {
>>> +            if(!"startxref".equals(currentLine))
>>> +                throw new AssertionError("Invalid trailer.\nExpected:
>>> startxref\nFound: "+currentLine);
>>> +        }
>>> +
>>> +        document.setTrailer(readDictionaryBackwards());
>>> +        consumeWhitespaceBackwards();
>>> +        currentLine = readLineBackwards();
>>> +        if(throwNonConformingException) {
>>> +            if(!"trailer".equals(currentLine))
>>> +                throw new AssertionError("Invalid trailer.\nExpected:
>>> trailer\nFound: "+currentLine);
>>> +        }
>>> +
>>> +        return xrefLocation;
>>> +    }
>>> +
>>> +    protected byte readByteBackwards() throws IOException {
>>> +        inputFile.seek(currentOffset);
>>> +        byte singleByte = (byte)inputFile.read();
>>> +        currentOffset--;
>>> +        return singleByte;
>>> +    }
>>> +
>>> +    protected byte readByte() throws IOException {
>>> +        inputFile.seek(currentOffset);
>>> +        byte singleByte = (byte)inputFile.read();
>>> +        currentOffset++;
>>> +        return singleByte;
>>> +    }
>>> +
>>> +    protected String readBackwardUntilWhitespace() throws IOException {
>>> +        StringBuilder sb = new StringBuilder();
>>> +        byte singleByte = readByteBackwards();
>>> +        while(!isWhitespace(singleByte)) {
>>> +            sb.insert(0, (char)singleByte);
>>> +            singleByte = readByteBackwards();
>>> +        }
>>> +        return sb.toString();
>>> +    }
>>> +
>>> +    /**
>>> +     * This will read all bytes (backwards) until a non-whitespace
>>> character is
>>> +     * found.  To save you an extra read, the non-whitespace
>>> character is
>>> +     * returned.  If the current character is not whitespace, this
>>> method will
>>> +     * just return the current char.
>>> +     * @return the first non-whitespace character found
>>> +     * @throws IOException if there is an error reading from the file
>>> +     */
>>> +    protected byte consumeWhitespaceBackwards() throws IOException {
>>> +        inputFile.seek(currentOffset);
>>> +        byte singleByte = (byte)inputFile.read();
>>> +        if(!isWhitespace(singleByte))
>>> +            return singleByte;
>>> +
>>> +        // we have some whitespace, let's consume it
>>> +        while(isWhitespace(singleByte)) {
>>> +            singleByte = readByteBackwards();
>>> +        }
>>> +        // readByteBackwards will decrement the currentOffset to
>>> point the byte
>>> +        // before the one just read, so we increment it back to the
>>> current byte
>>> +        currentOffset++;
>>> +        return singleByte;
>>> +    }
>>> +
>>> +    /**
>>> +     * This will read all bytes until a non-whitespace character is
>>> +     * found.  To save you an extra read, the non-whitespace
>>> character is
>>> +     * returned.  If the current character is not whitespace, this
>>> method will
>>> +     * just return the current char.
>>> +     * @return the first non-whitespace character found
>>> +     * @throws IOException if there is an error reading from the file
>>> +     */
>>> +    protected byte consumeWhitespace() throws IOException {
>>> +        inputFile.seek(currentOffset);
>>> +        byte singleByte = (byte)inputFile.read();
>>> +        if(!isWhitespace(singleByte))
>>> +            return singleByte;
>>> +
>>> +        // we have some whitespace, let's consume it
>>> +        while(isWhitespace(singleByte)) {
>>> +            singleByte = readByte();
>>> +        }
>>> +        // readByte() will increment the currentOffset to point the byte
>>> +        // after the one just read, so we decrement it back to the
>>> current byte
>>> +        currentOffset--;
>>> +        return singleByte;
>>> +    }
>>> +
>>> +    /**
>>> +     * This will consume any whitespace, read in bytes until
>>> whitespace is found
>>> +     * again and then parse the characters which have been read as a
>>> long.  The
>>> +     * current offset will then point at the first whitespace
>>> character which
>>> +     * precedes the number.
>>> +     * @return the parsed number
>>> +     * @throws IOException if there is an error reading from the file
>>> +     * @throws NumberFormatException if the bytes read can not be
>>> converted to a number
>>> +     */
>>> +    protected long readLongBackwards() throws IOException,
>>> NumberFormatException {
>>> +        StringBuilder sb = new StringBuilder();
>>> +        consumeWhitespaceBackwards();
>>> +        byte singleByte = readByteBackwards();
>>> +        while(!isWhitespace(singleByte)) {
>>> +            sb.insert(0, (char)singleByte);
>>> +            singleByte = readByteBackwards();
>>> +        }
>>> +        if(sb.length() == 0)
>>> +            throw new AssertionError("Number not found.  Expected
>>> number at offset: " + currentOffset);
>>> +        return Long.parseLong(sb.toString());
>>> +    }
>>> +
>>> +    @Override
>>> +    protected int readInt() throws IOException {
>>> +        StringBuilder sb = new StringBuilder();
>>> +        consumeWhitespace();
>>> +        byte singleByte = readByte();
>>> +        while(!isWhitespace(singleByte)) {
>>> +            sb.append((char)singleByte);
>>> +            singleByte = readByte();
>>> +        }
>>> +        if(sb.length() == 0)
>>> +            throw new AssertionError("Number not found.  Expected
>>> number at offset: " + currentOffset);
>>> +        return Integer.parseInt(sb.toString());
>>> +    }
>>> +
>>> +    /**
>>> +     * This will read in a number and return the COS version of the
>>> number (be
>>> +     * it a COSInteger or a COSFloat).
>>> +     * @return the COSNumber which was read/parsed
>>> +     * @throws IOException
>>> +     */
>>> +    protected COSNumber readNumber() throws IOException {
>>> +        StringBuilder sb = new StringBuilder();
>>> +        consumeWhitespace();
>>> +        byte singleByte = readByte();
>>> +        while(!isWhitespace(singleByte)) {
>>> +            sb.append((char)singleByte);
>>> +            singleByte = readByte();
>>> +        }
>>> +        if(sb.length() == 0)
>>> +            throw new AssertionError("Number not found.  Expected
>>> number at offset: " + currentOffset);
>>> +        return parseNumber(sb.toString());
>>> +    }
>>> +
>>> +    protected COSNumber parseNumber(String number) throws IOException {
>>> +        if(number.matches("^[0-9]+$"))
>>> +            return COSInteger.get(number);
>>> +        return new COSFloat(Float.parseFloat(number));
>>> +    }
>>> +
>>> +    protected COSBase processCosObject(String string) throws
>>> IOException {
>>> +        if(string != null&&   string.endsWith(">")) {
>>> +            // string of hex codes
>>> +            return
>>> COSString.createFromHexString(string.replaceAll("^<",
>>> "").replaceAll(">$", ""));
>>> +        }
>>> +        return null;
>>> +    }
>>> +
>>> +    protected COSBase readObjectBackwards() throws IOException {
>>> +        COSBase obj = null;
>>> +        consumeWhitespaceBackwards();
>>> +        String lastSection = readBackwardUntilWhitespace();
>>> +        if("R".equals(lastSection)) {
>>> +            // indirect reference
>>> +            long gen = readLongBackwards();
>>> +            long number = readLongBackwards();
>>> +            // We just put a placeholder in the pool for now, we'll
>>> read the data later
>>> +            doc.putObjectInPool(new COSUnread(), number, gen);
>>> +            obj = new COSUnread(number, gen, this);
>>> +        } else if(">>".equals(lastSection)) {
>>> +            // dictionary
>>> +            throw new RuntimeException("Not yet implemented");
>>> +        } else if(lastSection != null&&   lastSection.endsWith("]")) {
>>> +            // array
>>> +            COSArray array = new COSArray();
>>> +            lastSection = lastSection.replaceAll("]$", "");
>>> +            while(!lastSection.startsWith("[")) {
>>> +                if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a
>>> hex string
>>> +
>>> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<",
>>> "").replaceAll(">\\s*$", "")));
>>> +                lastSection = readBackwardUntilWhitespace();
>>> +            }
>>> +            lastSection = lastSection.replaceAll("^\\[", "");
>>> +            if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex
>>> string
>>> +
>>> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<",
>>> "").replaceAll(">\\s*$", "")));
>>> +            obj = array;
>>> +        } else if(lastSection != null&&   lastSection.endsWith(">")) {
>>> +            // string of hex codes
>>> +            obj = processCosObject(lastSection);
>>> +        } else {
>>> +            // try a number, otherwise fall back on a string
>>> +            try {
>>> +                Long.parseLong(lastSection);
>>> +                obj = COSNumber.get(lastSection);
>>> +            } catch(NumberFormatException e) {
>>> +                throw new RuntimeException("Not yet implemented");
>>> +            }
>>> +        }
>>> +
>>> +        return obj;
>>> +    }
>>> +
>>> +    protected COSName readNameBackwards() throws IOException {
>>> +        String name = readBackwardUntilWhitespace();
>>> +        name = name.replaceAll("^/", "");
>>> +        return COSName.getPDFName(name);
>>> +    }
>>> +
>>> +    public COSBase getObject(long objectNumber, long generation)
>>> throws IOException {
>>> +        // we could optionally check to see if parse() has been
>>> called &
>>> +        // throw an exception here, but I don't think that's really
>>> necessary
>>> +        XrefEntry entry = xrefEntries.get((int)objectNumber);
>>> +        currentOffset = entry.getByteOffset();
>>> +        return readObject(objectNumber, generation);
>>> +    }
>>> +
>>> +    /**
>>> +     * This will read an object from the inputFile at whatever our
>>> currentOffset
>>> +     * is.  If the object and generation are not the expected values
>>> and this
>>> +     * object is set to throw an exception for non-conforming
>>> documents, then an
>>> +     * exception will be thrown.
>>> +     * @param objectNumber the object number you expect to read
>>> +     * @param generation the generation you expect this object to be
>>> +     * @return
>>> +     */
>>> +    public COSBase readObject(long objectNumber, long generation)
>>> throws IOException {
>>> +        // when recursively reading, we always pull the object from
>>> the filesystem
>>> +        if(document != null&&   recursivlyRead) {
>>> +            // check to see if it is in the document cache before
>>> hitting the filesystem
>>> +            COSBase obj = doc.getObjectFromPool(objectNumber,
>>> generation);
>>> +            if(obj != null)
>>> +                return obj;
>>> +        }
>>> +
>>> +        int actualObjectNumber = readInt();
>>> +        if(objectNumber != actualObjectNumber)
>>> +            if(throwNonConformingException)
>>> +                throw new AssertionError("Object numer expected was " +
>>> +                        objectNumber + " but actual was " +
>>> actualObjectNumber);
>>> +        consumeWhitespace();
>>> +
>>> +        int actualGeneration = readInt();
>>> +        if(generation != actualGeneration)
>>> +            if(throwNonConformingException)
>>> +                throw new AssertionError("Generation expected was " +
>>> +                        generation + " but actual was " +
>>> actualGeneration);
>>> +        consumeWhitespace();
>>> +
>>> +        String obj = readWord();
>>> +        if(!"obj".equals(obj))
>>> +            if(throwNonConformingException)
>>> +                throw new AssertionError("Expected keyword 'obj' but
>>> found " + obj);
>>> +
>>> +        // put placeholder object in doc to prevent infinite recursion
>>> +        // e.g. read Root ->   dereference object ->   read object
>>> which has /Parent ->   GOTO read Root
>>> +        doc.putObjectInPool(new COSObject(null), objectNumber,
>>> generation);
>>> +        COSBase object = readObject();
>>> +        doc.putObjectInPool(object, objectNumber, generation);
>>> +        return object;
>>> +    }
>>> +
>>> +    /**
>>> +     * This actually reads the object data.
>>> +     * @return the object which is read
>>> +     * @throws IOException
>>> +     */
>>> +    protected COSBase readObject() throws IOException {
>>> +        consumeWhitespace();
>>> +        String string = readWord();
>>> +        if(string.startsWith("<<")) {
>>> +            // this is a dictionary
>>> +            COSDictionary dictionary = new COSDictionary();
>>> +            boolean atEndOfDictionary = false;
>>> +            // remove the marker for the beginning of the dictionary
>>> +            string = string.replaceAll("^<<", "");
>>> +
>>> +            if("".equals(string) || string.matches("^\\w$"))
>>> +                string = readWord().trim();
>>> +            while(!atEndOfDictionary) {
>>> +                COSName name = COSName.getPDFName(string);
>>> +                COSBase object = readObject();
>>> +                dictionary.setItem(name, object);
>>> +
>>> +                byte singleByte = consumeWhitespace();
>>> +                if(singleByte == '>') {
>>> +                    readByte(); // get rid of the second '>'
>>> +                    atEndOfDictionary = true;
>>> +                }
>>> +                if(!atEndOfDictionary)
>>> +                    string = readWord().trim();
>>> +            }
>>> +            return dictionary;
>>> +        } else if(string.startsWith("/")) {
>>> +            // it's a dictionary label. i.e. /Type or /Pages or
>>> something similar
>>> +            COSBase name = COSName.getPDFName(string);
>>> +            return name;
>>> +        } else if(string.startsWith("-")) {
>>> +            // it's a negative number
>>> +            return parseNumber(string);
>>> +        } else if(string.charAt(0)>= '0'&&   string.charAt(0)<= '9' ) {
>>> +            // it's a COSInt or COSFloat, or a weak reference (i.e.
>>> "3 0 R")
>>> +            // we'll have to peek ahead a little to see if it's a
>>> reference or not
>>> +            long tempOffset = this.currentOffset;
>>> +            consumeWhitespace();
>>> +            String tempString = readWord();
>>> +            if(tempString.matches("^[0-9]+$")) {
>>> +                // it is an int, might be a weak reference...
>>> +                tempString = readWord();
>>> +                if(!"R".equals(tempString)) {
>>> +                    // it's just a number, not a weak reference
>>> +                    this.currentOffset = tempOffset;
>>> +                    return parseNumber(string);
>>> +                }
>>> +            } else {
>>> +                // it's just a number, not a weak reference
>>> +                this.currentOffset = tempOffset;
>>> +                return parseNumber(string);
>>> +            }
>>> +
>>> +            // it wasn't a number, so we need to parse the
>>> weak-reference
>>> +            this.currentOffset = tempOffset;
>>> +            int number = Integer.parseInt(string);
>>> +            int gen = readInt();
>>> +            String r = readWord();
>>> +
>>> +            if(!"R".equals(r))
>>> +                if(throwNonConformingException)
>>> +                    throw new AssertionError("Expected keyword 'R'
>>> but found " + r);
>>> +
>>> +            if(recursivlyRead) {
>>> +                // seek to the object, read it, seek back to current
>>> location
>>> +                long tempLocation = this.currentOffset;
>>> +                this.currentOffset =
>>> this.xrefEntries.get(number).getByteOffset();
>>> +                COSBase returnValue = readObject(number, gen);
>>> +                this.currentOffset = tempLocation;
>>> +                return returnValue;
>>> +            } else {
>>> +                // Put a COSUnread there as a placeholder
>>> +                COSObject obj = new COSObject(new COSUnread());
>>> +                obj.setObjectNumber(COSInteger.get(number));
>>> +                obj.setGenerationNumber(COSInteger.get(gen));
>>> +                return obj;
>>> +            }
>>> +        } else if(string.startsWith("]")) {
>>> +            // end of an array, just return null
>>> +            if("]".equals(string))
>>> +                return null;
>>> +            int oldLength = string.length();
>>> +            this.currentOffset -= oldLength;
>>> +            return null;
>>> +        } else if(string.startsWith("[")) {
>>> +            // array of values
>>> +            // we'll just pay attention to the first part (this is in
>>> case there
>>> +            // is no whitespace between the "[" and the first element)
>>> +            int oldLength = string.length();
>>> +            string = "[";
>>> +            this.currentOffset -= (oldLength - string.length() + 1);
>>> +
>>> +            COSArray array = new COSArray();
>>> +            COSBase object = readObject();
>>> +            while(object != null) {
>>> +                array.add(object);
>>> +                object = readObject();
>>> +            }
>>> +            return array;
>>> +        } else if(string.startsWith("(")) {
>>> +            // this is a string (not hex encoded), strip off the '('
>>> and read until ')'
>>> +            StringBuilder sb = new StringBuilder(string.substring(1));
>>> +            byte singleByte = readByte();
>>> +            while(singleByte != ')') {
>>> +                sb.append((char)singleByte);
>>> +                singleByte = readByte();
>>> +            }
>>> +            return new COSString(sb.toString());
>>> +        } else {
>>> +            throw new RuntimeException("Not yet implemented: " + string
>>> +                    + " loation=" + this.currentOffset);
>>> +        }
>>> +    }
>>> +
>>> +    /**
>>> +     * This will read the next string from the stream.
>>> +     * @return The string that was read from the stream.
>>> +     * @throws IOException If there is an error reading from the stream.
>>> +     */
>>> +    @Override
>>> +    protected String readString() throws IOException {
>>> +        consumeWhitespace();
>>> +        StringBuilder buffer = new StringBuilder();
>>> +        int c = pdfSource.read();
>>> +        while(!isEndOfName((char)c)&&   !isClosing(c)&&   c != -1) {
>>> +            buffer.append( (char)c );
>>> +            c = pdfSource.read();
>>> +        }
>>> +        if (c != -1) {
>>> +            pdfSource.unread(c);
>>> +        }
>>> +        return buffer.toString();
>>> +    }
>>> +
>>> +    protected COSDictionary readDictionaryBackwards() throws
>>> IOException {
>>> +        COSDictionary dict = new COSDictionary();
>>> +
>>> +        // consume the last two '>' chars which signify the end of
>>> the dictionary
>>> +        consumeWhitespaceBackwards();
>>> +        byte singleByte = readByteBackwards();
>>> +        if(throwNonConformingException) {
>>> +            if(singleByte != '>')
>>> +                throw new AssertionError("");
>>> +        }
>>> +        singleByte = readByteBackwards();
>>> +        if(throwNonConformingException) {
>>> +            if(singleByte != '>')
>>> +                throw new AssertionError("");
>>> +        }
>>> +
>>> +        // check to see if we're at the end of the dictionary
>>> +        boolean atEndOfDictionary = false;
>>> +        singleByte = consumeWhitespaceBackwards();
>>> +        if(singleByte == '<') {
>>> +            inputFile.seek(currentOffset-1);
>>> +            atEndOfDictionary =  ((byte)inputFile.read()) == '<';
>>> +        }
>>> +
>>> +        COSDictionary backwardsDictionary = new COSDictionary();
>>> +        // while we're not at the end of the dictionary, read in entries
>>> +        while(!atEndOfDictionary) {
>>> +            COSBase object = readObjectBackwards();
>>> +            COSName name = readNameBackwards();
>>> +            backwardsDictionary.setItem(name, object);
>>> +
>>> +            singleByte = consumeWhitespaceBackwards();
>>> +            if(singleByte == '<') {
>>> +                inputFile.seek(currentOffset-1);
>>> +                atEndOfDictionary =  ((byte)inputFile.read()) == '<';
>>> +            }
>>> +        }
>>> +
>>> +        // the dictionaries preserve the order keys were added, as
>>> such we shall
>>> +        // add them in the proper order, not the reverse order
>>> +        Set<COSName>   backwardsKeys = backwardsDictionary.keySet();
>>> +        for(int i = backwardsKeys.size()-1; i>=0; i--)
>>> +            dict.setItem((COSName)backwardsKeys.toArray()[i],
>>> backwardsDictionary.getItem((COSName)backwardsKeys.toArray()[i]));
>>> +
>>> +        // consume the last two '<' chars
>>> +        readByteBackwards();
>>> +        readByteBackwards();
>>> +
>>> +        return dict;
>>> +    }
>>> +
>>> +    /**
>>> +     * This will read a line starting with the byte at offset and going
>>> +     * backwards until it finds a newline.  This should only be used
>>> if we are
>>> +     * certain that the data will only be text, and not binary data.
>>> +     *
>>> +     * @param offset the location of the file where we should start
>>> reading
>>> +     * @return the string which was read
>>> +     * @throws IOException if there was an error reading data from
>>> the file
>>> +     */
>>> +    protected String readLineBackwards() throws IOException {
>>> +        StringBuilder sb = new StringBuilder();
>>> +        boolean endOfObject = false;
>>> +
>>> +        do {
>>> +            // first we read the %%EOF marker
>>> +            byte singleByte = readByteBackwards();
>>> +            if(singleByte == '\n') {
>>> +                // if ther's a preceeding \r, we'll eat that as well
>>> +                inputFile.seek(currentOffset);
>>> +                if((byte)inputFile.read() == '\r')
>>> +                    currentOffset--;
>>> +                endOfObject = true;
>>> +            } else if(singleByte == '\r') {
>>> +                endOfObject = true;
>>> +            } else {
>>> +                sb.insert(0, (char)singleByte);
>>> +            }
>>> +        } while(!endOfObject);
>>> +
>>> +        return sb.toString();
>>> +    }
>>> +
>>> +    /**
>>> +     * This will read a line starting with the byte at offset and going
>>> +     * forward until it finds a newline.  This should only be used if
>>> we are
>>> +     * certain that the data will only be text, and not binary data.
>>> +     * @param offset the location of the file where we should start
>>> reading
>>> +     * @return the string which was read
>>> +     * @throws IOException if there was an error reading data from
>>> the file
>>> +     */
>>> +    @Override
>>> +    protected String readLine() throws IOException {
>>> +        StringBuilder sb = new StringBuilder();
>>> +        boolean endOfLine = false;
>>> +
>>> +        do {
>>> +            // first we read the %%EOF marker
>>> +            byte singleByte = readByte();
>>> +            if(singleByte == '\n') {
>>> +                // if ther's a preceeding \r, we'll eat that as well
>>> +                inputFile.seek(currentOffset);
>>> +                if((byte)inputFile.read() == '\r')
>>> +                    currentOffset++;
>>> +                endOfLine = true;
>>> +            } else if(singleByte == '\r') {
>>> +                endOfLine = true;
>>> +            } else {
>>> +                sb.append((char)singleByte);
>>> +            }
>>> +        } while(!endOfLine);
>>> +
>>> +        return sb.toString();
>>> +    }
>>> +
>>> +    protected String readWord() throws IOException {
>>> +        StringBuilder sb = new StringBuilder();
>>> +        boolean stop = true;
>>> +        do {
>>> +            byte singleByte = readByte();
>>> +            stop = this.isWhitespace(singleByte);
>>> +
>>> +            // there are some additional characters which indicate
>>> the next element/word has begun
>>> +            // ignore the first char we read, b/c the first char is
>>> the beginnging of this object, not the next one
>>> +            if(!stop&&   sb.length()>   0) {
>>> +                stop = singleByte == '/' || singleByte == '['
>>> +                        || singleByte == ']'
>>> +                        || (singleByte == '>'&&
>>> !">".equals(sb.toString()));
>>> +                if(stop) // we're stopping on a non-whitespace char,
>>> decrement the
>>> +                    this.currentOffset--; // counter so we don't miss
>>> this character
>>> +            }
>>> +            if(!stop)
>>> +                sb.append((char)singleByte);
>>> +        } while(!stop);
>>> +
>>> +        return sb.toString();
>>> +    }
>>> +
>>> +    /**
>>> +     * @return the recursivlyRead
>>> +     */
>>> +    public boolean isRecursivlyRead() {
>>> +        return recursivlyRead;
>>> +    }
>>> +
>>> +    /**
>>> +     * @param recursivlyRead the recursivlyRead to set
>>> +     */
>>> +    public void setRecursivlyRead(boolean recursivlyRead) {
>>> +        this.recursivlyRead = recursivlyRead;
>>> +    }
>>> +}
>>>
>>> Added:
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?rev=1142109&view=auto
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>> (added)
>>> +++
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -0,0 +1,115 @@
>>> +/*
>>> + *  Copyright 2011 adam.
>>> + *
>>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>>> + *  you may not use this file except in compliance with the License.
>>> + *  You may obtain a copy of the License at
>>> + *
>>> + *       http://www.apache.org/licenses/LICENSE-2.0
>>> + *
>>> + *  Unless required by applicable law or agreed to in writing, software
>>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + *  See the License for the specific language governing permissions and
>>> + *  limitations under the License.
>>> + *  under the License.
>>> + */
>>> +
>>> +package org.apache.pdfbox.pdmodel;
>>> +
>>> +import java.io.File;
>>> +import java.io.IOException;
>>> +import java.util.ArrayList;
>>> +import java.util.HashMap;
>>> +import java.util.List;
>>> +import java.util.Map;
>>> +import org.apache.pdfbox.cos.COSBase;
>>> +import org.apache.pdfbox.cos.COSDocument;
>>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>>> +import org.apache.pdfbox.persistence.util.COSObjectKey;
>>> +
>>> +/**
>>> + *
>>> + * @author adam
>>> + */
>>> +public class ConformingPDDocument extends PDDocument {
>>> +    /**
>>> +     * Maps ObjectKeys to a COSObject. Note that references to these
>>> objects
>>> +     * are also stored in COSDictionary objects that map a name to a
>>> specific object.
>>> +     */
>>> +    private final Map<COSObjectKey, COSBase>   objectPool =
>>> +        new HashMap<COSObjectKey, COSBase>();
>>> +    private ConformingPDFParser parser = null;
>>> +
>>> +    public ConformingPDDocument() throws IOException {
>>> +        super();
>>> +    }
>>> +
>>> +    public ConformingPDDocument(COSDocument doc) throws IOException {
>>> +        super(doc);
>>> +    }
>>> +
>>> +    /**
>>> +     * This will load a document from an input stream.
>>> +     * @param input The File which contains the document.
>>> +     * @return The document that was loaded.
>>> +     * @throws IOException If there is an error reading from the stream.
>>> +     */
>>> +    public static PDDocument load(File input) throws IOException {
>>> +        ConformingPDFParser parser = new ConformingPDFParser(input);
>>> +        parser.parse();
>>> +        return parser.getPDDocument();
>>> +    }
>>> +
>>> +    /**
>>> +     * This will get an object from the pool.
>>> +     * @param key The object key.
>>> +     * @return The object in the pool or a new one if it has not been
>>> parsed yet.
>>> +     * @throws IOException If there is an error getting the proxy
>>> object.
>>> +     */
>>> +    public COSBase getObjectFromPool(COSObjectKey key) throws
>>> IOException {
>>> +        return objectPool.get(key);
>>> +    }
>>> +
>>> +    /**
>>> +     * This will get an object from the pool.
>>> +     * @param key The object key.
>>> +     * @return The object in the pool or a new one if it has not been
>>> parsed yet.
>>> +     * @throws IOException If there is an error getting the proxy
>>> object.
>>> +     */
>>> +    public List<COSObjectKey>   getObjectKeysFromPool() throws
>>> IOException {
>>> +        List<COSObjectKey>   keys = new ArrayList<COSObjectKey>();
>>> +        for(COSObjectKey key : objectPool.keySet())
>>> +            keys.add(key);
>>> +        return keys;
>>> +    }
>>> +
>>> +    /**
>>> +     * This will get an object from the pool.
>>> +     * @param number the object number
>>> +     * @param generation the generation of this object you wish to load
>>> +     * @return The object in the pool
>>> +     * @throws IOException If there is an error getting the proxy
>>> object.
>>> +     */
>>> +    public COSBase getObjectFromPool(long number, long generation)
>>> throws IOException {
>>> +        return objectPool.get(new COSObjectKey(number, generation));
>>> +    }
>>> +
>>> +    public void putObjectInPool(COSBase object, long number, long
>>> generation) {
>>> +        objectPool.put(new COSObjectKey(number, generation), object);
>>> +    }
>>> +
>>> +    /**
>>> +     * @return the parser
>>> +     */
>>> +    public ConformingPDFParser getParser() {
>>> +        return parser;
>>> +    }
>>> +
>>> +    /**
>>> +     * @param parser the parser to set
>>> +     */
>>> +    public void setParser(ConformingPDFParser parser) {
>>> +        this.parser = parser;
>>> +    }
>>> +}
>>>
>>> Added:
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java?rev=1142109&view=auto
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>> (added)
>>> +++
>>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -0,0 +1,43 @@
>>> +/*
>>> + *  Copyright 2011 adam.
>>> + *
>>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>>> + *  you may not use this file except in compliance with the License.
>>> + *  You may obtain a copy of the License at
>>> + *
>>> + *       http://www.apache.org/licenses/LICENSE-2.0
>>> + *
>>> + *  Unless required by applicable law or agreed to in writing, software
>>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + *  See the License for the specific language governing permissions and
>>> + *  limitations under the License.
>>> + *  under the License.
>>> + */
>>> +
>>> +package org.apache.pdfbox.pdmodel.common;
>>> +
>>> +/**
>>> + *
>>> + * @author adam
>>> + */
>>> +public class XrefEntry {
>>> +    private int objectNumber = 0;
>>> +    private int byteOffset = 0;
>>> +    private int generation = 0;
>>> +    private boolean inUse = true;
>>> +
>>> +    public XrefEntry() {
>>> +    }
>>> +
>>> +    public XrefEntry(int objectNumber, int byteOffset, int
>>> generation, String inUse) {
>>> +        this.objectNumber = objectNumber;
>>> +        this.byteOffset = byteOffset;
>>> +        this.generation = generation;
>>> +        this.inUse = "n".equals(inUse);
>>> +    }
>>> +
>>> +    public int getByteOffset() {
>>> +        return byteOffset;
>>> +    }
>>> +}
>>>
>>> Added:
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java?rev=1142109&view=auto
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>> (added)
>>> +++
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -0,0 +1,73 @@
>>> +/*
>>> + *  Copyright 2010 adam.
>>> + *
>>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>>> + *  you may not use this file except in compliance with the License.
>>> + *  You may obtain a copy of the License at
>>> + *
>>> + *       http://www.apache.org/licenses/LICENSE-2.0
>>> + *
>>> + *  Unless required by applicable law or agreed to in writing, software
>>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>>> implied.
>>> + *  See the License for the specific language governing permissions and
>>> + *  limitations under the License.
>>> + *  under the License.
>>> + */
>>> +
>>> +package org.apache.pdfbox.pdfparser;
>>> +
>>> +import java.io.File;
>>> +import java.net.URL;
>>> +import org.apache.pdfbox.cos.COSDictionary;
>>> +import org.junit.After;
>>> +import org.junit.AfterClass;
>>> +import org.junit.Before;
>>> +import org.junit.BeforeClass;
>>> +import org.junit.Test;
>>> +import static org.junit.Assert.*;
>>> +
>>> +/**
>>> + *
>>> + * @author adam
>>> + */
>>> +public class ConformingPDFParserTest {
>>> +
>>> +    public ConformingPDFParserTest() {
>>> +    }
>>> +
>>> +    @BeforeClass
>>> +    public static void setUpClass() throws Exception {
>>> +    }
>>> +
>>> +    @AfterClass
>>> +    public static void tearDownClass() throws Exception {
>>> +    }
>>> +
>>> +    @Before
>>> +    public void setUp() {
>>> +    }
>>> +
>>> +    @After
>>> +    public void tearDown() {
>>> +    }
>>> +
>>> +    /**
>>> +     * Test of parse method, of class ConformingPDFParser.
>>> +     */
>>> +    @Test
>>> +    public void testParse() throws Exception {
>>> +        URL inputUrl =
>>> ConformingPDFParser.class.getResource("gdb-refcard.pdf");
>>> +        File inputFile = new File(inputUrl.toURI());
>>> +        ConformingPDFParser instance = new
>>> ConformingPDFParser(inputFile);
>>> +        instance.parse();
>>> +
>>> +        COSDictionary trailer = instance.getDocument().getTrailer();
>>> +        assertNotNull(trailer);
>>> +        System.out.println("Trailer: " +
>>> instance.getDocument().getTrailer().toString());
>>> +        assertEquals(3, trailer.size());
>>> +        assertNotNull(trailer.getDictionaryObject("Root"));
>>> +        assertNotNull(trailer.getDictionaryObject("Info"));
>>> +        assertNotNull(trailer.getDictionaryObject("Size"));
>>> +    }
>>> +}
>>> \ No newline at end of file
>>>
>>> Modified:
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>>
>>> ==============================================================================
>>>
>>> ---
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>> (original)
>>> +++
>>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>> Fri Jul  1 22:28:23 2011
>>> @@ -16,7 +16,6 @@
>>>     */
>>>    package org.apache.pdfbox.pdmodel;
>>>
>>> -import java.io.File;
>>>    import junit.framework.TestCase;
>>>
>>>    public class TestPDDocumentCatalog extends TestCase {
>>> @@ -62,13 +61,29 @@ public class TestPDDocumentCatalog exten
>>>                doc =
>>> PDDocument.load(TestPDDocumentCatalog.class.getResourceAsStream("page_label.pdf"));
>>>
>>>                PDDocumentCatalog cat = doc.getDocumentCatalog();
>>>                // getLabelsByPageIndices() should not throw an exception
>>> -            String[] labels =
>>> cat.getPageLabels().getLabelsByPageIndices();
>>> +            cat.getPageLabels().getLabelsByPageIndices();
>>>            } catch(Exception e) {
>>> -            e.printStackTrace();
>>>                fail("Threw exception!");
>>>            } finally {
>>>                if(doc != null)
>>>                    doc.close();
>>>            }
>>>        }
>>> +
>>> +    /**
>>> +     * Test case for
>>> +     *<a href="https://issues.apache.org/jira/browse/PDFBOX-911"
>>> +     *>PDFBOX-911</a>   - Method PDDocument.getNumberOfPages() returns
>>> wrong
>>> +     * number of pages
>>> +     */
>>> +    public void testGetNumberOfPages() throws Exception {
>>> +        PDDocument doc = null;
>>> +        try {
>>> +            doc =
>>> PDDocument.load(TestPDDocumentCatalog.class.getResource("test.unc.pdf"));
>>> +            assertEquals(4, doc.getNumberOfPages());
>>> +        } finally {
>>> +            if(doc != null)
>>> +                doc.close();
>>> +        }
>>> +    }
>>>    }
>>>
>>> Added:
>>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>>
>>> URL:
>>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf?rev=1142109&view=auto
>>>
>>> ==============================================================================
>>>
>>> Binary file - no diff available.
>>>
>>> Propchange:
>>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>>
>>> ------------------------------------------------------------------------------
>>>
>>>       svn:mime-type = application/octet-stream
>>>
>>>
>>

Re: svn commit: r1142109 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/ main/java/org/apache/pdfbox/pdmodel/common/ test/java/org/apache/pdfbox/pdfparser/ test/jav...

Posted by Adam Nichols <mr...@gmail.com>.

Headers should all be fixed as of revision 1310946.  I updated all the
headers which were non-conforming (pdmodel/common/XrefEntry.java,
pdmodel/ConformingPDDocument.java, cos/COSDictionaryLateBinding.java,
cos/COSUnread.java).

If I missed any, let me know and I'll take care of it.

Thanks,
Adam

On 04/06/2012 08:45 AM, Andreas Lehmkuehler wrote:
> Hi,
> 
> I just realized that the headers of all new files aren't o.k., e.g. see [1]
> 
> @Adam
> Do you have the time to fix this? If not, would you give me permission
> to change the headers in question?
> 
> BR
> Andreas Lehmkühler
> 
> [1]
> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?view=markup&pathrev=1142109
> 
> 
> 
> Am 02.07.2011 00:28, schrieb adam@apache.org:
>> Author: adam
>> Date: Fri Jul  1 22:28:23 2011
>> New Revision: 1142109
>>
>> URL: http://svn.apache.org/viewvc?rev=1142109&view=rev
>> Log:
>> PDFBOX-1000: Conforming parser.  Initial commit to make it easier for
>> others to test&  contribute.
>>
>> Added:
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>
>>      pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/
>>     
>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf  
>> (with props)
>> Modified:
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>
>>     
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>
>>
>> Modified:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>> (original)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>> Fri Jul  1 22:28:23 2011
>> @@ -43,7 +43,7 @@ public class COSDictionary extends COSBa
>>        * The name-value pairs of this dictionary. The pairs are kept
>> in the
>>        * order they were added to the dictionary.
>>        */
>> -    private final Map<COSName, COSBase>  items =
>> +    protected final Map<COSName, COSBase>  items =
>>           new LinkedHashMap<COSName, COSBase>();
>>
>>       /**
>> @@ -1410,12 +1410,18 @@ public class COSDictionary extends COSBa
>>       /**
>>        * {@inheritDoc}
>>        */
>> -    public String toString()
>> -    {
>> +    @Override
>> +    public String toString() {
>>           StringBuilder retVal = new StringBuilder("COSDictionary{");
>> -        for( COSName key : items.keySet() )
>> -        {
>> -            retVal.append("(" + key + ":" +
>> getDictionaryObject(key).toString() + ") ");
>> +        for(COSName key : items.keySet()) {
>> +            retVal.append("(");
>> +            retVal.append(key);
>> +            retVal.append(":");
>> +            if(getDictionaryObject(key) != null)
>> +                retVal.append(getDictionaryObject(key).toString());
>> +            else
>> +                retVal.append("<null>");
>> +            retVal.append(") ");
>>           }
>>           retVal.append("}");
>>           return retVal.toString();
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,61 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.cos;
>> +
>> +import org.apache.commons.logging.Log;
>> +import org.apache.commons.logging.LogFactory;
>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class COSDictionaryLateBinding extends COSDictionary {
>> +    public static final Log log =
>> LogFactory.getLog(COSDictionaryLateBinding.class);
>> +    ConformingPDFParser parser;
>> +
>> +    public COSDictionaryLateBinding(ConformingPDFParser parser) {
>> +        super();
>> +        this.parser = parser;
>> +    }
>> +
>> +    /**
>> +     * This will get an object from this dictionary.  If the object
>> is a reference then it will
>> +     * dereference it and get it from the document.  If the object is
>> COSNull then
>> +     * null will be returned.
>> +     * @param key The key to the object that we are getting.
>> +     * @return The object that matches the key.
>> +     */
>> +    @Override
>> +    public COSBase getDictionaryObject(COSName key) {
>> +        COSBase retval = items.get(key);
>> +        if(retval instanceof COSObject) {
>> +            int objectNumber =
>> ((COSObject)retval).getObjectNumber().intValue();
>> +            int generation =
>> ((COSObject)retval).getGenerationNumber().intValue();
>> +            try {
>> +                retval = parser.getObject(objectNumber, generation);
>> +            } catch(Exception e) {
>> +                log.warn("Unable to read information for object " +
>> objectNumber);
>> +            }
>> +        }
>> +        if(retval instanceof COSNull) {
>> +            retval = null;
>> +        }
>> +        return retval;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,100 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.cos;
>> +
>> +import org.apache.pdfbox.exceptions.COSVisitorException;
>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class COSUnread extends COSBase {
>> +    private long objectNumber;
>> +    private long generation;
>> +    private ConformingPDFParser parser;
>> +
>> +    public COSUnread() {
>> +        super();
>> +    }
>> +
>> +    public COSUnread(long objectNumber, long generation) {
>> +        this();
>> +        this.objectNumber = objectNumber;
>> +        this.generation = generation;
>> +    }
>> +
>> +    public COSUnread(long objectNumber, long generation,
>> ConformingPDFParser parser) {
>> +        this(objectNumber, generation);
>> +        this.parser = parser;
>> +    }
>> +
>> +    @Override
>> +    public Object accept(ICOSVisitor visitor) throws
>> COSVisitorException {
>> +        // TODO: read the object using the parser (if available) and
>> visit that object
>> +        throw new UnsupportedOperationException("COSUnread can not be
>> written/visited.");
>> +    }
>> +
>> +    @Override
>> +    public String toString() {
>> +        return "COSUnread{" + objectNumber + "," + generation + "}";
>> +    }
>> +
>> +    /**
>> +     * @return the objectNumber
>> +     */
>> +    public long getObjectNumber() {
>> +        return objectNumber;
>> +    }
>> +
>> +    /**
>> +     * @param objectNumber the objectNumber to set
>> +     */
>> +    public void setObjectNumber(long objectNumber) {
>> +        this.objectNumber = objectNumber;
>> +    }
>> +
>> +    /**
>> +     * @return the generation
>> +     */
>> +    public long getGeneration() {
>> +        return generation;
>> +    }
>> +
>> +    /**
>> +     * @param generation the generation to set
>> +     */
>> +    public void setGeneration(long generation) {
>> +        this.generation = generation;
>> +    }
>> +
>> +    /**
>> +     * @return the parser
>> +     */
>> +    public ConformingPDFParser getParser() {
>> +        return parser;
>> +    }
>> +
>> +    /**
>> +     * @param parser the parser to set
>> +     */
>> +    public void setParser(ConformingPDFParser parser) {
>> +        this.parser = parser;
>> +    }
>> +
>> +}
>>
>> Modified:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>> (original)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>> Fri Jul  1 22:28:23 2011
>> @@ -110,6 +110,10 @@ public abstract class BaseParser
>>        */
>>       protected final boolean forceParsing;
>>
>> +    public BaseParser() {
>> +        this.forceParsing = FORCE_PARSING;
>> +    }
>> +
>>       /**
>>        * Constructor.
>>        *
>> @@ -876,7 +880,7 @@ public abstract class BaseParser
>>               throw new IOException("expected='/' actual='" + (char)c
>> + "'-" + c + " " + pdfSource );
>>           }
>>           // costruisce il nome
>> -        StringBuffer buffer = new StringBuffer();
>> +        StringBuilder buffer = new StringBuilder();
>>           c = pdfSource.read();
>>           while( c != -1 )
>>           {
>> @@ -1063,7 +1067,7 @@ public abstract class BaseParser
>>           {
>>               if( Character.isDigit(c) || c == '-' || c == '+' || c ==
>> '.')
>>               {
>> -                StringBuffer buf = new StringBuffer();
>> +                StringBuilder buf = new StringBuilder();
>>                   int ic = pdfSource.read();
>>                   c = (char)ic;
>>                   while( Character.isDigit( c )||
>> @@ -1118,7 +1122,7 @@ public abstract class BaseParser
>>       protected String readString() throws IOException
>>       {
>>           skipSpaces();
>> -        StringBuffer buffer = new StringBuffer();
>> +        StringBuilder buffer = new StringBuilder();
>>           int c = pdfSource.read();
>>           while( !isEndOfName((char)c)&&  !isClosing(c)&&  c != -1 )
>>           {
>> @@ -1148,7 +1152,7 @@ public abstract class BaseParser
>>           {
>>               c = pdfSource.read();
>>           }
>> -        StringBuffer buffer = new StringBuffer( theString.length() );
>> +        StringBuilder buffer = new StringBuilder( theString.length() );
>>           int charsRead = 0;
>>           while( !isEOL(c)&&  c != -1&&  charsRead<  theString.length() )
>>           {
>> @@ -1194,7 +1198,7 @@ public abstract class BaseParser
>>
>>           //average string size is around 2 and the normal string
>> buffer size is
>>           //about 16 so lets save some space.
>> -        StringBuffer buffer = new StringBuffer(length);
>> +        StringBuilder buffer = new StringBuilder(length);
>>           while( !isWhitespace(c)&&  !isClosing(c)&&  c != -1&& 
>> buffer.length()<  length&&
>>                   c != '['&&
>>                   c != '<'&&
>> @@ -1250,7 +1254,7 @@ public abstract class BaseParser
>>               throw new IOException( "Error: End-of-File, expected
>> line");
>>           }
>>
>> -        StringBuffer buffer = new StringBuffer( 11 );
>> +        StringBuilder buffer = new StringBuilder( 11 );
>>
>>           int c;
>>           while ((c = pdfSource.read()) != -1)
>> @@ -1300,10 +1304,9 @@ public abstract class BaseParser
>>       }
>>
>>       /**
>> -     * This will tell if the next byte is whitespace or not.
>> -     *
>> +     * This will tell if the next byte is whitespace or not.  These
>> values are
>> +     * specified in table 1 (page 12) of ISO 32000-1:2008.
>>        * @param c The character to check against whitespace
>> -     *
>>        * @return true if the next byte in the stream is a whitespace
>> character.
>>        */
>>       protected boolean isWhitespace( int c )
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,696 @@
>> +/*
>> + *  Copyright 2010 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdfparser;
>> +
>> +import java.io.File;
>> +import java.io.IOException;
>> +import java.util.ArrayList;
>> +import java.util.List;
>> +import java.util.Set;
>> +import org.apache.pdfbox.cos.COSArray;
>> +import org.apache.pdfbox.cos.COSBase;
>> +import org.apache.pdfbox.cos.COSDictionary;
>> +import org.apache.pdfbox.cos.COSDocument;
>> +import org.apache.pdfbox.cos.COSFloat;
>> +import org.apache.pdfbox.cos.COSInteger;
>> +import org.apache.pdfbox.cos.COSName;
>> +import org.apache.pdfbox.cos.COSNumber;
>> +import org.apache.pdfbox.cos.COSObject;
>> +import org.apache.pdfbox.cos.COSString;
>> +import org.apache.pdfbox.cos.COSUnread;
>> +import org.apache.pdfbox.io.RandomAccess;
>> +import org.apache.pdfbox.io.RandomAccessFile;
>> +import org.apache.pdfbox.pdmodel.ConformingPDDocument;
>> +import org.apache.pdfbox.pdmodel.PDDocument;
>> +import org.apache.pdfbox.pdmodel.common.XrefEntry;
>> +import org.apache.pdfbox.persistence.util.COSObjectKey;
>> +
>> +/**
>> + *
>> + * @author <a href="adam@apache.org">Adam Nichols</a>
>> + */
>> +public class ConformingPDFParser extends BaseParser {
>> +    protected RandomAccess inputFile;
>> +    List<XrefEntry>  xrefEntries;
>> +    private long currentOffset;
>> +    private ConformingPDDocument doc = null;
>> +    private boolean throwNonConformingException = true;
>> +    private boolean recursivlyRead = true;
>> +
>> +    /**
>> +     * Constructor.
>> +     *
>> +     * @param inputFile The file that contains the PDF document.
>> +     *
>> +     * @throws IOException If there is an error initializing the stream.
>> +     */
>> +    public ConformingPDFParser(File inputFile) throws IOException {
>> +        this.inputFile = new RandomAccessFile(inputFile, "r");
>> +    }
>> +
>> +    /**
>> +     * This will parse the stream and populate the COSDocument
>> object.  This will close
>> +     * the stream when it is done parsing.
>> +     *
>> +     * @throws IOException If there is an error reading from the
>> stream or corrupt data
>> +     * is found.
>> +     */
>> +    public void parse() throws IOException {
>> +        document = new COSDocument();
>> +        doc = new ConformingPDDocument(document);
>> +        currentOffset = inputFile.length()-1;
>> +        long xRefTableLocation = parseTrailerInformation();
>> +        currentOffset = xRefTableLocation;
>> +        parseXrefTable();
>> +        // now that we read the xref table and put null references in
>> the doc,
>> +        // we can dereference those objects now.
>> +        boolean oldValue = recursivlyRead;
>> +        recursivlyRead = false;
>> +        List<COSObjectKey>  keys = doc.getObjectKeysFromPool();
>> +        for(COSObjectKey key : keys) {
>> +            // getObject will put it into the document's object pool
>> for us
>> +            getObject(key.getNumber(), key.getGeneration());
>> +        }
>> +        recursivlyRead = oldValue;
>> +    }
>> +
>> +    /**
>> +     * This will get the document that was parsed.  parse() must be
>> called before this is called.
>> +     * When you are done with this document you must call close() on
>> it to release
>> +     * resources.
>> +     *
>> +     * @return The document that was parsed.
>> +     *
>> +     * @throws IOException If there is an error getting the document.
>> +     */
>> +    public COSDocument getDocument() throws IOException {
>> +        if( document == null ) {
>> +            throw new IOException( "You must call parse() before
>> calling getDocument()" );
>> +        }
>> +        return document;
>> +    }
>> +
>> +    /**
>> +     * This will get the PD document that was parsed.  When you are
>> done with
>> +     * this document you must call close() on it to release resources.
>> +     *
>> +     * @return The document at the PD layer.
>> +     *
>> +     * @throws IOException If there is an error getting the document.
>> +     */
>> +    public PDDocument getPDDocument() throws IOException {
>> +        return doc;
>> +    }
>> +
>> +    private boolean parseXrefTable() throws IOException {
>> +        String currentLine = readLine();
>> +        if(throwNonConformingException) {
>> +            if(!"xref".equals(currentLine))
>> +                throw new AssertionError("xref table not
>> found.\nExpected: xref\nFound: "+currentLine);
>> +        }
>> +
>> +        int objectNumber = readInt();
>> +        int entries = readInt();
>> +        xrefEntries = new ArrayList<XrefEntry>(entries);
>> +        for(int i=0; i<entries; i++)
>> +            xrefEntries.add(new XrefEntry(objectNumber++, readInt(),
>> readInt(), readLine()));
>> +
>> +        return true;
>> +    }
>> +
>> +    protected long parseTrailerInformation() throws IOException,
>> NumberFormatException {
>> +        long xrefLocation = -1;
>> +        consumeWhitespaceBackwards();
>> +        String currentLine = readLineBackwards();
>> +        if(throwNonConformingException) {
>> +            if(!"%%EOF".equals(currentLine))
>> +                throw new AssertionError("Invalid EOF
>> marker.\nExpected: %%EOF\nFound: "+currentLine);
>> +        }
>> +
>> +        xrefLocation = readLongBackwards();
>> +        currentLine = readLineBackwards();
>> +        if(throwNonConformingException) {
>> +            if(!"startxref".equals(currentLine))
>> +                throw new AssertionError("Invalid trailer.\nExpected:
>> startxref\nFound: "+currentLine);
>> +        }
>> +
>> +        document.setTrailer(readDictionaryBackwards());
>> +        consumeWhitespaceBackwards();
>> +        currentLine = readLineBackwards();
>> +        if(throwNonConformingException) {
>> +            if(!"trailer".equals(currentLine))
>> +                throw new AssertionError("Invalid trailer.\nExpected:
>> trailer\nFound: "+currentLine);
>> +        }
>> +
>> +        return xrefLocation;
>> +    }
>> +
>> +    protected byte readByteBackwards() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        currentOffset--;
>> +        return singleByte;
>> +    }
>> +
>> +    protected byte readByte() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        currentOffset++;
>> +        return singleByte;
>> +    }
>> +
>> +    protected String readBackwardUntilWhitespace() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        byte singleByte = readByteBackwards();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.insert(0, (char)singleByte);
>> +            singleByte = readByteBackwards();
>> +        }
>> +        return sb.toString();
>> +    }
>> +
>> +    /**
>> +     * This will read all bytes (backwards) until a non-whitespace
>> character is
>> +     * found.  To save you an extra read, the non-whitespace
>> character is
>> +     * returned.  If the current character is not whitespace, this
>> method will
>> +     * just return the current char.
>> +     * @return the first non-whitespace character found
>> +     * @throws IOException if there is an error reading from the file
>> +     */
>> +    protected byte consumeWhitespaceBackwards() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        if(!isWhitespace(singleByte))
>> +            return singleByte;
>> +
>> +        // we have some whitespace, let's consume it
>> +        while(isWhitespace(singleByte)) {
>> +            singleByte = readByteBackwards();
>> +        }
>> +        // readByteBackwards will decrement the currentOffset to
>> point to the byte
>> +        // before the one just read, so we increment it back to the
>> current byte
>> +        currentOffset++;
>> +        return singleByte;
>> +    }
>> +
>> +    /**
>> +     * This will read all bytes until a non-whitespace character is
>> +     * found.  To save you an extra read, the non-whitespace
>> character is
>> +     * returned.  If the current character is not whitespace, this
>> method will
>> +     * just return the current char.
>> +     * @return the first non-whitespace character found
>> +     * @throws IOException if there is an error reading from the file
>> +     */
>> +    protected byte consumeWhitespace() throws IOException {
>> +        inputFile.seek(currentOffset);
>> +        byte singleByte = (byte)inputFile.read();
>> +        if(!isWhitespace(singleByte))
>> +            return singleByte;
>> +
>> +        // we have some whitespace, let's consume it
>> +        while(isWhitespace(singleByte)) {
>> +            singleByte = readByte();
>> +        }
>> +        // readByte() will increment the currentOffset to point to the byte
>> +        // after the one just read, so we decrement it back to the
>> current byte
>> +        currentOffset--;
>> +        return singleByte;
>> +    }
>> +
>> +    /**
>> +     * This will consume any whitespace, read in bytes until
>> whitespace is found
>> +     * again and then parse the characters which have been read as a
>> long.  The
>> +     * current offset will then point at the first whitespace
>> character which
>> +     * precedes the number.
>> +     * @return the parsed number
>> +     * @throws IOException if there is an error reading from the file
>> +     * @throws NumberFormatException if the bytes read can not be
>> converted to a number
>> +     */
>> +    protected long readLongBackwards() throws IOException,
>> NumberFormatException {
>> +        StringBuilder sb = new StringBuilder();
>> +        consumeWhitespaceBackwards();
>> +        byte singleByte = readByteBackwards();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.insert(0, (char)singleByte);
>> +            singleByte = readByteBackwards();
>> +        }
>> +        if(sb.length() == 0)
>> +            throw new AssertionError("Number not found.  Expected
>> number at offset: " + currentOffset);
>> +        return Long.parseLong(sb.toString());
>> +    }
>> +
>> +    @Override
>> +    protected int readInt() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        consumeWhitespace();
>> +        byte singleByte = readByte();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.append((char)singleByte);
>> +            singleByte = readByte();
>> +        }
>> +        if(sb.length() == 0)
>> +            throw new AssertionError("Number not found.  Expected
>> number at offset: " + currentOffset);
>> +        return Integer.parseInt(sb.toString());
>> +    }
>> +
>> +    /**
>> +     * This will read in a number and return the COS version of the
>> number (be
>> +     * it a COSInteger or a COSFloat).
>> +     * @return the COSNumber which was read/parsed
>> +     * @throws IOException
>> +     */
>> +    protected COSNumber readNumber() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        consumeWhitespace();
>> +        byte singleByte = readByte();
>> +        while(!isWhitespace(singleByte)) {
>> +            sb.append((char)singleByte);
>> +            singleByte = readByte();
>> +        }
>> +        if(sb.length() == 0)
>> +            throw new AssertionError("Number not found.  Expected
>> number at offset: " + currentOffset);
>> +        return parseNumber(sb.toString());
>> +    }
>> +
>> +    protected COSNumber parseNumber(String number) throws IOException {
>> +        if(number.matches("^[0-9]+$"))
>> +            return COSInteger.get(number);
>> +        return new COSFloat(Float.parseFloat(number));
>> +    }
>> +
>> +    protected COSBase processCosObject(String string) throws
>> IOException {
>> +        if(string != null&&  string.endsWith(">")) {
>> +            // string of hex codes
>> +            return
>> COSString.createFromHexString(string.replaceAll("^<",
>> "").replaceAll(">$", ""));
>> +        }
>> +        return null;
>> +    }
>> +
>> +    protected COSBase readObjectBackwards() throws IOException {
>> +        COSBase obj = null;
>> +        consumeWhitespaceBackwards();
>> +        String lastSection = readBackwardUntilWhitespace();
>> +        if("R".equals(lastSection)) {
>> +            // indirect reference
>> +            long gen = readLongBackwards();
>> +            long number = readLongBackwards();
>> +            // We just put a placeholder in the pool for now, we'll
>> read the data later
>> +            doc.putObjectInPool(new COSUnread(), number, gen);
>> +            obj = new COSUnread(number, gen, this);
>> +        } else if(">>".equals(lastSection)) {
>> +            // dictionary
>> +            throw new RuntimeException("Not yet implemented");
>> +        } else if(lastSection != null&&  lastSection.endsWith("]")) {
>> +            // array
>> +            COSArray array = new COSArray();
>> +            lastSection = lastSection.replaceAll("]$", "");
>> +            while(!lastSection.startsWith("[")) {
>> +                if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a
>> hex string
>> +                   
>> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<",
>> "").replaceAll(">\\s*$", "")));
>> +                lastSection = readBackwardUntilWhitespace();
>> +            }
>> +            lastSection = lastSection.replaceAll("^\\[", "");
>> +            if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex
>> string
>> +               
>> array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<",
>> "").replaceAll(">\\s*$", "")));
>> +            obj = array;
>> +        } else if(lastSection != null&&  lastSection.endsWith(">")) {
>> +            // string of hex codes
>> +            obj = processCosObject(lastSection);
>> +        } else {
>> +            // try a number, otherwise fall back on a string
>> +            try {
>> +                Long.parseLong(lastSection);
>> +                obj = COSNumber.get(lastSection);
>> +            } catch(NumberFormatException e) {
>> +                throw new RuntimeException("Not yet implemented");
>> +            }
>> +        }
>> +
>> +        return obj;
>> +    }
>> +
>> +    protected COSName readNameBackwards() throws IOException {
>> +        String name = readBackwardUntilWhitespace();
>> +        name = name.replaceAll("^/", "");
>> +        return COSName.getPDFName(name);
>> +    }
>> +
>> +    public COSBase getObject(long objectNumber, long generation)
>> throws IOException {
>> +        // we could optionally check to see if parse() has been
>> called and
>> +        // throw an exception here, but I don't think that's really
>> necessary
>> +        XrefEntry entry = xrefEntries.get((int)objectNumber);
>> +        currentOffset = entry.getByteOffset();
>> +        return readObject(objectNumber, generation);
>> +    }
>> +
>> +    /**
>> +     * This will read an object from the inputFile at whatever our
>> currentOffset
>> +     * is.  If the object and generation are not the expected values
>> and this
>> +     * object is set to throw an exception for non-conforming
>> documents, then an
>> +     * exception will be thrown.
>> +     * @param objectNumber the object number you expect to read
>> +     * @param generation the generation you expect this object to be
>> +     * @return
>> +     */
>> +    public COSBase readObject(long objectNumber, long generation)
>> throws IOException {
>> +        // when recursivly reading, we always pull the object from
>> the filesystem
>> +        if(document != null&&  recursivlyRead) {
>> +            // check to see if it is in the document cache before
>> hitting the filesystem
>> +            COSBase obj = doc.getObjectFromPool(objectNumber,
>> generation);
>> +            if(obj != null)
>> +                return obj;
>> +        }
>> +
>> +        int actualObjectNumber = readInt();
>> +        if(objectNumber != actualObjectNumber)
>> +            if(throwNonConformingException)
>> +                throw new AssertionError("Object numer expected was " +
>> +                        objectNumber + " but actual was " +
>> actualObjectNumber);
>> +        consumeWhitespace();
>> +
>> +        int actualGeneration = readInt();
>> +        if(generation != actualGeneration)
>> +            if(throwNonConformingException)
>> +                throw new AssertionError("Generation expected was " +
>> +                        generation + " but actual was " +
>> actualGeneration);
>> +        consumeWhitespace();
>> +
>> +        String obj = readWord();
>> +        if(!"obj".equals(obj))
>> +            if(throwNonConformingException)
>> +                throw new AssertionError("Expected keyword 'obj' but
>> found " + obj);
>> +
>> +        // put placeholder object in doc to prevent infinite recursion
>> +        // e.g. read Root ->  dereference object ->  read object
>> which has /Parent ->  GOTO read Root
>> +        doc.putObjectInPool(new COSObject(null), objectNumber,
>> generation);
>> +        COSBase object = readObject();
>> +        doc.putObjectInPool(object, objectNumber, generation);
>> +        return object;
>> +    }
>> +
>> +    /**
>> +     * This actually reads the object data.
>> +     * @return the object which is read
>> +     * @throws IOException
>> +     */
>> +    protected COSBase readObject() throws IOException {
>> +        consumeWhitespace();
>> +        String string = readWord();
>> +        if(string.startsWith("<<")) {
>> +            // this is a dictionary
>> +            COSDictionary dictionary = new COSDictionary();
>> +            boolean atEndOfDictionary = false;
>> +            // remove the marker for the beginning of the dictionary
>> +            string = string.replaceAll("^<<", "");
>> +
>> +            if("".equals(string) || string.matches("^\\w$"))
>> +                string = readWord().trim();
>> +            while(!atEndOfDictionary) {
>> +                COSName name = COSName.getPDFName(string);
>> +                COSBase object = readObject();
>> +                dictionary.setItem(name, object);
>> +
>> +                byte singleByte = consumeWhitespace();
>> +                if(singleByte == '>') {
>> +                    readByte(); // get rid of the second '>'
>> +                    atEndOfDictionary = true;
>> +                }
>> +                if(!atEndOfDictionary)
>> +                    string = readWord().trim();
>> +            }
>> +            return dictionary;
>> +        } else if(string.startsWith("/")) {
>> +            // it's a dictionary label. i.e. /Type or /Pages or
>> something similar
>> +            COSBase name = COSName.getPDFName(string);
>> +            return name;
>> +        } else if(string.startsWith("-")) {
>> +            // it's a negative number
>> +            return parseNumber(string);
>> +        } else if(string.charAt(0)>= '0'&&  string.charAt(0)<= '9' ) {
>> +            // it's a COSInt or COSFloat, or a weak reference (i.e.
>> "3 0 R")
>> +            // we'll have to peek ahead a little to see if it's a
>> reference or not
>> +            long tempOffset = this.currentOffset;
>> +            consumeWhitespace();
>> +            String tempString = readWord();
>> +            if(tempString.matches("^[0-9]+$")) {
>> +                // it is an int, might be a weak reference...
>> +                tempString = readWord();
>> +                if(!"R".equals(tempString)) {
>> +                    // it's just a number, not a weak reference
>> +                    this.currentOffset = tempOffset;
>> +                    return parseNumber(string);
>> +                }
>> +            } else {
>> +                // it's just a number, not a weak reference
>> +                this.currentOffset = tempOffset;
>> +                return parseNumber(string);
>> +            }
>> +
>> +            // it wasn't a number, so we need to parse the
>> weak-reference
>> +            this.currentOffset = tempOffset;
>> +            int number = Integer.parseInt(string);
>> +            int gen = readInt();
>> +            String r = readWord();
>> +
>> +            if(!"R".equals(r))
>> +                if(throwNonConformingException)
>> +                    throw new AssertionError("Expected keyword 'R'
>> but found " + r);
>> +
>> +            if(recursivlyRead) {
>> +                // seek to the object, read it, seek back to current
>> location
>> +                long tempLocation = this.currentOffset;
>> +                this.currentOffset =
>> this.xrefEntries.get(number).getByteOffset();
>> +                COSBase returnValue = readObject(number, gen);
>> +                this.currentOffset = tempLocation;
>> +                return returnValue;
>> +            } else {
>> +                // Put a COSUnread there as a placeholder
>> +                COSObject obj = new COSObject(new COSUnread());
>> +                obj.setObjectNumber(COSInteger.get(number));
>> +                obj.setGenerationNumber(COSInteger.get(gen));
>> +                return obj;
>> +            }
>> +        } else if(string.startsWith("]")) {
>> +            // end of an array, just return null
>> +            if("]".equals(string))
>> +                return null;
>> +            int oldLength = string.length();
>> +            this.currentOffset -= oldLength;
>> +            return null;
>> +        } else if(string.startsWith("[")) {
>> +            // array of values
>> +            // we'll just pay attention to the first part (this is in
>> case there
>> +            // is no whitespace between the "[" and the first element)
>> +            int oldLength = string.length();
>> +            string = "[";
>> +            this.currentOffset -= (oldLength - string.length() + 1);
>> +
>> +            COSArray array = new COSArray();
>> +            COSBase object = readObject();
>> +            while(object != null) {
>> +                array.add(object);
>> +                object = readObject();
>> +            }
>> +            return array;
>> +        } else if(string.startsWith("(")) {
>> +            // this is a string (not hex encoded), strip off the '('
>> and read until ')'
>> +            StringBuilder sb = new StringBuilder(string.substring(1));
>> +            byte singleByte = readByte();
>> +            while(singleByte != ')') {
>> +                sb.append((char)singleByte);
>> +                singleByte = readByte();
>> +            }
>> +            return new COSString(sb.toString());
>> +        } else {
>> +            throw new RuntimeException("Not yet implemented: " + string
>> +                    + " loation=" + this.currentOffset);
>> +        }
>> +    }
>> +
>> +    /**
>> +     * This will read the next string from the stream.
>> +     * @return The string that was read from the stream.
>> +     * @throws IOException If there is an error reading from the stream.
>> +     */
>> +    @Override
>> +    protected String readString() throws IOException {
>> +        consumeWhitespace();
>> +        StringBuilder buffer = new StringBuilder();
>> +        int c = pdfSource.read();
>> +        while(!isEndOfName((char)c)&&  !isClosing(c)&&  c != -1) {
>> +            buffer.append( (char)c );
>> +            c = pdfSource.read();
>> +        }
>> +        if (c != -1) {
>> +            pdfSource.unread(c);
>> +        }
>> +        return buffer.toString();
>> +    }
>> +
>> +    protected COSDictionary readDictionaryBackwards() throws
>> IOException {
>> +        COSDictionary dict = new COSDictionary();
>> +
>> +        // consume the last two '>' chars which signify the end of
>> the dictionary
>> +        consumeWhitespaceBackwards();
>> +        byte singleByte = readByteBackwards();
>> +        if(throwNonConformingException) {
>> +            if(singleByte != '>')
>> +                throw new AssertionError("");
>> +        }
>> +        singleByte = readByteBackwards();
>> +        if(throwNonConformingException) {
>> +            if(singleByte != '>')
>> +                throw new AssertionError("");
>> +        }
>> +
>> +        // check to see if we're at the end of the dictionary
>> +        boolean atEndOfDictionary = false;
>> +        singleByte = consumeWhitespaceBackwards();
>> +        if(singleByte == '<') {
>> +            inputFile.seek(currentOffset-1);
>> +            atEndOfDictionary =  ((byte)inputFile.read()) == '<';
>> +        }
>> +
>> +        COSDictionary backwardsDictionary = new COSDictionary();
>> +        // while we're not at the end of the dictionary, read in entries
>> +        while(!atEndOfDictionary) {
>> +            COSBase object = readObjectBackwards();
>> +            COSName name = readNameBackwards();
>> +            backwardsDictionary.setItem(name, object);
>> +
>> +            singleByte = consumeWhitespaceBackwards();
>> +            if(singleByte == '<') {
>> +                inputFile.seek(currentOffset-1);
>> +                atEndOfDictionary =  ((byte)inputFile.read()) == '<';
>> +            }
>> +        }
>> +
>> +        // the dictionaries preserve the order keys were added, as
>> such we shall
>> +        // add them in the proper order, not the reverse order
>> +        Set<COSName>  backwardsKeys = backwardsDictionary.keySet();
>> +        for(int i = backwardsKeys.size()-1; i>=0; i--)
>> +            dict.setItem((COSName)backwardsKeys.toArray()[i],
>> backwardsDictionary.getItem((COSName)backwardsKeys.toArray()[i]));
>> +
>> +        // consume the last two '<' chars
>> +        readByteBackwards();
>> +        readByteBackwards();
>> +
>> +        return dict;
>> +    }
>> +
>> +    /**
>> +     * This will read a line starting with the byte at offset and going
>> +     * backwards until it finds a newline.  This should only be used
>> if we are
>> +     * certain that the data will only be text, and not binary data.
>> +     *
>> +     * @param offset the location of the file where we should start
>> reading
>> +     * @return the string which was read
>> +     * @throws IOException if there was an error reading data from
>> the file
>> +     */
>> +    protected String readLineBackwards() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        boolean endOfObject = false;
>> +
>> +        do {
>> +            // first we read the %%EOF marker
>> +            byte singleByte = readByteBackwards();
>> +            if(singleByte == '\n') {
>> +                // if there's a preceding \r, we'll eat that as well
>> +                inputFile.seek(currentOffset);
>> +                if((byte)inputFile.read() == '\r')
>> +                    currentOffset--;
>> +                endOfObject = true;
>> +            } else if(singleByte == '\r') {
>> +                endOfObject = true;
>> +            } else {
>> +                sb.insert(0, (char)singleByte);
>> +            }
>> +        } while(!endOfObject);
>> +
>> +        return sb.toString();
>> +    }
>> +
>> +    /**
>> +     * This will read a line starting with the byte at offset and going
>> +     * forward until it finds a newline.  This should only be used if
>> we are
>> +     * certain that the data will only be text, and not binary data.
>> +     * @param offset the location of the file where we should start
>> reading
>> +     * @return the string which was read
>> +     * @throws IOException if there was an error reading data from
>> the file
>> +     */
>> +    @Override
>> +    protected String readLine() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        boolean endOfLine = false;
>> +
>> +        do {
>> +            // read the next byte of the current line
>> +            byte singleByte = readByte();
>> +            if(singleByte == '\n') {
>> +                // if there's a following \r, we'll eat that as well
>> +                inputFile.seek(currentOffset);
>> +                if((byte)inputFile.read() == '\r')
>> +                    currentOffset++;
>> +                endOfLine = true;
>> +            } else if(singleByte == '\r') {
>> +                endOfLine = true;
>> +            } else {
>> +                sb.append((char)singleByte);
>> +            }
>> +        } while(!endOfLine);
>> +
>> +        return sb.toString();
>> +    }
>> +
>> +    protected String readWord() throws IOException {
>> +        StringBuilder sb = new StringBuilder();
>> +        boolean stop = true;
>> +        do {
>> +            byte singleByte = readByte();
>> +            stop = this.isWhitespace(singleByte);
>> +
>> +            // there are some additional characters which indicate
>> the next element/word has begun
>> +            // ignore the first char we read, b/c the first char is
>> the beginning of this object, not the next one
>> +            if(!stop&&  sb.length()>  0) {
>> +                stop = singleByte == '/' || singleByte == '['
>> +                        || singleByte == ']'
>> +                        || (singleByte == '>'&& 
>> !">".equals(sb.toString()));
>> +                if(stop) // we're stopping on a non-whitespace char,
>> decrement the
>> +                    this.currentOffset--; // counter so we don't miss
>> this character
>> +            }
>> +            if(!stop)
>> +                sb.append((char)singleByte);
>> +        } while(!stop);
>> +
>> +        return sb.toString();
>> +    }
>> +
>> +    /**
>> +     * @return the recursivlyRead
>> +     */
>> +    public boolean isRecursivlyRead() {
>> +        return recursivlyRead;
>> +    }
>> +
>> +    /**
>> +     * @param recursivlyRead the recursivlyRead to set
>> +     */
>> +    public void setRecursivlyRead(boolean recursivlyRead) {
>> +        this.recursivlyRead = recursivlyRead;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,115 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdmodel;
>> +
>> +import java.io.File;
>> +import java.io.IOException;
>> +import java.util.ArrayList;
>> +import java.util.HashMap;
>> +import java.util.List;
>> +import java.util.Map;
>> +import org.apache.pdfbox.cos.COSBase;
>> +import org.apache.pdfbox.cos.COSDocument;
>> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
>> +import org.apache.pdfbox.persistence.util.COSObjectKey;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class ConformingPDDocument extends PDDocument {
>> +    /**
>> +     * Maps ObjectKeys to a COSObject. Note that references to these
>> objects
>> +     * are also stored in COSDictionary objects that map a name to a
>> specific object.
>> +     */
>> +    private final Map<COSObjectKey, COSBase>  objectPool =
>> +        new HashMap<COSObjectKey, COSBase>();
>> +    private ConformingPDFParser parser = null;
>> +
>> +    public ConformingPDDocument() throws IOException {
>> +        super();
>> +    }
>> +
>> +    public ConformingPDDocument(COSDocument doc) throws IOException {
>> +        super(doc);
>> +    }
>> +
>> +    /**
>> +     * This will load a document from an input stream.
>> +     * @param input The File which contains the document.
>> +     * @return The document that was loaded.
>> +     * @throws IOException If there is an error reading from the stream.
>> +     */
>> +    public static PDDocument load(File input) throws IOException {
>> +        ConformingPDFParser parser = new ConformingPDFParser(input);
>> +        parser.parse();
>> +        return parser.getPDDocument();
>> +    }
>> +
>> +    /**
>> +     * This will get an object from the pool.
>> +     * @param key The object key.
>> +     * @return The object in the pool or a new one if it has not been
>> parsed yet.
>> +     * @throws IOException If there is an error getting the proxy
>> object.
>> +     */
>> +    public COSBase getObjectFromPool(COSObjectKey key) throws
>> IOException {
>> +        return objectPool.get(key);
>> +    }
>> +
>> +    /**
>> +     * This will get an object from the pool.
>> +     * @param key The object key.
>> +     * @return The object in the pool or a new one if it has not been
>> parsed yet.
>> +     * @throws IOException If there is an error getting the proxy
>> object.
>> +     */
>> +    public List<COSObjectKey>  getObjectKeysFromPool() throws
>> IOException {
>> +        List<COSObjectKey>  keys = new ArrayList<COSObjectKey>();
>> +        for(COSObjectKey key : objectPool.keySet())
>> +            keys.add(key);
>> +        return keys;
>> +    }
>> +
>> +    /**
>> +     * This will get an object from the pool.
>> +     * @param number the object number
>> +     * @param generation the generation of this object you wish to load
>> +     * @return The object in the pool
>> +     * @throws IOException If there is an error getting the proxy
>> object.
>> +     */
>> +    public COSBase getObjectFromPool(long number, long generation)
>> throws IOException {
>> +        return objectPool.get(new COSObjectKey(number, generation));
>> +    }
>> +
>> +    public void putObjectInPool(COSBase object, long number, long
>> generation) {
>> +        objectPool.put(new COSObjectKey(number, generation), object);
>> +    }
>> +
>> +    /**
>> +     * @return the parser
>> +     */
>> +    public ConformingPDFParser getParser() {
>> +        return parser;
>> +    }
>> +
>> +    /**
>> +     * @param parser the parser to set
>> +     */
>> +    public void setParser(ConformingPDFParser parser) {
>> +        this.parser = parser;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,43 @@
>> +/*
>> + *  Copyright 2011 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdmodel.common;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class XrefEntry {
>> +    private int objectNumber = 0;
>> +    private int byteOffset = 0;
>> +    private int generation = 0;
>> +    private boolean inUse = true;
>> +
>> +    public XrefEntry() {
>> +    }
>> +
>> +    public XrefEntry(int objectNumber, int byteOffset, int
>> generation, String inUse) {
>> +        this.objectNumber = objectNumber;
>> +        this.byteOffset = byteOffset;
>> +        this.generation = generation;
>> +        this.inUse = "n".equals(inUse);
>> +    }
>> +
>> +    public int getByteOffset() {
>> +        return byteOffset;
>> +    }
>> +}
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>> (added)
>> +++
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>> Fri Jul  1 22:28:23 2011
>> @@ -0,0 +1,73 @@
>> +/*
>> + *  Copyright 2010 adam.
>> + *
>> + *  Licensed under the Apache License, Version 2.0 (the "License");
>> + *  you may not use this file except in compliance with the License.
>> + *  You may obtain a copy of the License at
>> + *
>> + *       http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + *  Unless required by applicable law or agreed to in writing, software
>> + *  distributed under the License is distributed on an "AS IS" BASIS,
>> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> implied.
>> + *  See the License for the specific language governing permissions and
>> + *  limitations under the License.
>> + *  under the License.
>> + */
>> +
>> +package org.apache.pdfbox.pdfparser;
>> +
>> +import java.io.File;
>> +import java.net.URL;
>> +import org.apache.pdfbox.cos.COSDictionary;
>> +import org.junit.After;
>> +import org.junit.AfterClass;
>> +import org.junit.Before;
>> +import org.junit.BeforeClass;
>> +import org.junit.Test;
>> +import static org.junit.Assert.*;
>> +
>> +/**
>> + *
>> + * @author adam
>> + */
>> +public class ConformingPDFParserTest {
>> +
>> +    public ConformingPDFParserTest() {
>> +    }
>> +
>> +    @BeforeClass
>> +    public static void setUpClass() throws Exception {
>> +    }
>> +
>> +    @AfterClass
>> +    public static void tearDownClass() throws Exception {
>> +    }
>> +
>> +    @Before
>> +    public void setUp() {
>> +    }
>> +
>> +    @After
>> +    public void tearDown() {
>> +    }
>> +
>> +    /**
>> +     * Test of parse method, of class ConformingPDFParser.
>> +     */
>> +    @Test
>> +    public void testParse() throws Exception {
>> +        URL inputUrl =
>> ConformingPDFParser.class.getResource("gdb-refcard.pdf");
>> +        File inputFile = new File(inputUrl.toURI());
>> +        ConformingPDFParser instance = new
>> ConformingPDFParser(inputFile);
>> +        instance.parse();
>> +
>> +        COSDictionary trailer = instance.getDocument().getTrailer();
>> +        assertNotNull(trailer);
>> +        System.out.println("Trailer: " +
>> instance.getDocument().getTrailer().toString());
>> +        assertEquals(3, trailer.size());
>> +        assertNotNull(trailer.getDictionaryObject("Root"));
>> +        assertNotNull(trailer.getDictionaryObject("Info"));
>> +        assertNotNull(trailer.getDictionaryObject("Size"));
>> +    }
>> +}
>> \ No newline at end of file
>>
>> Modified:
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java?rev=1142109&r1=1142108&r2=1142109&view=diff
>>
>> ==============================================================================
>>
>> ---
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>> (original)
>> +++
>> pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>> Fri Jul  1 22:28:23 2011
>> @@ -16,7 +16,6 @@
>>    */
>>   package org.apache.pdfbox.pdmodel;
>>
>> -import java.io.File;
>>   import junit.framework.TestCase;
>>
>>   public class TestPDDocumentCatalog extends TestCase {
>> @@ -62,13 +61,29 @@ public class TestPDDocumentCatalog exten
>>               doc =
>> PDDocument.load(TestPDDocumentCatalog.class.getResourceAsStream("page_label.pdf"));
>>
>>               PDDocumentCatalog cat = doc.getDocumentCatalog();
>>               // getLabelsByPageIndices() should not throw an exception
>> -            String[] labels =
>> cat.getPageLabels().getLabelsByPageIndices();
>> +            cat.getPageLabels().getLabelsByPageIndices();
>>           } catch(Exception e) {
>> -            e.printStackTrace();
>>               fail("Threw exception!");
>>           } finally {
>>               if(doc != null)
>>                   doc.close();
>>           }
>>       }
>> +
>> +    /**
>> +     * Test case for
>> +     *<a href="https://issues.apache.org/jira/browse/PDFBOX-911"
>> +     *>PDFBOX-911</a>  - Method PDDocument.getNumberOfPages() returns
>> wrong
>> +     * number of pages
>> +     */
>> +    public void testGetNumberOfPages() throws Exception {
>> +        PDDocument doc = null;
>> +        try {
>> +            doc =
>> PDDocument.load(TestPDDocumentCatalog.class.getResource("test.unc.pdf"));
>> +            assertEquals(4, doc.getNumberOfPages());
>> +        } finally {
>> +            if(doc != null)
>> +                doc.close();
>> +        }
>> +    }
>>   }
>>
>> Added:
>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>
>> URL:
>> http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf?rev=1142109&view=auto
>>
>> ==============================================================================
>>
>> Binary file - no diff available.
>>
>> Propchange:
>> pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
>>
>> ------------------------------------------------------------------------------
>>
>>      svn:mime-type = application/octet-stream
>>
>>
>

Re: svn commit: r1142109 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/ main/java/org/apache/pdfbox/pdmodel/common/ test/java/org/apache/pdfbox/pdfparser/ test/jav...

Posted by Andreas Lehmkuehler <an...@lehmi.de>.

Hi,

I just realized that the headers of all the new files aren't o.k.; see, e.g., [1].

@Adam
Do you have the time to fix this? If not, would you give me permission to
change the headers in question?

BR
Andreas Lehmkühler

[1] 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?view=markup&pathrev=1142109


Am 02.07.2011 00:28, schrieb adam@apache.org:
> Author: adam
> Date: Fri Jul  1 22:28:23 2011
> New Revision: 1142109
>
> URL: http://svn.apache.org/viewvc?rev=1142109&view=rev
> Log:
> PDFBOX-1000: Conforming parser.  Initial commit to make it easier for others to test&  contribute.
>
> Added:
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
>      pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
>      pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/
>      pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf   (with props)
> Modified:
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
>      pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
>
> Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java?rev=1142109&r1=1142108&r2=1142109&view=diff
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java (original)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java Fri Jul  1 22:28:23 2011
> @@ -43,7 +43,7 @@ public class COSDictionary extends COSBa
>        * The name-value pairs of this dictionary. The pairs are kept in the
>        * order they were added to the dictionary.
>        */
> -    private final Map<COSName, COSBase>  items =
> +    protected final Map<COSName, COSBase>  items =
>           new LinkedHashMap<COSName, COSBase>();
>
>       /**
> @@ -1410,12 +1410,18 @@ public class COSDictionary extends COSBa
>       /**
>        * {@inheritDoc}
>        */
> -    public String toString()
> -    {
> +    @Override
> +    public String toString() {
>           StringBuilder retVal = new StringBuilder("COSDictionary{");
> -        for( COSName key : items.keySet() )
> -        {
> -            retVal.append("(" + key + ":" + getDictionaryObject(key).toString() + ") ");
> +        for(COSName key : items.keySet()) {
> +            retVal.append("(");
> +            retVal.append(key);
> +            retVal.append(":");
> +            if(getDictionaryObject(key) != null)
> +                retVal.append(getDictionaryObject(key).toString());
> +            else
> +                retVal.append("<null>");
> +            retVal.append(") ");
>           }
>           retVal.append("}");
>           return retVal.toString();
>
> Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java?rev=1142109&view=auto
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java (added)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionaryLateBinding.java Fri Jul  1 22:28:23 2011
> @@ -0,0 +1,61 @@
> +/*
> + *  Copyright 2011 adam.
> + *
> + *  Licensed under the Apache License, Version 2.0 (the "License");
> + *  you may not use this file except in compliance with the License.
> + *  You may obtain a copy of the License at
> + *
> + *       http://www.apache.org/licenses/LICENSE-2.0
> + *
> + *  Unless required by applicable law or agreed to in writing, software
> + *  distributed under the License is distributed on an "AS IS" BASIS,
> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + *  See the License for the specific language governing permissions and
> + *  limitations under the License.
> + *  under the License.
> + */
> +
> +package org.apache.pdfbox.cos;
> +
> +import org.apache.commons.logging.Log;
> +import org.apache.commons.logging.LogFactory;
> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
> +
> +/**
> + *
> + * @author adam
> + */
> +public class COSDictionaryLateBinding extends COSDictionary {
> +    public static final Log log = LogFactory.getLog(COSDictionaryLateBinding.class);
> +    ConformingPDFParser parser;
> +
> +    public COSDictionaryLateBinding(ConformingPDFParser parser) {
> +        super();
> +        this.parser = parser;
> +    }
> +
> +    /**
> +     * This will get an object from this dictionary.  If the object is a reference then it will
> +     * dereference it and get it from the document.  If the object is COSNull then
> +     * null will be returned.
> +     * @param key The key to the object that we are getting.
> +     * @return The object that matches the key.
> +     */
> +    @Override
> +    public COSBase getDictionaryObject(COSName key) {
> +        COSBase retval = items.get(key);
> +        if(retval instanceof COSObject) {
> +            int objectNumber = ((COSObject)retval).getObjectNumber().intValue();
> +            int generation = ((COSObject)retval).getGenerationNumber().intValue();
> +            try {
> +                retval = parser.getObject(objectNumber, generation);
> +            } catch(Exception e) {
> +                log.warn("Unable to read information for object " + objectNumber);
> +            }
> +        }
> +        if(retval instanceof COSNull) {
> +            retval = null;
> +        }
> +        return retval;
> +    }
> +}
>
> Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java?rev=1142109&view=auto
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java (added)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSUnread.java Fri Jul  1 22:28:23 2011
> @@ -0,0 +1,100 @@
> +/*
> + *  Copyright 2011 adam.
> + *
> + *  Licensed under the Apache License, Version 2.0 (the "License");
> + *  you may not use this file except in compliance with the License.
> + *  You may obtain a copy of the License at
> + *
> + *       http://www.apache.org/licenses/LICENSE-2.0
> + *
> + *  Unless required by applicable law or agreed to in writing, software
> + *  distributed under the License is distributed on an "AS IS" BASIS,
> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + *  See the License for the specific language governing permissions and
> + *  limitations under the License.
> + *  under the License.
> + */
> +
> +package org.apache.pdfbox.cos;
> +
> +import org.apache.pdfbox.exceptions.COSVisitorException;
> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
> +
> +/**
> + *
> + * @author adam
> + */
> +public class COSUnread extends COSBase {
> +    private long objectNumber;
> +    private long generation;
> +    private ConformingPDFParser parser;
> +
> +    public COSUnread() {
> +        super();
> +    }
> +
> +    public COSUnread(long objectNumber, long generation) {
> +        this();
> +        this.objectNumber = objectNumber;
> +        this.generation = generation;
> +    }
> +
> +    public COSUnread(long objectNumber, long generation, ConformingPDFParser parser) {
> +        this(objectNumber, generation);
> +        this.parser = parser;
> +    }
> +
> +    @Override
> +    public Object accept(ICOSVisitor visitor) throws COSVisitorException {
> +        // TODO: read the object using the parser (if available) and visit that object
> +        throw new UnsupportedOperationException("COSUnread can not be written/visited.");
> +    }
> +
> +    @Override
> +    public String toString() {
> +        return "COSUnread{" + objectNumber + "," + generation + "}";
> +    }
> +
> +    /**
> +     * @return the objectNumber
> +     */
> +    public long getObjectNumber() {
> +        return objectNumber;
> +    }
> +
> +    /**
> +     * @param objectNumber the objectNumber to set
> +     */
> +    public void setObjectNumber(long objectNumber) {
> +        this.objectNumber = objectNumber;
> +    }
> +
> +    /**
> +     * @return the generation
> +     */
> +    public long getGeneration() {
> +        return generation;
> +    }
> +
> +    /**
> +     * @param generation the generation to set
> +     */
> +    public void setGeneration(long generation) {
> +        this.generation = generation;
> +    }
> +
> +    /**
> +     * @return the parser
> +     */
> +    public ConformingPDFParser getParser() {
> +        return parser;
> +    }
> +
> +    /**
> +     * @param parser the parser to set
> +     */
> +    public void setParser(ConformingPDFParser parser) {
> +        this.parser = parser;
> +    }
> +
> +}
>
> Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java?rev=1142109&r1=1142108&r2=1142109&view=diff
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java (original)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java Fri Jul  1 22:28:23 2011
> @@ -110,6 +110,10 @@ public abstract class BaseParser
>        */
>       protected final boolean forceParsing;
>
> +    public BaseParser() {
> +        this.forceParsing = FORCE_PARSING;
> +    }
> +
>       /**
>        * Constructor.
>        *
> @@ -876,7 +880,7 @@ public abstract class BaseParser
>               throw new IOException("expected='/' actual='" + (char)c + "'-" + c + " " + pdfSource );
>           }
>           // costruisce il nome
> -        StringBuffer buffer = new StringBuffer();
> +        StringBuilder buffer = new StringBuilder();
>           c = pdfSource.read();
>           while( c != -1 )
>           {
> @@ -1063,7 +1067,7 @@ public abstract class BaseParser
>           {
>               if( Character.isDigit(c) || c == '-' || c == '+' || c == '.')
>               {
> -                StringBuffer buf = new StringBuffer();
> +                StringBuilder buf = new StringBuilder();
>                   int ic = pdfSource.read();
>                   c = (char)ic;
>                   while( Character.isDigit( c )||
> @@ -1118,7 +1122,7 @@ public abstract class BaseParser
>       protected String readString() throws IOException
>       {
>           skipSpaces();
> -        StringBuffer buffer = new StringBuffer();
> +        StringBuilder buffer = new StringBuilder();
>           int c = pdfSource.read();
>           while( !isEndOfName((char)c)&&  !isClosing(c)&&  c != -1 )
>           {
> @@ -1148,7 +1152,7 @@ public abstract class BaseParser
>           {
>               c = pdfSource.read();
>           }
> -        StringBuffer buffer = new StringBuffer( theString.length() );
> +        StringBuilder buffer = new StringBuilder( theString.length() );
>           int charsRead = 0;
>           while( !isEOL(c)&&  c != -1&&  charsRead<  theString.length() )
>           {
> @@ -1194,7 +1198,7 @@ public abstract class BaseParser
>
>           //average string size is around 2 and the normal string buffer size is
>           //about 16 so lets save some space.
> -        StringBuffer buffer = new StringBuffer(length);
> +        StringBuilder buffer = new StringBuilder(length);
>           while( !isWhitespace(c)&&  !isClosing(c)&&  c != -1&&  buffer.length()<  length&&
>                   c != '['&&
>                   c != '<'&&
> @@ -1250,7 +1254,7 @@ public abstract class BaseParser
>               throw new IOException( "Error: End-of-File, expected line");
>           }
>
> -        StringBuffer buffer = new StringBuffer( 11 );
> +        StringBuilder buffer = new StringBuilder( 11 );
>
>           int c;
>           while ((c = pdfSource.read()) != -1)
> @@ -1300,10 +1304,9 @@ public abstract class BaseParser
>       }
>
>       /**
> -     * This will tell if the next byte is whitespace or not.
> -     *
> +     * This will tell if the next byte is whitespace or not.  These values are
> +     * specified in table 1 (page 12) of ISO 32000-1:2008.
>        * @param c The character to check against whitespace
> -     *
>        * @return true if the next byte in the stream is a whitespace character.
>        */
>       protected boolean isWhitespace( int c )
>
> Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java?rev=1142109&view=auto
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java (added)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ConformingPDFParser.java Fri Jul  1 22:28:23 2011
> @@ -0,0 +1,696 @@
> +/*
> + *  Copyright 2010 adam.
> + *
> + *  Licensed under the Apache License, Version 2.0 (the "License");
> + *  you may not use this file except in compliance with the License.
> + *  You may obtain a copy of the License at
> + *
> + *       http://www.apache.org/licenses/LICENSE-2.0
> + *
> + *  Unless required by applicable law or agreed to in writing, software
> + *  distributed under the License is distributed on an "AS IS" BASIS,
> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + *  See the License for the specific language governing permissions and
> + *  limitations under the License.
> + *  under the License.
> + */
> +
> +package org.apache.pdfbox.pdfparser;
> +
> +import java.io.File;
> +import java.io.IOException;
> +import java.util.ArrayList;
> +import java.util.List;
> +import java.util.Set;
> +import org.apache.pdfbox.cos.COSArray;
> +import org.apache.pdfbox.cos.COSBase;
> +import org.apache.pdfbox.cos.COSDictionary;
> +import org.apache.pdfbox.cos.COSDocument;
> +import org.apache.pdfbox.cos.COSFloat;
> +import org.apache.pdfbox.cos.COSInteger;
> +import org.apache.pdfbox.cos.COSName;
> +import org.apache.pdfbox.cos.COSNumber;
> +import org.apache.pdfbox.cos.COSObject;
> +import org.apache.pdfbox.cos.COSString;
> +import org.apache.pdfbox.cos.COSUnread;
> +import org.apache.pdfbox.io.RandomAccess;
> +import org.apache.pdfbox.io.RandomAccessFile;
> +import org.apache.pdfbox.pdmodel.ConformingPDDocument;
> +import org.apache.pdfbox.pdmodel.PDDocument;
> +import org.apache.pdfbox.pdmodel.common.XrefEntry;
> +import org.apache.pdfbox.persistence.util.COSObjectKey;
> +
> +/**
> + *
> + * @author<a href="adam@apache.org">Adam Nichols</a>
> + */
> +public class ConformingPDFParser extends BaseParser {
> +    protected RandomAccess inputFile;
> +    List<XrefEntry>  xrefEntries;
> +    private long currentOffset;
> +    private ConformingPDDocument doc = null;
> +    private boolean throwNonConformingException = true;
> +    private boolean recursivlyRead = true;
> +
> +    /**
> +     * Constructor.
> +     *
> +     * @param input The input stream that contains the PDF document.
> +     *
> +     * @throws IOException If there is an error initializing the stream.
> +     */
> +    public ConformingPDFParser(File inputFile) throws IOException {
> +        this.inputFile = new RandomAccessFile(inputFile, "r");
> +    }
> +
> +    /**
> +     * This will parse the stream and populate the COSDocument object.  This will close
> +     * the stream when it is done parsing.
> +     *
> +     * @throws IOException If there is an error reading from the stream or corrupt data
> +     * is found.
> +     */
> +    public void parse() throws IOException {
> +        document = new COSDocument();
> +        doc = new ConformingPDDocument(document);
> +        currentOffset = inputFile.length()-1;
> +        long xRefTableLocation = parseTrailerInformation();
> +        currentOffset = xRefTableLocation;
> +        parseXrefTable();
> +        // now that we read the xref table and put null references in the doc,
> +        // we can deference those objects now.
> +        boolean oldValue = recursivlyRead;
> +        recursivlyRead = false;
> +        List<COSObjectKey>  keys = doc.getObjectKeysFromPool();
> +        for(COSObjectKey key : keys) {
> +            // getObject will put it into the document's object pool for us
> +            getObject(key.getNumber(), key.getGeneration());
> +        }
> +        recursivlyRead = oldValue;
> +    }
> +
> +    /**
> +     * This will get the document that was parsed.  parse() must be called before this is called.
> +     * When you are done with this document you must call close() on it to release
> +     * resources.
> +     *
> +     * @return The document that was parsed.
> +     *
> +     * @throws IOException If there is an error getting the document.
> +     */
> +    public COSDocument getDocument() throws IOException {
> +        if( document == null ) {
> +            throw new IOException( "You must call parse() before calling getDocument()" );
> +        }
> +        return document;
> +    }
> +
> +    /**
> +     * This will get the PD document that was parsed.  When you are done with
> +     * this document you must call close() on it to release resources.
> +     *
> +     * @return The document at the PD layer.
> +     *
> +     * @throws IOException If there is an error getting the document.
> +     */
> +    public PDDocument getPDDocument() throws IOException {
> +        return doc;
> +    }
> +
> +    private boolean parseXrefTable() throws IOException {
> +        String currentLine = readLine();
> +        if(throwNonConformingException) {
> +            if(!"xref".equals(currentLine))
> +                throw new AssertionError("xref table not found.\nExpected: xref\nFound: "+currentLine);
> +        }
> +
> +        int objectNumber = readInt();
> +        int entries = readInt();
> +        xrefEntries = new ArrayList<XrefEntry>(entries);
> +        for(int i=0; i<entries; i++)
> +            xrefEntries.add(new XrefEntry(objectNumber++, readInt(), readInt(), readLine()));
> +
> +        return true;
> +    }
> +
> +    protected long parseTrailerInformation() throws IOException, NumberFormatException {
> +        long xrefLocation = -1;
> +        consumeWhitespaceBackwards();
> +        String currentLine = readLineBackwards();
> +        if(throwNonConformingException) {
> +            if(!"%%EOF".equals(currentLine))
> +                throw new AssertionError("Invalid EOF marker.\nExpected: %%EOF\nFound: "+currentLine);
> +        }
> +
> +        xrefLocation = readLongBackwards();
> +        currentLine = readLineBackwards();
> +        if(throwNonConformingException) {
> +            if(!"startxref".equals(currentLine))
> +                throw new AssertionError("Invalid trailer.\nExpected: startxref\nFound: "+currentLine);
> +        }
> +
> +        document.setTrailer(readDictionaryBackwards());
> +        consumeWhitespaceBackwards();
> +        currentLine = readLineBackwards();
> +        if(throwNonConformingException) {
> +            if(!"trailer".equals(currentLine))
> +                throw new AssertionError("Invalid trailer.\nExpected: trailer\nFound: "+currentLine);
> +        }
> +
> +        return xrefLocation;
> +    }
> +
> +    protected byte readByteBackwards() throws IOException {
> +        inputFile.seek(currentOffset);
> +        byte singleByte = (byte)inputFile.read();
> +        currentOffset--;
> +        return singleByte;
> +    }
> +
> +    protected byte readByte() throws IOException {
> +        inputFile.seek(currentOffset);
> +        byte singleByte = (byte)inputFile.read();
> +        currentOffset++;
> +        return singleByte;
> +    }
> +
> +    /**
> +     * Reads bytes backwards, starting at the current offset, until a
> +     * whitespace byte is read.  The whitespace byte is consumed but is not
> +     * part of the result.
> +     * @return the bytes which were read, in file order
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected String readBackwardUntilWhitespace() throws IOException {
> +        StringBuilder token = new StringBuilder();
> +        for(byte current = readByteBackwards(); !isWhitespace(current); current = readByteBackwards()) {
> +            // we are walking backwards, so every byte goes in front
> +            token.insert(0, (char)current);
> +        }
> +        return token.toString();
> +    }
> +
> +    /**
> +     * Skips whitespace while walking backwards through the file.  If the byte
> +     * at the current offset is not whitespace it is returned and the current
> +     * offset is left alone.  Otherwise bytes are read backwards until a
> +     * non-whitespace byte shows up; the current offset then points at that
> +     * byte, which is returned to save the caller an extra read.
> +     * @return the first non-whitespace byte found
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected byte consumeWhitespaceBackwards() throws IOException {
> +        inputFile.seek(currentOffset);
> +        byte current = (byte)inputFile.read();
> +        if(isWhitespace(current)) {
> +            do {
> +                current = readByteBackwards();
> +            } while(isWhitespace(current));
> +            // readByteBackwards has already stepped past the byte it returned,
> +            // step forward again so the offset points at that byte
> +            currentOffset++;
> +        }
> +        return current;
> +    }
> +
> +    /**
> +     * Skips whitespace while walking forwards through the file.  If the byte
> +     * at the current offset is not whitespace it is returned and the current
> +     * offset is left alone.  Otherwise bytes are read until a non-whitespace
> +     * byte shows up; the current offset then points at that byte, which is
> +     * returned to save the caller an extra read.
> +     * @return the first non-whitespace byte found
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected byte consumeWhitespace() throws IOException {
> +        inputFile.seek(currentOffset);
> +        byte current = (byte)inputFile.read();
> +        if(isWhitespace(current)) {
> +            do {
> +                current = readByte();
> +            } while(isWhitespace(current));
> +            // readByte has already stepped past the byte it returned, step
> +            // back again so the offset points at that byte
> +            currentOffset--;
> +        }
> +        return current;
> +    }
> +
> +    /**
> +     * This will consume any whitespace (backwards), read bytes backwards until
> +     * whitespace is found again and then parse what has been read as a long.
> +     * The whitespace byte which ends the number is consumed as well, so the
> +     * current offset ends up on the byte before that whitespace byte.
> +     * @return the parsed number
> +     * @throws IOException if there is an error reading from the file
> +     * @throws NumberFormatException if the bytes read can not be converted to a number
> +     */
> +    protected long readLongBackwards() throws IOException, NumberFormatException {
> +        consumeWhitespaceBackwards();
> +        String digits = readBackwardUntilWhitespace();
> +        if(digits.length() == 0)
> +            throw new AssertionError("Number not found.  Expected number at offset: " + currentOffset);
> +        return Long.parseLong(digits);
> +    }
> +
> +    /**
> +     * This will consume any whitespace, read bytes until whitespace is found
> +     * again and then parse what has been read as an int.  The whitespace byte
> +     * which ends the number is consumed as well.
> +     * @return the parsed number
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    @Override
> +    protected int readInt() throws IOException {
> +        consumeWhitespace();
> +        StringBuilder digits = new StringBuilder();
> +        for(byte current = readByte(); !isWhitespace(current); current = readByte()) {
> +            digits.append((char)current);
> +        }
> +        if(digits.length() == 0)
> +            throw new AssertionError("Number not found.  Expected number at offset: " + currentOffset);
> +        return Integer.parseInt(digits.toString());
> +    }
> +
> +    /**
> +     * This will consume any whitespace, read bytes until whitespace is found
> +     * again and hand what has been read to parseNumber, which decides whether
> +     * the result is a COSInteger or a COSFloat.
> +     * @return the COSNumber which was read/parsed
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected COSNumber readNumber() throws IOException {
> +        consumeWhitespace();
> +        StringBuilder text = new StringBuilder();
> +        while(true) {
> +            byte current = readByte();
> +            if(isWhitespace(current))
> +                break;
> +            text.append((char)current);
> +        }
> +        if(text.length() == 0)
> +            throw new AssertionError("Number not found.  Expected number at offset: " + currentOffset);
> +        return parseNumber(text.toString());
> +    }
> +
> +    /**
> +     * Converts the textual form of a number into its COS representation.
> +     * A string made up of digits only, optionally preceded by a minus sign,
> +     * is handed to COSInteger.get; everything else is parsed as a float.
> +     * @param number the text to convert
> +     * @return the COSNumber for the given text
> +     * @throws IOException declared for the COS number factory which is called
> +     * @throws NumberFormatException if the text is neither an integer nor a float
> +     */
> +    protected COSNumber parseNumber(String number) throws IOException {
> +        // the sign has to be accepted here: readObject() sends every word which
> +        // starts with '-' to this method, and without the optional sign a
> +        // negative integer such as -1 would end up as a COSFloat
> +        if(number.matches("^-?[0-9]+$"))
> +            return COSInteger.get(number);
> +        return new COSFloat(Float.parseFloat(number));
> +    }
> +
> +    /**
> +     * Converts the textual form of an object into a COS object.  Only hex
> +     * strings (text which ends with '>') are handled so far.
> +     * @param string the text to convert, may be null
> +     * @return the COSString for a hex string, null for anything else
> +     * @throws IOException if the hex string can not be converted
> +     */
> +    protected COSBase processCosObject(String string) throws IOException {
> +        if(string == null || !string.endsWith(">"))
> +            return null;
> +
> +        // string of hex codes, strip the surrounding '<' and '>'
> +        String hex = string.replaceAll("^<", "").replaceAll(">$", "");
> +        return COSString.createFromHexString(hex);
> +    }
> +
> +    /**
> +     * Reads a single object while walking backwards through the file.  The
> +     * last whitespace delimited section decides what is read: an indirect
> +     * reference (section is "R"), an array (section ends with "]"), a hex
> +     * string (section ends with ">") or an integer.  Dictionaries and anything
> +     * which is not an integer raise a RuntimeException ("Not yet implemented").
> +     * @return the object which was read
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected COSBase readObjectBackwards() throws IOException {
> +        COSBase obj = null;
> +        consumeWhitespaceBackwards();
> +        String lastSection = readBackwardUntilWhitespace();
> +        if("R".equals(lastSection)) {
> +            // indirect reference, walking backwards the generation comes
> +            // before the object number
> +            long gen = readLongBackwards();
> +            long number = readLongBackwards();
> +            // We just put a placeholder in the pool for now, we'll read the data later
> +            doc.putObjectInPool(new COSUnread(), number, gen);
> +            obj = new COSUnread(number, gen, this);
> +        } else if(">>".equals(lastSection)) {
> +            // dictionary
> +            throw new RuntimeException("Not yet implemented");
> +        } else if(lastSection != null&&  lastSection.endsWith("]")) {
> +            // array, only hex string elements are picked up, other elements
> +            // are skipped without a warning
> +            // NOTE(review): the elements are read last to first but appended
> +            // with add(), so the array looks reversed compared to the file -
> +            // confirm whether the order matters (e.g. for /ID)
> +            COSArray array = new COSArray();
> +            lastSection = lastSection.replaceAll("]$", "");
> +            while(!lastSection.startsWith("[")) {
> +                if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
> +                    array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
> +                lastSection = readBackwardUntilWhitespace();
> +            }
> +            // the section which starts with '[' may carry the first element as well
> +            lastSection = lastSection.replaceAll("^\\[", "");
> +            if(lastSection.matches("^\\s*<.*>\\s*$")) // it's a hex string
> +                array.add(COSString.createFromHexString(lastSection.replaceAll("^\\s*<", "").replaceAll(">\\s*$", "")));
> +            obj = array;
> +        } else if(lastSection != null&&  lastSection.endsWith(">")) {
> +            // string of hex codes
> +            obj = processCosObject(lastSection);
> +        } else {
> +            // try a number, anything else is not supported yet
> +            try {
> +                // parseLong is only used as a check, the value itself is discarded
> +                Long.parseLong(lastSection);
> +                obj = COSNumber.get(lastSection);
> +            } catch(NumberFormatException e) {
> +                throw new RuntimeException("Not yet implemented");
> +            }
> +        }
> +
> +        return obj;
> +    }
> +
> +    /**
> +     * Reads a name while walking backwards through the file.  Bytes are read
> +     * until whitespace is found and a leading '/' is removed.
> +     * @return the COSName for the text which was read
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected COSName readNameBackwards() throws IOException {
> +        String token = readBackwardUntilWhitespace();
> +        return COSName.getPDFName(token.replaceAll("^/", ""));
> +    }
> +
> +    /**
> +     * Moves the current offset to the byte offset of the xref entry for the
> +     * given object and reads the object found there.
> +     * NOTE(review): the entry is looked up by list position, which presumably
> +     * requires the xref table to start at object 0 - confirm.
> +     * @param objectNumber the number of the object to read
> +     * @param generation the generation the object is expected to have
> +     * @return the object which was read
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    public COSBase getObject(long objectNumber, long generation) throws IOException {
> +        // we could optionally check whether parse() has been called and throw
> +        // an exception here, but that does not seem really necessary
> +        currentOffset = xrefEntries.get((int)objectNumber).getByteOffset();
> +        return readObject(objectNumber, generation);
> +    }
> +
> +    /**
> +     * This will read an object from the inputFile at whatever our currentOffset
> +     * is.  If the object number, the generation or the obj keyword are not the
> +     * expected values and this parser is set to throw an exception for
> +     * non-conforming documents, then an AssertionError will be thrown.
> +     * The object which was read is stored in the object pool of the document.
> +     * @param objectNumber the object number you expect to read
> +     * @param generation the generation you expect this object to be
> +     * @return the object which was read, or the pooled object if recursive
> +     * reading is on and the pool already holds an entry for it
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    public COSBase readObject(long objectNumber, long generation) throws IOException {
> +        // when recursively reading, an object which is already in the pool is
> +        // returned as is, only unknown objects are pulled from the filesystem
> +        // NOTE(review): the null check is on document while the pool lives in
> +        // doc - confirm that this is intended
> +        if(document != null && recursivlyRead) {
> +            COSBase pooled = doc.getObjectFromPool(objectNumber, generation);
> +            if(pooled != null)
> +                return pooled;
> +        }
> +
> +        int actualObjectNumber = readInt();
> +        if(objectNumber != actualObjectNumber && throwNonConformingException)
> +            throw new AssertionError("Object number expected was " +
> +                    objectNumber + " but actual was " + actualObjectNumber);
> +        consumeWhitespace();
> +
> +        int actualGeneration = readInt();
> +        if(generation != actualGeneration && throwNonConformingException)
> +            throw new AssertionError("Generation expected was " +
> +                    generation + " but actual was " + actualGeneration);
> +        consumeWhitespace();
> +
> +        String obj = readWord();
> +        if(!"obj".equals(obj) && throwNonConformingException)
> +            throw new AssertionError("Expected keyword 'obj' but found " + obj);
> +
> +        // put placeholder object in doc to prevent infinite recursion
> +        // e.g. read Root -> dereference object -> read object which has /Parent -> GOTO read Root
> +        doc.putObjectInPool(new COSObject(null), objectNumber, generation);
> +        COSBase object = readObject();
> +        doc.putObjectInPool(object, objectNumber, generation);
> +        return object;
> +    }
> +
> +    /**
> +     * This actually reads the object data, starting at the current offset.
> +     * The first word which is read decides what is parsed: a dictionary
> +     * ("<<"), a name ("/"), a number or an indirect reference (leading digit
> +     * or '-'), an array ("[") or a literal string ("(").  A word which starts
> +     * with "]" marks the end of an array and yields null.
> +     * @return the object which is read, or null at the end of an array
> +     * @throws IOException if there is an error reading from the file
> +     * @throws RuntimeException if the word starts an object type which is not supported yet
> +     */
> +    protected COSBase readObject() throws IOException {
> +        consumeWhitespace();
> +        String string = readWord();
> +        if(string.startsWith("<<")) {
> +            // this is a dictionary
> +            COSDictionary dictionary = new COSDictionary();
> +            boolean atEndOfDictionary = false;
> +            // remove the marker for the beginning of the dictionary
> +            string = string.replaceAll("^<<", "");
> +
> +            // nothing useful left after the marker, so the first key is the next word
> +            if("".equals(string) || string.matches("^\\w$"))
> +                string = readWord().trim();
> +            while(!atEndOfDictionary) {
> +                // string holds the key, the value is read recursively
> +                COSName name = COSName.getPDFName(string);
> +                COSBase object = readObject();
> +                dictionary.setItem(name, object);
> +
> +                byte singleByte = consumeWhitespace();
> +                if(singleByte == '>') {
> +                    // NOTE(review): consumeWhitespace leaves the offset on the
> +                    // '>' it returns, so this single read looks like it
> +                    // consumes only one of the two '>' - confirm
> +                    readByte(); // get rid of the second '>'
> +                    atEndOfDictionary = true;
> +                }
> +                if(!atEndOfDictionary)
> +                    string = readWord().trim();
> +            }
> +            return dictionary;
> +        } else if(string.startsWith("/")) {
> +            // it's a dictionary label. i.e. /Type or /Pages or something similar
> +            COSBase name = COSName.getPDFName(string);
> +            return name;
> +        } else if(string.startsWith("-")) {
> +            // it's a negative number
> +            return parseNumber(string);
> +        } else if(string.charAt(0)>= '0'&&  string.charAt(0)<= '9' ) {
> +            // it's a COSInt or COSFloat, or a weak reference (i.e. "3 0 R")
> +            // we'll have to peek ahead a little to see if it's a reference or not
> +            // tempOffset remembers where the peeking started so it can be undone
> +            long tempOffset = this.currentOffset;
> +            consumeWhitespace();
> +            String tempString = readWord();
> +            if(tempString.matches("^[0-9]+$")) {
> +                // it is an int, might be a weak reference...
> +                tempString = readWord();
> +                if(!"R".equals(tempString)) {
> +                    // it's just a number, not a weak reference
> +                    this.currentOffset = tempOffset;
> +                    return parseNumber(string);
> +                }
> +            } else {
> +                // it's just a number, not a weak reference
> +                this.currentOffset = tempOffset;
> +                return parseNumber(string);
> +            }
> +
> +            // it wasn't a plain number, so we need to parse the weak-reference;
> +            // rewind and read the generation and the R keyword again
> +            this.currentOffset = tempOffset;
> +            int number = Integer.parseInt(string);
> +            int gen = readInt();
> +            String r = readWord();
> +
> +            if(!"R".equals(r))
> +                if(throwNonConformingException)
> +                    throw new AssertionError("Expected keyword 'R' but found " + r);
> +
> +            if(recursivlyRead) {
> +                // seek to the object, read it, seek back to current location
> +                long tempLocation = this.currentOffset;
> +                this.currentOffset = this.xrefEntries.get(number).getByteOffset();
> +                COSBase returnValue = readObject(number, gen);
> +                this.currentOffset = tempLocation;
> +                return returnValue;
> +            } else {
> +                // Put a COSUnread there as a placeholder
> +                COSObject obj = new COSObject(new COSUnread());
> +                obj.setObjectNumber(COSInteger.get(number));
> +                obj.setGenerationNumber(COSInteger.get(gen));
> +                return obj;
> +            }
> +        } else if(string.startsWith("]")) {
> +            // end of an array, just return null
> +            if("]".equals(string))
> +                return null;
> +            // more than the ']' was read, move the offset back by the length of the word
> +            // NOTE(review): this also un-reads the ']' itself - confirm that
> +            // the next read does not see it again
> +            int oldLength = string.length();
> +            this.currentOffset -= oldLength;
> +            return null;
> +        } else if(string.startsWith("[")) {
> +            // array of values
> +            // we'll just pay attention to the first part (this is in case there
> +            // is no whitespace between the "[" and the first element)
> +            int oldLength = string.length();
> +            string = "[";
> +            // un-read everything but the '[' (plus the byte which ended the word)
> +            this.currentOffset -= (oldLength - string.length() + 1);
> +
> +            // elements are read until readObject signals the end of the array with null
> +            COSArray array = new COSArray();
> +            COSBase object = readObject();
> +            while(object != null) {
> +                array.add(object);
> +                object = readObject();
> +            }
> +            return array;
> +        } else if(string.startsWith("(")) {
> +            // this is a string (not hex encoded), strip off the '(' and read until ')'
> +            // NOTE(review): the first ')' ends the string, nested or escaped
> +            // parentheses do not appear to be handled - confirm
> +            StringBuilder sb = new StringBuilder(string.substring(1));
> +            byte singleByte = readByte();
> +            while(singleByte != ')') {
> +                sb.append((char)singleByte);
> +                singleByte = readByte();
> +            }
> +            return new COSString(sb.toString());
> +        } else {
> +            throw new RuntimeException("Not yet implemented: " + string
> +                    + " loation=" + this.currentOffset);
> +        }
> +    }
> +
> +    /**
> +     * This will read the next string from pdfSource.  Characters are read
> +     * until an end-of-name character, a closing character or the end of the
> +     * stream is found; the character which stopped the read is pushed back.
> +     * NOTE(review): the whitespace is consumed on inputFile while the string
> +     * itself comes from pdfSource - confirm that both are meant to be used.
> +     * @return The string that was read from the stream.
> +     * @throws IOException If there is an error reading from the stream.
> +     */
> +    @Override
> +    protected String readString() throws IOException {
> +        consumeWhitespace();
> +        StringBuilder text = new StringBuilder();
> +        int next;
> +        for(next = pdfSource.read();
> +                !(isEndOfName((char)next) || isClosing(next) || next == -1);
> +                next = pdfSource.read()) {
> +            text.append((char)next);
> +        }
> +        // give the terminating character back, unless we ran out of data
> +        if(next != -1) {
> +            pdfSource.unread(next);
> +        }
> +        return text.toString();
> +    }
> +
> +    /**
> +     * Reads a dictionary while walking backwards through the file.  The
> +     * current offset is expected to be on (or on whitespace after) the closing
> +     * ">>".  The entries are read last to first and then copied in reverse,
> +     * so that the returned dictionary has them in the order of the file.
> +     * Finally the opening "<<" is consumed.
> +     * @return the dictionary which was read
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected COSDictionary readDictionaryBackwards() throws IOException {
> +        COSDictionary dict = new COSDictionary();
> +
> +        // consume the last two '>' chars which signify the end of the dictionary
> +        consumeWhitespaceBackwards();
> +        byte singleByte = readByteBackwards();
> +        if(throwNonConformingException && singleByte != '>')
> +            throw new AssertionError("Invalid dictionary.\nExpected: >\nFound: " + (char)singleByte);
> +        singleByte = readByteBackwards();
> +        if(throwNonConformingException && singleByte != '>')
> +            throw new AssertionError("Invalid dictionary.\nExpected: >\nFound: " + (char)singleByte);
> +
> +        // check to see if we're at the end of the dictionary, i.e. whether the
> +        // current byte and the one before it are both '<'
> +        boolean atEndOfDictionary = false;
> +        singleByte = consumeWhitespaceBackwards();
> +        if(singleByte == '<') {
> +            inputFile.seek(currentOffset-1);
> +            atEndOfDictionary = ((byte)inputFile.read()) == '<';
> +        }
> +
> +        COSDictionary backwardsDictionary = new COSDictionary();
> +        // while we're not at the end of the dictionary, read in entries;
> +        // walking backwards the value comes before its name
> +        while(!atEndOfDictionary) {
> +            COSBase object = readObjectBackwards();
> +            COSName name = readNameBackwards();
> +            backwardsDictionary.setItem(name, object);
> +
> +            singleByte = consumeWhitespaceBackwards();
> +            if(singleByte == '<') {
> +                inputFile.seek(currentOffset-1);
> +                atEndOfDictionary = ((byte)inputFile.read()) == '<';
> +            }
> +        }
> +
> +        // the dictionaries preserve the order keys were added, as such we shall
> +        // add them in the proper order, not the reverse order; the keys are
> +        // copied into an array once instead of once per lookup
> +        Set<COSName> backwardsKeys = backwardsDictionary.keySet();
> +        COSName[] keys = backwardsKeys.toArray(new COSName[backwardsKeys.size()]);
> +        for(int i = keys.length-1; i>=0; i--)
> +            dict.setItem(keys[i], backwardsDictionary.getItem(keys[i]));
> +
> +        // consume the last two '<' chars
> +        readByteBackwards();
> +        readByteBackwards();
> +
> +        return dict;
> +    }
> +
> +    /**
> +     * This will read a line starting with the byte at the current offset and
> +     * going backwards until it finds a newline.  A \n which is preceded by a
> +     * \r is consumed together with that \r.  This should only be used if we
> +     * are certain that the data will only be text, and not binary data.
> +     *
> +     * @return the string which was read, without the newline
> +     * @throws IOException if there was an error reading data from the file
> +     */
> +    protected String readLineBackwards() throws IOException {
> +        StringBuilder line = new StringBuilder();
> +
> +        while(true) {
> +            byte current = readByteBackwards();
> +            if(current == '\r')
> +                break;
> +            if(current == '\n') {
> +                // if there's a preceding \r, we'll eat that as well
> +                inputFile.seek(currentOffset);
> +                if((byte)inputFile.read() == '\r')
> +                    currentOffset--;
> +                break;
> +            }
> +            // we are walking backwards, so every byte goes in front
> +            line.insert(0, (char)current);
> +        }
> +
> +        return line.toString();
> +    }
> +
> +    /**
> +     * This will read a line starting with the byte at the current offset and
> +     * going forward until it finds an end of line marker (\r, \n or \r\n).
> +     * The marker is consumed but is not part of the result.  This should only
> +     * be used if we are certain that the data will only be text, and not
> +     * binary data.
> +     * @return the string which was read, without the end of line marker
> +     * @throws IOException if there was an error reading data from the file
> +     */
> +    @Override
> +    protected String readLine() throws IOException {
> +        StringBuilder sb = new StringBuilder();
> +        boolean endOfLine = false;
> +
> +        do {
> +            byte singleByte = readByte();
> +            if(singleByte == '\r') {
> +                // reading forward a \r comes first; if a \n follows it belongs
> +                // to the same end of line marker, so we'll eat that as well
> +                inputFile.seek(currentOffset);
> +                if((byte)inputFile.read() == '\n')
> +                    currentOffset++;
> +                endOfLine = true;
> +            } else if(singleByte == '\n') {
> +                endOfLine = true;
> +            } else {
> +                sb.append((char)singleByte);
> +            }
> +        } while(!endOfLine);
> +
> +        return sb.toString();
> +    }
> +
> +    /**
> +     * Reads the next word, starting at the current offset.  A word ends at
> +     * whitespace (which is consumed) or, once at least one character has been
> +     * read, in front of a character which begins the next element: '/', '[',
> +     * ']' or '>' (a '>' directly after a single '>' still belongs to the word).
> +     * @return the word which was read
> +     * @throws IOException if there is an error reading from the file
> +     */
> +    protected String readWord() throws IOException {
> +        StringBuilder word = new StringBuilder();
> +        while(true) {
> +            byte current = readByte();
> +            if(isWhitespace(current))
> +                break;
> +
> +            // the first char is the beginning of this word, never the next one
> +            if(word.length() > 0) {
> +                boolean startsNextElement = current == '/' || current == '['
> +                        || current == ']'
> +                        || (current == '>' && !">".equals(word.toString()));
> +                if(startsNextElement) {
> +                    // we're stopping on a non-whitespace char, step back so
> +                    // the next read does not miss this character
> +                    this.currentOffset--;
> +                    break;
> +                }
> +            }
> +            word.append((char)current);
> +        }
> +
> +        return word.toString();
> +    }
> +
> +    /**
> +     * Tells whether this parser reads referenced objects recursively.  When
> +     * set, readObject() follows an indirect reference and reads the target
> +     * right away instead of returning a placeholder.
> +     * @return the recursivlyRead
> +     */
> +    public boolean isRecursivlyRead() {
> +        return recursivlyRead;
> +    }
> +
> +    /**
> +     * Sets whether this parser reads referenced objects recursively.  When
> +     * set, readObject() follows an indirect reference and reads the target
> +     * right away instead of returning a placeholder.
> +     * @param recursivlyRead the recursivlyRead to set
> +     */
> +    public void setRecursivlyRead(boolean recursivlyRead) {
> +        this.recursivlyRead = recursivlyRead;
> +    }
> +}
>
> Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java?rev=1142109&view=auto
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java (added)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/ConformingPDDocument.java Fri Jul  1 22:28:23 2011
> @@ -0,0 +1,115 @@
> +/*
> + *  Copyright 2011 adam.
> + *
> + *  Licensed under the Apache License, Version 2.0 (the "License");
> + *  you may not use this file except in compliance with the License.
> + *  You may obtain a copy of the License at
> + *
> + *       http://www.apache.org/licenses/LICENSE-2.0
> + *
> + *  Unless required by applicable law or agreed to in writing, software
> + *  distributed under the License is distributed on an "AS IS" BASIS,
> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + *  See the License for the specific language governing permissions and
> + *  limitations under the License.
> + *  under the License.
> + */
> +
> +package org.apache.pdfbox.pdmodel;
> +
> +import java.io.File;
> +import java.io.IOException;
> +import java.util.ArrayList;
> +import java.util.HashMap;
> +import java.util.List;
> +import java.util.Map;
> +import org.apache.pdfbox.cos.COSBase;
> +import org.apache.pdfbox.cos.COSDocument;
> +import org.apache.pdfbox.pdfparser.ConformingPDFParser;
> +import org.apache.pdfbox.persistence.util.COSObjectKey;
> +
> +/**
> + * A PDDocument which keeps its own pool of COS objects, keyed by object
> + * number and generation, and which knows the ConformingPDFParser it is
> + * associated with.
> + *
> + * @author adam
> + */
> +public class ConformingPDDocument extends PDDocument {
> +    /**
> +     * Maps ObjectKeys to a COSObject. Note that references to these objects
> +     * are also stored in COSDictionary objects that map a name to a specific object.
> +     */
> +    private final Map<COSObjectKey, COSBase> objectPool =
> +        new HashMap<COSObjectKey, COSBase>();
> +    private ConformingPDFParser parser;
> +
> +    public ConformingPDDocument() throws IOException {
> +        super();
> +    }
> +
> +    public ConformingPDDocument(COSDocument doc) throws IOException {
> +        super(doc);
> +    }
> +
> +    /**
> +     * This will load a document from a file, using a ConformingPDFParser.
> +     * @param input The File which contains the document.
> +     * @return The document that was loaded.
> +     * @throws IOException If there is an error reading from the file.
> +     */
> +    public static PDDocument load(File input) throws IOException {
> +        ConformingPDFParser pdfParser = new ConformingPDFParser(input);
> +        pdfParser.parse();
> +        return pdfParser.getPDDocument();
> +    }
> +
> +    /**
> +     * This will get an object from the pool.
> +     * @param key The object key.
> +     * @return The object in the pool, or null if the pool has no entry for the key.
> +     * @throws IOException declared for callers, not thrown by the lookup itself
> +     */
> +    public COSBase getObjectFromPool(COSObjectKey key) throws IOException {
> +        return objectPool.get(key);
> +    }
> +
> +    /**
> +     * This will get the keys of all objects which are in the pool.
> +     * @return a new list which holds every key of the pool
> +     * @throws IOException declared for callers, not thrown by the copy itself
> +     */
> +    public List<COSObjectKey> getObjectKeysFromPool() throws IOException {
> +        return new ArrayList<COSObjectKey>(objectPool.keySet());
> +    }
> +
> +    /**
> +     * This will get an object from the pool.
> +     * @param number the object number
> +     * @param generation the generation of this object you wish to load
> +     * @return The object in the pool, or null if the pool has no such entry.
> +     * @throws IOException declared for callers, not thrown by the lookup itself
> +     */
> +    public COSBase getObjectFromPool(long number, long generation) throws IOException {
> +        COSObjectKey key = new COSObjectKey(number, generation);
> +        return objectPool.get(key);
> +    }
> +
> +    /**
> +     * This will put an object into the pool, replacing any object which is
> +     * already stored for the same number and generation.
> +     * @param object the object to store
> +     * @param number the object number
> +     * @param generation the generation of the object
> +     */
> +    public void putObjectInPool(COSBase object, long number, long generation) {
> +        COSObjectKey key = new COSObjectKey(number, generation);
> +        objectPool.put(key, object);
> +    }
> +
> +    /**
> +     * @return the parser
> +     */
> +    public ConformingPDFParser getParser() {
> +        return parser;
> +    }
> +
> +    /**
> +     * @param parser the parser to set
> +     */
> +    public void setParser(ConformingPDFParser parser) {
> +        this.parser = parser;
> +    }
> +}
>
> Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java?rev=1142109&view=auto
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java (added)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/common/XrefEntry.java Fri Jul  1 22:28:23 2011
> @@ -0,0 +1,43 @@
> +/*
> + *  Copyright 2011 adam.
> + *
> + *  Licensed under the Apache License, Version 2.0 (the "License");
> + *  you may not use this file except in compliance with the License.
> + *  You may obtain a copy of the License at
> + *
> + *       http://www.apache.org/licenses/LICENSE-2.0
> + *
> + *  Unless required by applicable law or agreed to in writing, software
> + *  distributed under the License is distributed on an "AS IS" BASIS,
> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + *  See the License for the specific language governing permissions and
> + *  limitations under the License.
> + *  under the License.
> + */
> +
> +package org.apache.pdfbox.pdmodel.common;
> +
> +/**
> + * A single entry of a cross reference table: the number of the object, the
> + * byte offset and generation which were read for it, and whether the entry
> + * is marked as in use.
> + *
> + * @author adam
> + */
> +public class XrefEntry {
> +    private int objectNumber = 0;
> +    private int byteOffset = 0;
> +    private int generation = 0;
> +    private boolean inUse = true;
> +
> +    public XrefEntry() {
> +    }
> +
> +    /**
> +     * @param objectNumber the number of the object this entry belongs to
> +     * @param byteOffset the byte offset which was read for the entry
> +     * @param generation the generation which was read for the entry
> +     * @param inUse the keyword of the entry; "n" (surrounding whitespace is
> +     * ignored) marks the entry as in use, anything else, including null,
> +     * marks it as not in use
> +     */
> +    public XrefEntry(int objectNumber, int byteOffset, int generation, String inUse) {
> +        this.objectNumber = objectNumber;
> +        this.byteOffset = byteOffset;
> +        this.generation = generation;
> +        // the keyword is taken from the remainder of the xref line and may
> +        // still carry the space of a "n SP LF" line end, so trim before comparing
> +        this.inUse = inUse != null && "n".equals(inUse.trim());
> +    }
> +
> +    /**
> +     * @return the number of the object this entry belongs to
> +     */
> +    public int getObjectNumber() {
> +        return objectNumber;
> +    }
> +
> +    /**
> +     * @return the byte offset which was read for the entry
> +     */
> +    public int getByteOffset() {
> +        return byteOffset;
> +    }
> +
> +    /**
> +     * @return the generation which was read for the entry
> +     */
> +    public int getGeneration() {
> +        return generation;
> +    }
> +
> +    /**
> +     * @return true if the entry is marked as in use
> +     */
> +    public boolean isInUse() {
> +        return inUse;
> +    }
> +}
>
> Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java?rev=1142109&view=auto
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java (added)
> +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/ConformingPDFParserTest.java Fri Jul  1 22:28:23 2011
> @@ -0,0 +1,73 @@
> +/*
> + *  Copyright 2010 adam.
> + *
> + *  Licensed under the Apache License, Version 2.0 (the "License");
> + *  you may not use this file except in compliance with the License.
> + *  You may obtain a copy of the License at
> + *
> + *       http://www.apache.org/licenses/LICENSE-2.0
> + *
> + *  Unless required by applicable law or agreed to in writing, software
> + *  distributed under the License is distributed on an "AS IS" BASIS,
> + *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + *  See the License for the specific language governing permissions and
> + *  limitations under the License.
> + *  under the License.
> + */
> +
> +package org.apache.pdfbox.pdfparser;
> +
> +import java.io.File;
> +import java.net.URL;
> +import org.apache.pdfbox.cos.COSDictionary;
> +import org.junit.After;
> +import org.junit.AfterClass;
> +import org.junit.Before;
> +import org.junit.BeforeClass;
> +import org.junit.Test;
> +import static org.junit.Assert.*;
> +
> +/**
> + * Tests for the ConformingPDFParser, based on the gdb-refcard.pdf test
> + * resource.
> + *
> + * @author adam
> + */
> +public class ConformingPDFParserTest {
> +
> +    public ConformingPDFParserTest() {
> +    }
> +
> +    @BeforeClass
> +    public static void setUpClass() throws Exception {
> +    }
> +
> +    @AfterClass
> +    public static void tearDownClass() throws Exception {
> +    }
> +
> +    @Before
> +    public void setUp() {
> +    }
> +
> +    @After
> +    public void tearDown() {
> +    }
> +
> +    /**
> +     * Test of parse method, of class ConformingPDFParser.  The trailer of the
> +     * test document is expected to hold exactly Root, Info and Size.
> +     */
> +    @Test
> +    public void testParse() throws Exception {
> +        URL inputUrl = ConformingPDFParser.class.getResource("gdb-refcard.pdf");
> +        // fail with a readable message instead of a NullPointerException when
> +        // the resource is not on the classpath
> +        assertNotNull("Test resource gdb-refcard.pdf not found", inputUrl);
> +        File inputFile = new File(inputUrl.toURI());
> +        ConformingPDFParser instance = new ConformingPDFParser(inputFile);
> +        instance.parse();
> +
> +        COSDictionary trailer = instance.getDocument().getTrailer();
> +        assertNotNull(trailer);
> +        System.out.println("Trailer: " + instance.getDocument().getTrailer().toString());
> +        assertEquals(3, trailer.size());
> +        assertNotNull(trailer.getDictionaryObject("Root"));
> +        assertNotNull(trailer.getDictionaryObject("Info"));
> +        assertNotNull(trailer.getDictionaryObject("Size"));
> +    }
> +}
> \ No newline at end of file
>
> Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java?rev=1142109&r1=1142108&r2=1142109&view=diff
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java (original)
> +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocumentCatalog.java Fri Jul  1 22:28:23 2011
> @@ -16,7 +16,6 @@
>    */
>   package org.apache.pdfbox.pdmodel;
>
> -import java.io.File;
>   import junit.framework.TestCase;
>
>   public class TestPDDocumentCatalog extends TestCase {
> @@ -62,13 +61,29 @@ public class TestPDDocumentCatalog exten
>               doc = PDDocument.load(TestPDDocumentCatalog.class.getResourceAsStream("page_label.pdf"));
>               PDDocumentCatalog cat = doc.getDocumentCatalog();
>               // getLabelsByPageIndices() should not throw an exception
> -            String[] labels = cat.getPageLabels().getLabelsByPageIndices();
> +            cat.getPageLabels().getLabelsByPageIndices();
>           } catch(Exception e) {
> -            e.printStackTrace();
>               fail("Threw exception!");
>           } finally {
>               if(doc != null)
>                   doc.close();
>           }
>       }
> +
> +    /**
> +     * Test case for
> +     *<a href="https://issues.apache.org/jira/browse/PDFBOX-911"
> +     *>PDFBOX-911</a>  - Method PDDocument.getNumberOfPages() returns wrong
> +     * number of pages
> +     */
> +    public void testGetNumberOfPages() throws Exception {
> +        PDDocument doc = null;
> +        try {
> +            doc = PDDocument.load(TestPDDocumentCatalog.class.getResource("test.unc.pdf"));
> +            assertEquals(4, doc.getNumberOfPages());
> +        } finally {
> +            if(doc != null)
> +                doc.close();
> +        }
> +    }
>   }
>
> Added: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf?rev=1142109&view=auto
> ==============================================================================
> Binary file - no diff available.
>
> Propchange: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/gdb-refcard.pdf
> ------------------------------------------------------------------------------
>      svn:mime-type = application/octet-stream
>
>