You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ca...@apache.org on 2009/05/26 22:12:36 UTC

svn commit: r778869 - in /incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox: cos/COSDocument.java filter/FlateFilter.java pdfparser/PDFParser.java pdfparser/PDFXrefStreamParser.java

Author: carrier
Date: Tue May 26 20:12:36 2009
New Revision: 778869

URL: http://svn.apache.org/viewvc?rev=778869&view=rev
Log:
Patch for PDFBOX-475 that adds support for XRef streams and fixes a bug in the decompression code. Patch by Justin LeFebvre.

Added:
    incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java   (with props)
Modified:
    incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java
    incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
    incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java

Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=778869&r1=778868&r2=778869&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java Tue May 26 20:12:36 2009
@@ -31,6 +31,7 @@
 import org.apache.pdfbox.io.RandomAccessFile;
 
 import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
+import org.apache.pdfbox.pdfparser.PDFXrefStreamParser;
 import org.apache.pdfbox.persistence.util.COSObjectKey;
 
 /**
@@ -461,6 +462,7 @@
         }  
         return obj;
     }
+    
     /**
      * Used to populate the XRef HashMap. Will add an Xreftable entry
      * that maps ObjectKeys to byte offsets in the file. 
@@ -479,4 +481,25 @@
     public Map getXrefTable(){
         return xrefTable;
     }
+
+    /**
+     * Walks every object in this document whose type is XRef. Each such
+     * stream is added to a freshly created trailer dictionary and is then
+     * handed to a PDFXrefStreamParser so that the xref Map is filled in.
+     * Once all streams have been visited the collected dictionary is
+     * installed as the trailer of this document.
+     * 
+     * @throws IOException if there is an error parsing the stream
+     */
+    public void parseXrefStreams() throws IOException {
+        COSDictionary mergedTrailer = new COSDictionary();
+        for( Iterator it = getObjectsByType( "XRef" ).iterator(); it.hasNext(); )
+        {
+            COSObject wrapper = (COSObject)it.next();
+            COSStream xrefData = (COSStream)wrapper.getObject();
+            // the stream contributes to the trailer before it is parsed
+            mergedTrailer.addAll( xrefData );
+            new PDFXrefStreamParser( xrefData, this ).parse();
+        }
+        setTrailer( mergedTrailer );
+    }
 }

Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java?rev=778869&r1=778868&r2=778869&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java Tue May 26 20:12:36 2009
@@ -78,9 +78,11 @@
         if (dict!=null)
         {
             predictor = dict.getInt("Predictor");
-            colors = dict.getInt("Colors");
-            bitsPerPixel = options.getInt("BitsPerComponent");
-            columns = dict.getInt("Columns");
+            if(predictor > 1){
+                colors = dict.getInt("Colors");
+                bitsPerPixel = options.getInt("BitsPerComponent");
+                columns = dict.getInt("Columns");
+            }
         }
 
         try
@@ -110,17 +112,23 @@
                 }
                 else
                 {
-                    if( colors==-1 )
+                    /*
+                     * Reverting back to default values
+                     */
+                    if( colors == -1 )
                     {
-                        throw new IOException("Error: Could not read 'colors' attribute to decompress flate stream.");
+                        colors = 1;
+//                        throw new IOException("Error: Could not read 'colors' attribute to decompress flate stream.");
                     }
-                    if( bitsPerPixel==-1 )
+                    if( bitsPerPixel == -1 )
                     {
-                        throw new IOException("Error: Could not read 'bitsPerPixel' attribute to decompress flate stream.");
+                        bitsPerPixel = 8;
+//                        throw new IOException("Error: Could not read 'bitsPerPixel' attribute to decompress flate stream.");
                     }
-                    if( columns==-1 )
+                    if( columns == -1 )
                     {
-                        throw new IOException("Error: Could not read 'columns' attribute to decompress flate stream.");
+                        columns = 1;
+//                        throw new IOException("Error: Could not read 'columns' attribute to decompress flate stream.");
                     }
 
                     baos = new ByteArrayOutputStream();
@@ -195,7 +203,7 @@
             boolean done = false;
             int linepredictor = predictor;
 
-            while (!done)
+            while (!done && data.available() > 0)
             {
                 if (predictor == 15)
                 {
@@ -215,7 +223,7 @@
 
                 // read line
                 int i = 0;
-                int offset = bpp;
+                int offset = 0;
                 while (offset < rowlength && ((i = data.read(actline, offset, rowlength - offset)) != -1))
                 {
                     offset += i;
@@ -284,7 +292,7 @@
                         break;
                 }
 
-                lastline = actline;
+                lastline = (byte[])actline.clone();
                 baos.write(actline, bpp, actline.length - bpp);
             }
         }

Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=778869&r1=778868&r2=778869&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Tue May 26 20:12:36 2009
@@ -184,19 +184,11 @@
                     skipSpaces();
                 }
                 //Test if we saw a trailer section. If not, look for an XRef Stream (Cross-Reference Stream) 
-                //For PDF 1.5 and above 
+                //to populate the trailer and xref information. For PDF 1.5 and above 
                 if( document.getTrailer() == null ){
-                    COSDictionary trailer = new COSDictionary();
-                    Iterator xrefIter = document.getObjectsByType( "XRef" ).iterator();
-                    while( xrefIter.hasNext() )
-                    {
-                        COSStream next = (COSStream)((COSObject)xrefIter.next()).getObject();
-                        trailer.addAll( next );
-                    }
-                    document.setTrailer( trailer );
+                    document.parseXrefStreams();
                 }
-                if( !document.isEncrypted() )
-                {
+                if( !document.isEncrypted() ){
                     document.dereferenceObjectStreams();
                 }
                 ConflictObj.resolveConflicts(document, conflictList);     

Added: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=778869&view=auto
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (added)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Tue May 26 20:12:36 2009
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfparser;
+
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/**
+ * This will parse a PDF 1.5 (or better) Xref stream and 
+ * extract the xref information from the stream
+ * 
+ * @author
+ * @version
+ */
+public class PDFXrefStreamParser extends BaseParser {
+
+    private COSStream stream;
+
+    /**
+     * Constructor.
+     *
+     * @param strm The stream to parse.
+     * @param doc The document for the current parsing.
+     *
+     * @throws IOException If there is an error initializing the stream.
+     */
+    public PDFXrefStreamParser(COSStream strm, COSDocument doc) throws IOException{
+        super(strm.getUnfilteredStream());
+        setDocument(doc);
+        stream = strm;
+    }
+
+    /**
+     * Parses through the unfiltered stream and populates the xrefTable
+     * HashMap
+     * @throws IOException If there is an error while parsing the stream.
+     */
+    public void parse() throws IOException{
+        try{
+            COSArray xrefFormat = (COSArray)stream.getDictionaryObject("W");
+            COSArray indexArray = (COSArray)stream.getDictionaryObject("Index");
+            /*
+             * If Index doesn't exist, we will use the default values. 
+             */
+            if(indexArray == null){
+                indexArray = new COSArray();
+                indexArray.add(new COSInteger(0));
+                indexArray.add(stream.getDictionaryObject("Size"));
+            }
+            
+            ArrayList objNums = new ArrayList();
+            
+            /*
+             * Populates objNums with all object numbers available
+             */
+            Iterator indexIter = indexArray.iterator();
+            while(indexIter.hasNext()){
+                int objID = ((COSInteger)indexIter.next()).intValue();
+                int size = ((COSInteger)indexIter.next()).intValue();
+                for(int i = 0; i < size; i++){
+                    objNums.add(new Integer(objID + i));
+                }
+            }
+            Iterator objIter = objNums.iterator();
+            /*
+             * Calculating the size of the line in bytes
+             */
+            int w0 = xrefFormat.getInt(0);
+            int w1 = xrefFormat.getInt(1);
+            int w2 = xrefFormat.getInt(2);
+            int lineSize = w0 + w1 + w2;
+            
+            while(pdfSource.available() > 0){
+                byte[] currLine = new byte[lineSize];
+                pdfSource.read(currLine);
+
+                int type = 0;
+                /*
+                 * Grabs the number of bytes specified for the first column in 
+                 * the W array and stores it.
+                 */
+                for(int i = 0; i < w0; i++){
+                    type += (currLine[i] & 0x00ff) << ((w0 - i - 1)* 8);
+                }
+                //Need to remember the current objID
+                Integer objID = (Integer)objIter.next();
+                /*
+                 * 3 different types of entries. 
+                 */
+                switch(type){
+                case 0:
+                    /*
+                     * Skipping free objects
+                     */
+                    break;
+                case 1:                   
+                    int offset = 0;
+                    for(int i = 0; i < w1; i++){
+                        offset += (currLine[i + w0] & 0x00ff) << ((w1 - i - 1) * 8);
+                    }
+                    int genNum = 0;
+                    for(int i = 0; i < w2; i++){
+                        genNum += (currLine[i + w0 + w1] & 0x00ff) << ((w2 - i - 1) * 8);
+                    }
+                    COSObjectKey objKey = new COSObjectKey(objID.intValue(), genNum);
+                    document.setXRef(objKey, offset);
+                    break;
+                case 2:
+                    /*
+                     * These objects are handled by the dereferenceObjects() method
+                     * since they're only pointing to object numbers
+                     */
+                    break;
+                }
+            }
+        }
+        finally{
+            pdfSource.close();
+        }
+    }
+}

Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
------------------------------------------------------------------------------
    svn:executable = *

Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain