You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ca...@apache.org on 2009/05/26 22:12:36 UTC
svn commit: r778869 - in
/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox:
cos/COSDocument.java filter/FlateFilter.java pdfparser/PDFParser.java
pdfparser/PDFXrefStreamParser.java
Author: carrier
Date: Tue May 26 20:12:36 2009
New Revision: 778869
URL: http://svn.apache.org/viewvc?rev=778869&view=rev
Log:
Patch for PDFBOX-475 that adds support for XRef streams and fixes a bug in the decompression code. patch by Justin LeFebvre
Added:
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (with props)
Modified:
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=778869&r1=778868&r2=778869&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/cos/COSDocument.java Tue May 26 20:12:36 2009
@@ -31,6 +31,7 @@
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
+import org.apache.pdfbox.pdfparser.PDFXrefStreamParser;
import org.apache.pdfbox.persistence.util.COSObjectKey;
/**
@@ -461,6 +462,7 @@
}
return obj;
}
+
/**
* Used to populate the XRef HashMap. Will add an Xreftable entry
* that maps ObjectKeys to byte offsets in the file.
@@ -479,4 +481,25 @@
public Map getXrefTable(){
return xrefTable;
}
+
+ /**
+ * Finds every object of type XRef, merges each such stream into a new
+ * trailer dictionary and runs a PDFXrefStreamParser over it to fill the
+ * xref Map; the merged dictionary is then installed as the trailer.
+ *
+ * @throws IOException if there is an error parsing the stream
+ */
+ public void parseXrefStreams() throws IOException {
+ COSDictionary trailer = new COSDictionary();
+ Iterator xrefIter = getObjectsByType( "XRef" ).iterator();
+ while( xrefIter.hasNext() )
+ {
+ COSObject xrefStream = (COSObject)xrefIter.next();
+ COSStream stream = (COSStream)xrefStream.getObject();
+ trailer.addAll(stream);
+ PDFXrefStreamParser parser = new PDFXrefStreamParser(stream, this);
+ parser.parse(); // the parser reports entries back through setXRef on this document
+ }
+ setTrailer( trailer ); // set once, after all XRef streams have been merged
+ }
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java?rev=778869&r1=778868&r2=778869&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/filter/FlateFilter.java Tue May 26 20:12:36 2009
@@ -78,9 +78,11 @@
if (dict!=null)
{
predictor = dict.getInt("Predictor");
- colors = dict.getInt("Colors");
- bitsPerPixel = options.getInt("BitsPerComponent");
- columns = dict.getInt("Columns");
+ if(predictor > 1){
+ colors = dict.getInt("Colors");
+ bitsPerPixel = options.getInt("BitsPerComponent");
+ columns = dict.getInt("Columns");
+ }
}
try
@@ -110,17 +112,23 @@
}
else
{
- if( colors==-1 )
+ /*
+ * Reverting back to default values
+ */
+ if( colors == -1 )
{
- throw new IOException("Error: Could not read 'colors' attribute to decompress flate stream.");
+ colors = 1;
+// throw new IOException("Error: Could not read 'colors' attribute to decompress flate stream.");
}
- if( bitsPerPixel==-1 )
+ if( bitsPerPixel == -1 )
{
- throw new IOException("Error: Could not read 'bitsPerPixel' attribute to decompress flate stream.");
+ bitsPerPixel = 8;
+// throw new IOException("Error: Could not read 'bitsPerPixel' attribute to decompress flate stream.");
}
- if( columns==-1 )
+ if( columns == -1 )
{
- throw new IOException("Error: Could not read 'columns' attribute to decompress flate stream.");
+ columns = 1;
+// throw new IOException("Error: Could not read 'columns' attribute to decompress flate stream.");
}
baos = new ByteArrayOutputStream();
@@ -195,7 +203,7 @@
boolean done = false;
int linepredictor = predictor;
- while (!done)
+ while (!done && data.available() > 0)
{
if (predictor == 15)
{
@@ -215,7 +223,7 @@
// read line
int i = 0;
- int offset = bpp;
+ int offset = 0;
while (offset < rowlength && ((i = data.read(actline, offset, rowlength - offset)) != -1))
{
offset += i;
@@ -284,7 +292,7 @@
break;
}
- lastline = actline;
+ lastline = (byte[])actline.clone();
baos.write(actline, bpp, actline.length - bpp);
}
}
Modified: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=778869&r1=778868&r2=778869&view=diff
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Tue May 26 20:12:36 2009
@@ -184,19 +184,11 @@
skipSpaces();
}
//Test if we saw a trailer section. If not, look for an XRef Stream (Cross-Reference Stream)
- //For PDF 1.5 and above
+ //to populate the trailer and xref information. For PDF 1.5 and above
if( document.getTrailer() == null ){
- COSDictionary trailer = new COSDictionary();
- Iterator xrefIter = document.getObjectsByType( "XRef" ).iterator();
- while( xrefIter.hasNext() )
- {
- COSStream next = (COSStream)((COSObject)xrefIter.next()).getObject();
- trailer.addAll( next );
- }
- document.setTrailer( trailer );
+ document.parseXrefStreams();
}
- if( !document.isEncrypted() )
- {
+ if( !document.isEncrypted() ){
document.dereferenceObjectStreams();
}
ConflictObj.resolveConflicts(document, conflictList);
Added: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=778869&view=auto
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (added)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Tue May 26 20:12:36 2009
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfparser;
+
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSInteger;
+import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.persistence.util.COSObjectKey;
+
+/**
+ * Parses a PDF 1.5 (or later) cross-reference stream and records the
+ * xref entries it contains in the document.
+ *
+ * @author Justin LeFebvre
+ * @version
+ */
+public class PDFXrefStreamParser extends BaseParser {
+
+ private COSStream stream;
+
+ /**
+ * Constructor.
+ *
+ * @param strm The stream to parse.
+ * @param doc The document for the current parsing.
+ *
+ * @throws IOException If there is an error initializing the stream.
+ */
+ public PDFXrefStreamParser(COSStream strm, COSDocument doc) throws IOException{
+ super(strm.getUnfilteredStream());
+ setDocument(doc);
+ stream = strm;
+ }
+
+ /**
+ * Reads fixed-width entries from the unfiltered stream and calls setXRef
+ * on the document for every type 1 entry. The source is always closed on exit.
+ * @throws IOException If there is an error while parsing the stream.
+ */
+ public void parse() throws IOException{
+ try{
+ COSArray xrefFormat = (COSArray)stream.getDictionaryObject("W");
+ COSArray indexArray = (COSArray)stream.getDictionaryObject("Index");
+ /*
+ * If Index doesn't exist, default to a single subsection: [0 Size].
+ */
+ if(indexArray == null){
+ indexArray = new COSArray();
+ indexArray.add(new COSInteger(0));
+ indexArray.add(stream.getDictionaryObject("Size"));
+ }
+
+ ArrayList objNums = new ArrayList();
+
+ /*
+ * Expands each (first object number, count) pair of Index into the individual object numbers
+ */
+ Iterator indexIter = indexArray.iterator();
+ while(indexIter.hasNext()){
+ int objID = ((COSInteger)indexIter.next()).intValue();
+ int size = ((COSInteger)indexIter.next()).intValue();
+ for(int i = 0; i < size; i++){
+ objNums.add(new Integer(objID + i));
+ }
+ }
+ Iterator objIter = objNums.iterator();
+ /*
+ * Size of one entry in bytes: the sum of the three field widths given in W
+ */
+ int w0 = xrefFormat.getInt(0);
+ int w1 = xrefFormat.getInt(1);
+ int w2 = xrefFormat.getInt(2);
+ int lineSize = w0 + w1 + w2;
+
+ while(pdfSource.available() > 0){
+ byte[] currLine = new byte[lineSize];
+ pdfSource.read(currLine); // NOTE(review): the byte count returned by read() is ignored
+
+ int type = 0;
+ /*
+ * Decodes the first w0 bytes, most significant byte first, as the entry type.
+ * NOTE(review): with w0 == 0 type stays 0; the PDF spec appears to default it to 1 - confirm.
+ */
+ for(int i = 0; i < w0; i++){
+ type += (currLine[i] & 0x00ff) << ((w0 - i - 1)* 8);
+ }
+ //Object number for this entry; taken for every entry, even skipped ones, to stay in step
+ Integer objID = (Integer)objIter.next();
+ /*
+ * 3 different types of entries.
+ */
+ switch(type){
+ case 0:
+ /*
+ * Skipping free objects
+ */
+ break;
+ case 1: // offset in the next w1 bytes, generation number in the last w2 bytes
+ int offset = 0; // NOTE(review): int, so a width above 4 bytes cannot be represented
+ for(int i = 0; i < w1; i++){
+ offset += (currLine[i + w0] & 0x00ff) << ((w1 - i - 1) * 8);
+ }
+ int genNum = 0;
+ for(int i = 0; i < w2; i++){
+ genNum += (currLine[i + w0 + w1] & 0x00ff) << ((w2 - i - 1) * 8);
+ }
+ COSObjectKey objKey = new COSObjectKey(objID.intValue(), genNum);
+ document.setXRef(objKey, offset);
+ break;
+ case 2:
+ /*
+ * Not recorded here: these entries only point to object numbers.
+ * Presumably resolved later by dereferenceObjectStreams() - confirm.
+ */
+ break;
+ }
+ }
+ }
+ finally{
+ pdfSource.close();
+ }
+ }
+}
Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
------------------------------------------------------------------------------
svn:executable = *
Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
------------------------------------------------------------------------------
svn:mime-type = text/plain