You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by tb...@apache.org on 2012/04/08 16:55:09 UTC

svn commit: r1311015 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: PDFXrefStreamParser.java XrefTrailerResolver.java

Author: tboehme
Date: Sun Apr  8 14:55:09 2012
New Revision: 1311015

URL: http://svn.apache.org/viewvc?rev=1311015&view=rev
Log:
add parsing of object ids within compressed object streams (needed by conforming parsers having to know which compressed stream has to be accessed for a specific object id)

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=1311015&r1=1311014&r2=1311015&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Sun Apr  8 14:55:09 2012
@@ -150,9 +150,26 @@ public class PDFXrefStreamParser extends
                         break;
                     case 2:
                         /*
+                         * object stored in object stream; 2nd argument is object number of object stream;
+                         * 3rd argument index of object within object stream
+                         * 
+                         * For sequential PDFParser we do not need this information
+                         * because
                          * These objects are handled by the dereferenceObjects() method
                          * since they're only pointing to object numbers
+                         * 
+                         * However for XRef aware parsers we have to know which objects contain
+                         * object streams. We will store this information in normal xref mapping
+                         * table but add object stream number with minus sign in order to
+                         * distinguish from file offsets
                          */
+	                      int objstmObjNr = 0;
+	                      for(int i = 0; i < w1; i++)
+	                      {
+	                      		objstmObjNr += (currLine[i + w0] & 0x00ff) << ((w1 - i - 1) * 8);
+	                      }
+                        objKey = new COSObjectKey( objID.intValue(), 0 );
+                        xrefTrailerResolver.setXRef( objKey, -objstmObjNr );
                         break;
                     default:
                         break;

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java?rev=1311015&r1=1311014&r2=1311015&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java Sun Apr  8 14:55:09 2012
@@ -19,8 +19,11 @@ package org.apache.pdfbox.pdfparser;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -118,6 +121,13 @@ public class XrefTrailerResolver
     }
 
     /**
+     * Returns the trailer last set by {@link #setTrailer(COSDictionary)}.
+     */
+    public COSDictionary getCurrentTrailer() {
+    		return curXrefTrailerObj.trailer;
+    }
+
+    /**
      * Sets the byte position of the first XRef
      * (has to be called after very last startxref was read).
      * This is used to resolve chain of active XRef/trailer.
@@ -216,4 +226,33 @@ public class XrefTrailerResolver
     {
         return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefTable;
     }
+    
+    /** Returns object numbers which are referenced as contained
+     *  in object stream with specified object number.
+     *  
+     *  This will scan resolved xref table for all entries having negated
+     *  stream object number as value.
+     *
+     *  @param objstmObjNr  object number of object stream for which contained object numbers
+     *                      should be returned
+     *                       
+     *  @return set of object numbers referenced for given object stream
+     *          or <code>null</code> if {@link #setStartxref(long)} was not
+     *          called before so that no resolved xref table exists
+     */
+    public Set<Long> getContainedObjectNumbers( final int objstmObjNr ) 
+    {
+    		if ( resolvedXrefTrailer == null )
+    			return null;
+    		
+    		final Set<Long> refObjNrs = new HashSet<Long>();
+    		final int       cmpVal    = - objstmObjNr;
+    		
+    		for ( Entry<COSObjectKey,Long> xrefEntry : resolvedXrefTrailer.xrefTable.entrySet() ) {
+						if ( xrefEntry.getValue() == cmpVal )
+							refObjNrs.add( xrefEntry.getKey().getNumber() );
+				}
+    		
+    		return refObjNrs;
+    }
 }



Re: svn commit: r1311015 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser: PDFXrefStreamParser.java XrefTrailerResolver.java

Posted by Andreas Lehmkuehler <an...@lehmi.de>.
Hi,

please don't forget to mention the issue number in your log statement, so that 
JIRA is able to link your checkin automatically to the issue.

TIA,
Andreas Lehmkühler

Am 08.04.2012 16:55, schrieb tboehme@apache.org:
> Author: tboehme
> Date: Sun Apr  8 14:55:09 2012
> New Revision: 1311015
>
> URL: http://svn.apache.org/viewvc?rev=1311015&view=rev
> Log:
> add parsing of object ids within compressed object streams (needed by conforming parsers having to know which compressed stream has to be accessed for a specific object id)
>
> Modified:
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
>      pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
>
> Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java?rev=1311015&r1=1311014&r2=1311015&view=diff
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java (original)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFXrefStreamParser.java Sun Apr  8 14:55:09 2012
> @@ -150,9 +150,26 @@ public class PDFXrefStreamParser extends
>                           break;
>                       case 2:
>                           /*
> +                         * object stored in object stream; 2nd argument is object number of object stream;
> +                         * 3rd argument index of object within object stream
> +                         *
> +                         * For sequential PDFParser we do not need this information
> +                         * because
>                            * These objects are handled by the dereferenceObjects() method
>                            * since they're only pointing to object numbers
> +                         *
> +                         * However for XRef aware parsers we have to know which objects contain
> +                         * object streams. We will store this information in normal xref mapping
> +                         * table but add object stream number with minus sign in order to
> +                         * distinguish from file offsets
>                            */
> +	                      int objstmObjNr = 0;
> +	                      for(int i = 0; i<  w1; i++)
> +	                      {
> +	                      		objstmObjNr += (currLine[i + w0]&  0x00ff)<<  ((w1 - i - 1) * 8);
> +	                      }
> +                        objKey = new COSObjectKey( objID.intValue(), 0 );
> +                        xrefTrailerResolver.setXRef( objKey, -objstmObjNr );
>                           break;
>                       default:
>                           break;
>
> Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java
> URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java?rev=1311015&r1=1311014&r2=1311015&view=diff
> ==============================================================================
> --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java (original)
> +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefTrailerResolver.java Sun Apr  8 14:55:09 2012
> @@ -19,8 +19,11 @@ package org.apache.pdfbox.pdfparser;
>   import java.util.ArrayList;
>   import java.util.Collections;
>   import java.util.HashMap;
> +import java.util.HashSet;
>   import java.util.List;
>   import java.util.Map;
> +import java.util.Set;
> +import java.util.Map.Entry;
>
>   import org.apache.commons.logging.Log;
>   import org.apache.commons.logging.LogFactory;
> @@ -118,6 +121,13 @@ public class XrefTrailerResolver
>       }
>
>       /**
> +     * Returns the trailer last set by {@link #setTrailer(COSDictionary)}.
> +     */
> +    public COSDictionary getCurrentTrailer() {
> +    		return curXrefTrailerObj.trailer;
> +    }
> +
> +    /**
>        * Sets the byte position of the first XRef
>        * (has to be called after very last startxref was read).
>        * This is used to resolve chain of active XRef/trailer.
> @@ -216,4 +226,33 @@ public class XrefTrailerResolver
>       {
>           return ( resolvedXrefTrailer == null ) ? null : resolvedXrefTrailer.xrefTable;
>       }
> +
> +    /** Returns object numbers which are referenced as contained
> +     *  in object stream with specified object number.
> +     *
> +     *  This will scan resolved xref table for all entries having negated
> +     *  stream object number as value.
> +     *
> +     *  @param objstmObjNr  object number of object stream for which contained object numbers
> +     *                      should be returned
> +     *
> +     *  @return set of object numbers referenced for given object stream
> +     *          or <code>null</code> if {@link #setStartxref(long)} was not
> +     *          called before so that no resolved xref table exists
> +     */
> +    public Set<Long>  getContainedObjectNumbers( final int objstmObjNr )
> +    {
> +    		if ( resolvedXrefTrailer == null )
> +    			return null;
> +    		
> +    		final Set<Long>  refObjNrs = new HashSet<Long>();
> +    		final int       cmpVal    = - objstmObjNr;
> +    		
> +    		for ( Entry<COSObjectKey,Long>  xrefEntry : resolvedXrefTrailer.xrefTable.entrySet() ) {
> +						if ( xrefEntry.getValue() == cmpVal )
> +							refObjNrs.add( xrefEntry.getKey().getNumber() );
> +				}
> +    		
> +    		return refObjNrs;
> +    }
>   }
>
>