You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by tb...@apache.org on 2012/05/19 14:45:23 UTC

svn commit: r1340444 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: ExtractImages.java ExtractText.java pdfparser/NonSequentialPDFParser.java pdmodel/PDDocument.java

Author: tboehme
Date: Sat May 19 12:45:22 2012
New Revision: 1340444

URL: http://svn.apache.org/viewvc?rev=1340444&view=rev
Log:
PDFBOX-1316 - NonSequentialPDFParser now sets security handler in PDDocument; used in tools for checking access permissions

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractText.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java?rev=1340444&r1=1340443&r2=1340444&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractImages.java Sat May 19 12:45:22 2012
@@ -141,15 +141,14 @@ public class ExtractImages
                         {
                             StandardDecryptionMaterial spm = new StandardDecryptionMaterial(password);
                             document.openProtection(spm);
-                            AccessPermission ap = document.getCurrentAccessPermission();
-    
-                            if( ! ap.canExtractContent() )
-                            {
-                                throw new IOException(
-                                    "Error: You do not have permission to extract images." );
-                            }
                         }
                     }
+                    AccessPermission ap = document.getCurrentAccessPermission();
+                    if( ! ap.canExtractContent() )
+                    {
+                        throw new IOException(
+                            "Error: You do not have permission to extract images." );
+                    }
 
                     List pages = document.getDocumentCatalog().getAllPages();
                     Iterator iter = pages.iterator();

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractText.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractText.java?rev=1340444&r1=1340443&r2=1340444&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractText.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/ExtractText.java Sat May 19 12:45:22 2012
@@ -213,14 +213,15 @@ public class ExtractText
                     {
                         StandardDecryptionMaterial sdm = new StandardDecryptionMaterial( password );
                         document.openProtection( sdm );
-                        AccessPermission ap = document.getCurrentAccessPermission();
-
-                        if( ! ap.canExtractContent() )
-                        {
-                            throw new IOException( "You do not have permission to extract text" );
-                        }
                     }
                 }
+                
+                AccessPermission ap = document.getCurrentAccessPermission();
+                if( ! ap.canExtractContent() )
+                {
+                    throw new IOException( "You do not have permission to extract text" );
+                }
+                
                 stopProcessing("Time for loading: ", startTime);
 
 

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1340444&r1=1340443&r2=1340444&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Sat May 19 12:45:22 2012
@@ -53,6 +53,7 @@ import org.apache.pdfbox.io.PushBackInpu
 import org.apache.pdfbox.io.RandomAccess;
 import org.apache.pdfbox.io.RandomAccessBuffer;
 import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
+import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
 import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial;
@@ -302,10 +303,6 @@ public class NonSequentialPDFParser exte
                 {
                     LOG.warn( "PDF file '" + pdfFile.getPath() + "' does not allow extracting content." );
                 }
-                else
-                {
-                    LOG.info( "PDF file '" + pdfFile.getPath() + "' allows content extraction." );
-                }
                   
             }
             catch ( Exception e )
@@ -623,6 +620,27 @@ public class NonSequentialPDFParser exte
 
     // ------------------------------------------------------------------------
     /**
+     * This will get the PD document that was parsed.  When you are done with
+     * this document you must call close() on it to release resources.
+     *
+     * Overrides the super method so that this parser's security handler, when
+     * it is not null, is handed to the returned document.
+     *
+     * @return The document at the PD layer.
+     * @throws IOException If there is an error getting the document.
+     */
+    @Override
+    public PDDocument getPDDocument() throws IOException
+    {
+        PDDocument pdDocument = super.getPDDocument();
+        if ( securityHandler != null )
+            pdDocument.setSecurityHandler( securityHandler );
+        
+        return pdDocument;
+    }
+
+    // ------------------------------------------------------------------------
+    /**
      * Returns the number of pages in a document.
      * 
      * @return the number of pages.

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1340444&r1=1340443&r2=1340444&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Sat May 19 12:45:22 2012
@@ -1402,6 +1402,23 @@ public class PDDocument implements Pagea
     {
         return securityHandler;
     }
+    
+    /**
+     * Sets the security handler if none is set already; an existing one is kept.
+     * 
+     * @param _sHandler  security handler to be assigned (stored without a null check)
+     * @return  <code>true</code> if the assignment was made, <code>false</code>
+     *          otherwise (a security handler was already set)
+     */
+    public boolean setSecurityHandler( SecurityHandler _sHandler )
+    {
+        if ( securityHandler == null )
+        {
+            securityHandler = _sHandler;
+            return true;
+        }
+        return false;
+    }
 
     public boolean isAllSecurityToBeRemoved() {
         return allSecurityToBeRemoved;