You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2013/05/26 15:26:17 UTC
svn commit: r1486413 - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/ test/java/org/apache/pdfbox/ test/java/org/apache/pdfbox/pdmodel/

Author: lehmi
Date: Sun May 26 13:26:16 2013
New Revision: 1486413

URL: http://svn.apache.org/r1486413
Log:
PDFBOX-1581: added PDDocument.save(File) and PDDocument.loadNonSeq(InputStream, ...)  as proposed by Fredrik Kjellberg

Added:
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java
Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
    pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1486413&r1=1486412&r2=1486413&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Sun May 26 13:26:16 2013
@@ -91,16 +91,28 @@ public class NonSequentialPDFParser exte
     private static final InputStream EMPTY_INPUT_STREAM = new ByteArrayInputStream(new byte[0]);
 
     protected static final int DEFAULT_TRAIL_BYTECOUNT = 2048;
+    /**
+     * EOF-marker.
+     */
     protected static final char[] EOF_MARKER = new char[]
     { '%', '%', 'E', 'O', 'F' };
+    /**
+     * StartXRef-marker.
+     */
     protected static final char[] STARTXREF_MARKER = new char[]
     { 's', 't', 'a', 'r', 't', 'x', 'r', 'e', 'f' };
+    /**
+     * obj-marker.
+     */
     protected static final char[] OBJ_MARKER = new char[]
     { 'o', 'b', 'j' };
 
     private final File pdfFile;
     private final RandomAccessBufferedFileInputStream raStream;
 
+    /**
+     * The security handler.
+     */
     protected SecurityHandler securityHandler = null;
 
     private String keyStoreFilename = null;
@@ -219,12 +231,31 @@ public class NonSequentialPDFParser exte
         password = decryptionPassword;
     }
 
+    /**
+     * Constructor.
+     * 
+     * @param input input stream representing the pdf.
+     * @throws IOException If something went wrong.
+     */
     public NonSequentialPDFParser(InputStream input) throws IOException
     {
+        this(input, null, "");
+    }
+
+    /**
+     * Constructor.
+     * 
+     * @param input input stream representing the pdf.
+     * @param raBuf the buffer to be used for parsing
+     * @param decryptionPassword password to be used for decryption.
+     * @throws IOException If something went wrong.
+     */
+    public NonSequentialPDFParser(InputStream input, RandomAccess raBuf, String decryptionPassword) throws IOException
+    {
         super(EMPTY_INPUT_STREAM, null, false);
         pdfFile = createTmpFile(input);
         raStream = new RandomAccessBufferedFileInputStream(pdfFile);
-        init(pdfFile, null, "");
+        init(pdfFile, raBuf, decryptionPassword);
     }
 
     /**
@@ -234,7 +265,7 @@ public class NonSequentialPDFParser exte
      * 
      * @param input
      * @return
-     * @throws IOException
+     * @throws IOException If something went wrong.
      */
     private File createTmpFile(InputStream input) throws IOException
     {
@@ -287,7 +318,7 @@ public class NonSequentialPDFParser exte
      * can handle linearized pdfs, which will have an xref at the end pointing
      * to an xref at the beginning of the file. Last the root object is parsed.
      * 
-     * @throws IOException
+     * @throws IOException If something went wrong.
      */
     protected void initialParse() throws IOException
     {
@@ -442,7 +473,12 @@ public class NonSequentialPDFParser exte
         return pdfSource.getOffset();
     }
 
-    /** Sets {@link #pdfSource} to start next parsing at given file offset. */
+    /**
+     * Sets {@link #pdfSource} to start next parsing at given file offset.
+     * 
+     * @param fileOffset file offset
+     * @throws IOException If something went wrong.
+     */
     protected final void setPdfSource(long fileOffset) throws IOException
     {
 
@@ -458,7 +494,10 @@ public class NonSequentialPDFParser exte
         // pdfSource.skip( _fileOffset );
     }
 
-    /** Enable handling of alternative pdfSource implementation. */
+    /**
+     * Enable handling of alternative pdfSource implementation.
+     * @throws IOException If something went wrong.
+     */
     protected final void releasePdfSourceInputStream() throws IOException
     {
         // if ( pdfSource != null )
@@ -479,6 +518,9 @@ public class NonSequentialPDFParser exte
      * (within last {@link #DEFAULT_TRAIL_BYTECOUNT} bytes (or range set via
      * {@link #setEOFLookupRange(int)}) and go back to find
      * <code>startxref</code>.
+     * 
+     * @return the offset of <code>startxref</code>
+     * @throws IOException If something went wrong.
      */
     protected final long getStartxrefOffset() throws IOException
     {
@@ -590,6 +632,7 @@ public class NonSequentialPDFParser exte
      * Reads given pattern from {@link #pdfSource}. Skipping whitespace at start
      * and end.
      * 
+     * @param pattern pattern to be read
      * @throws IOException if pattern could not be read
      */
     protected final void readPattern(final char[] pattern) throws IOException
@@ -697,6 +740,11 @@ public class NonSequentialPDFParser exte
         }
     }
 
+    /**
+     * Return the pdf file.
+     * 
+     * @return the pdf file
+     */
     protected File getPdfFile()
     {
         return this.pdfFile;
@@ -713,7 +761,9 @@ public class NonSequentialPDFParser exte
             try
             {
                 if (!pdfFile.delete())
+                {
                     LOG.warn("Temporary file '" + pdfFile.getName() + "' can't be deleted");
+                }
             }
             catch (SecurityException e)
             {
@@ -750,7 +800,9 @@ public class NonSequentialPDFParser exte
     {
         PDDocument pdDocument = super.getPDDocument();
         if (securityHandler != null)
+        {
             pdDocument.setSecurityHandler(securityHandler);
+        }
         return pdDocument;
     }
 
@@ -1171,7 +1223,8 @@ public class NonSequentialPDFParser exte
                         // this is not legal
                         // the combination of a dict and the stream/endstream
                         // forms a complete stream object
-                        throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
+                        throw new IOException("Stream not preceded by dictionary (offset: " 
+                        + offsetOrObjstmObNr + ").");
                     }
                     skipSpaces();
                     endObjectKey = readLine();
@@ -1271,7 +1324,14 @@ public class NonSequentialPDFParser exte
     }
 
     // ------------------------------------------------------------------------
-    /** Decrypts given COSString. */
+    /**
+     * Decrypts given COSString.
+     * 
+     * @param str the string to be decrypted
+     * @param objNr the object number
+     * @param objGenNr the object generation number
+     * @throws IOException if something went wrong
+     */
     protected final void decrypt(COSString str, long objNr, long objGenNr) throws IOException
     {
         try
@@ -1439,7 +1499,10 @@ public class NonSequentialPDFParser exte
             int bytesRead = 0;
             boolean unexpectedEndOfStream = false;
             if (remainBytes == 35090)
+            {
+                // TODO debug system out, to be removed??
                 System.out.println();
+            }
             while (remainBytes > 0)
             {
                 final int readBytes = pdfSource.read(streamCopyBuf, 0,

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1486413&r1=1486412&r2=1486413&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Sun May 26 13:26:16 2013
@@ -1247,7 +1247,40 @@ public class PDDocument implements Pagea
     }
 
     /**
-     * This will save this document to the filesystem.
+     * Parses PDF with non sequential parser.
+     *  
+     * @param input stream that contains the document.
+     * @param scratchFile location to store temp PDFBox data for this document
+     *
+     * @return loaded document
+     *
+     * @throws IOException  in case of a file reading or parsing error
+     */
+    public static PDDocument loadNonSeq( InputStream input, RandomAccess scratchFile) throws IOException
+    {
+        return loadNonSeq(input, scratchFile, "");
+    }
+
+    /**
+     * Parses PDF with non sequential parser.
+     *  
+     * @param input stream that contains the document.
+     * @param scratchFile location to store temp PDFBox data for this document
+     * @param password password to be used for decryption
+     *
+     * @return loaded document
+     *
+     * @throws IOException  in case of a file reading or parsing error
+     */
+    public static PDDocument loadNonSeq( InputStream input, RandomAccess scratchFile, String password ) throws IOException
+    {
+        NonSequentialPDFParser parser = new NonSequentialPDFParser( input, scratchFile, password );
+        parser.parse();
+        return parser.getPDDocument();
+    }
+
+    /**
+     * Save the document to a file.
      *
      * @param fileName The file to save as.
      *
@@ -1256,7 +1289,20 @@ public class PDDocument implements Pagea
      */
     public void save( String fileName ) throws IOException, COSVisitorException
     {
-        save( new FileOutputStream( fileName ) );
+        save( new File( fileName ) );
+    }
+
+    /**
+     * Save the document to a file.
+     *
+     * @param file The file to save as.
+     *
+     * @throws IOException If there is an error saving the document.
+     * @throws COSVisitorException If an error occurs while generating the data.
+     */
+    public void save( File file ) throws IOException, COSVisitorException
+    {
+        save( new FileOutputStream( file ) );
     }
 
     /**

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java?rev=1486413&r1=1486412&r2=1486413&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java Sun May 26 13:26:16 2013
@@ -29,6 +29,7 @@ import org.apache.pdfbox.io.TestRandomAc
 import org.apache.pdfbox.io.ccitt.TestCCITTFaxG31DDecodeInputStream;
 import org.apache.pdfbox.io.ccitt.TestPackedBitArray;
 import org.apache.pdfbox.pdmodel.TestFDF;
+import org.apache.pdfbox.pdmodel.TestPDDocument;
 import org.apache.pdfbox.pdmodel.TestPDDocumentCatalog;
 import org.apache.pdfbox.pdmodel.TestPDDocumentInformation;
 import org.apache.pdfbox.pdmodel.common.TestPDNameTreeNode;
@@ -85,6 +86,7 @@ public class TestAll extends TestCase
         suite.addTest( TestCOSString.suite() );
         suite.addTest( TestCOSInteger.suite() );
         suite.addTest( TestCOSFloat.suite() );
+        suite.addTestSuite( TestPDDocument.class );
         suite.addTestSuite( TestPDDocumentCatalog.class );
         suite.addTestSuite( TestPDDocumentInformation.class );
         suite.addTestSuite( org.apache.pdfbox.pdmodel.graphics.optionalcontent.TestOptionalContentGroups.class );

Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java?rev=1486413&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java Sun May 26 13:26:16 2013
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import org.apache.pdfbox.exceptions.COSVisitorException;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.RandomAccessBuffer;
+
+import junit.framework.TestCase;
+
+/**
+ * Testcase introduced with PDFBOX-1581.
+ * 
+ */
+public class TestPDDocument extends TestCase
+{
+    private File testResultsDir = new File("target/test-output");
+
+    @Override
+    protected void setUp() throws Exception
+    {
+        super.setUp();
+        testResultsDir.mkdirs();
+    }
+
+    /**
+     * Test document save/load using a stream.
+     * @throws IOException if something went wrong
+     * @throws COSVisitorException  if something went wrong
+     */
+    public void testSaveLoadStream() throws IOException, COSVisitorException
+    {
+        // Create PDF with one blank page
+        PDDocument document = new PDDocument();
+        document.addPage(new PDPage());
+
+        // Save
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        document.save(baos);
+        document.close();
+
+        // Verify content
+        byte[] pdf = baos.toByteArray();
+        assertTrue(pdf.length > 200);
+        assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+        assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+        // Load
+        PDDocument loadDoc = PDDocument.load(new ByteArrayInputStream(pdf), new RandomAccessBuffer());
+        assertEquals(1, loadDoc.getNumberOfPages());
+        loadDoc.close();
+    }
+
+    /**
+     * Test document save/load using a file.
+     * @throws IOException if something went wrong
+     * @throws COSVisitorException  if something went wrong
+     */
+    public void testSaveLoadFile() throws IOException, COSVisitorException
+    {
+        // Create PDF with one blank page
+        PDDocument document = new PDDocument();
+        document.addPage(new PDPage());
+
+        // Save
+        File targetFile = new File(testResultsDir, "pddocument-saveloadfile.pdf");
+        document.save(targetFile);
+        document.close();
+
+        // Verify content
+        assertTrue(targetFile.length() > 200);
+        InputStream in = new FileInputStream(targetFile);
+        byte[] pdf = IOUtils.toByteArray(in);
+        in.close();
+        assertTrue(pdf.length > 200);
+        assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+        assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+        // Load
+        PDDocument loadDoc = PDDocument.load(targetFile, new RandomAccessBuffer());
+        assertEquals(1, loadDoc.getNumberOfPages());
+        loadDoc.close();
+    }
+
+    /**
+     * Test document save/loadNonSeq using a stream.
+     * @throws IOException if something went wrong
+     * @throws COSVisitorException  if something went wrong
+     */
+public void testSaveLoadNonSeqStream() throws IOException, COSVisitorException
+    {
+        // Create PDF with one blank page
+        PDDocument document = new PDDocument();
+        document.addPage(new PDPage());
+
+        // Save
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        document.save(baos);
+        document.close();
+
+        // Verify content
+        byte[] pdf = baos.toByteArray();
+        assertTrue(pdf.length > 200);
+        assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+        assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+        // Load
+        PDDocument loadDoc = PDDocument.loadNonSeq(new ByteArrayInputStream(pdf), new RandomAccessBuffer());
+        assertEquals(1, loadDoc.getNumberOfPages());
+        loadDoc.close();
+    }
+
+    /**
+     * Test document save/loadNonSeq using a file.
+     * @throws IOException if something went wrong
+     * @throws COSVisitorException  if something went wrong
+     */
+    public void testSaveLoadNonSeqFile() throws IOException, COSVisitorException
+    {
+        // Create PDF with one blank page
+        PDDocument document = new PDDocument();
+        document.addPage(new PDPage());
+
+        // Save
+        File targetFile = new File(testResultsDir, "pddocument-saveloadnonseqfile.pdf");
+        document.save(targetFile);
+        document.close();
+
+        // Verify content
+        assertTrue(targetFile.length() > 200);
+        InputStream in = new FileInputStream(targetFile);
+        byte[] pdf = IOUtils.toByteArray(in);
+        in.close();
+        assertTrue(pdf.length > 200);
+        assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+        assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+        // Load
+        PDDocument loadDoc = PDDocument.loadNonSeq(targetFile, new RandomAccessBuffer());
+        assertEquals(1, loadDoc.getNumberOfPages());
+        loadDoc.close();
+    }
+}