You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by le...@apache.org on 2013/05/26 15:26:17 UTC
svn commit: r1486413 - in /pdfbox/trunk/pdfbox/src:
main/java/org/apache/pdfbox/pdfparser/ main/java/org/apache/pdfbox/pdmodel/
test/java/org/apache/pdfbox/ test/java/org/apache/pdfbox/pdmodel/
Author: lehmi
Date: Sun May 26 13:26:16 2013
New Revision: 1486413
URL: http://svn.apache.org/r1486413
Log:
PDFBOX-1581: added PDDocument.save(File) and PDDocument.loadNonSeq(InputStream, ...) as proposed by Fredrik Kjellberg
Added:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java?rev=1486413&r1=1486412&r2=1486413&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/NonSequentialPDFParser.java Sun May 26 13:26:16 2013
@@ -91,16 +91,28 @@ public class NonSequentialPDFParser exte
private static final InputStream EMPTY_INPUT_STREAM = new ByteArrayInputStream(new byte[0]);
protected static final int DEFAULT_TRAIL_BYTECOUNT = 2048;
+ /**
+ * EOF-marker.
+ */
protected static final char[] EOF_MARKER = new char[]
{ '%', '%', 'E', 'O', 'F' };
+ /**
+ * StartXRef-marker.
+ */
protected static final char[] STARTXREF_MARKER = new char[]
{ 's', 't', 'a', 'r', 't', 'x', 'r', 'e', 'f' };
+ /**
+ * obj-marker.
+ */
protected static final char[] OBJ_MARKER = new char[]
{ 'o', 'b', 'j' };
private final File pdfFile;
private final RandomAccessBufferedFileInputStream raStream;
+ /**
+ * The security handler.
+ */
protected SecurityHandler securityHandler = null;
private String keyStoreFilename = null;
@@ -219,12 +231,31 @@ public class NonSequentialPDFParser exte
password = decryptionPassword;
}
+ /**
+ * Constructor.
+ *
+ * @param input input stream representing the pdf.
+ * @throws IOException If something went wrong.
+ */
public NonSequentialPDFParser(InputStream input) throws IOException
{
+ this(input, null, "");
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param input input stream representing the pdf.
+ * @param raBuf the buffer to be used for parsing
+ * @param decryptionPassword password to be used for decryption.
+ * @throws IOException If something went wrong.
+ */
+ public NonSequentialPDFParser(InputStream input, RandomAccess raBuf, String decryptionPassword) throws IOException
+ {
super(EMPTY_INPUT_STREAM, null, false);
pdfFile = createTmpFile(input);
raStream = new RandomAccessBufferedFileInputStream(pdfFile);
- init(pdfFile, null, "");
+ init(pdfFile, raBuf, decryptionPassword);
}
/**
@@ -234,7 +265,7 @@ public class NonSequentialPDFParser exte
*
* @param input
* @return
- * @throws IOException
+ * @throws IOException If something went wrong.
*/
private File createTmpFile(InputStream input) throws IOException
{
@@ -287,7 +318,7 @@ public class NonSequentialPDFParser exte
* can handle linearized pdfs, which will have an xref at the end pointing
* to an xref at the beginning of the file. Last the root object is parsed.
*
- * @throws IOException
+ * @throws IOException If something went wrong.
*/
protected void initialParse() throws IOException
{
@@ -442,7 +473,12 @@ public class NonSequentialPDFParser exte
return pdfSource.getOffset();
}
- /** Sets {@link #pdfSource} to start next parsing at given file offset. */
+ /**
+ * Sets {@link #pdfSource} to start next parsing at given file offset.
+ *
+ * @param fileOffset file offset
+ * @throws IOException If something went wrong.
+ */
protected final void setPdfSource(long fileOffset) throws IOException
{
@@ -458,7 +494,10 @@ public class NonSequentialPDFParser exte
// pdfSource.skip( _fileOffset );
}
- /** Enable handling of alternative pdfSource implementation. */
+ /**
+ * Enable handling of alternative pdfSource implementation.
+ * @throws IOException If something went wrong.
+ */
protected final void releasePdfSourceInputStream() throws IOException
{
// if ( pdfSource != null )
@@ -479,6 +518,9 @@ public class NonSequentialPDFParser exte
* (within last {@link #DEFAULT_TRAIL_BYTECOUNT} bytes (or range set via
* {@link #setEOFLookupRange(int)}) and go back to find
* <code>startxref</code>.
+ *
+ * @return the offset of StartXref
+ * @throws IOException If something went wrong.
*/
protected final long getStartxrefOffset() throws IOException
{
@@ -590,6 +632,7 @@ public class NonSequentialPDFParser exte
* Reads given pattern from {@link #pdfSource}. Skipping whitespace at start
* and end.
*
+ * @param pattern pattern to be skipped
* @throws IOException if pattern could not be read
*/
protected final void readPattern(final char[] pattern) throws IOException
@@ -697,6 +740,11 @@ public class NonSequentialPDFParser exte
}
}
+ /**
+ * Return the pdf file.
+ *
+ * @return the pdf file
+ */
protected File getPdfFile()
{
return this.pdfFile;
@@ -713,7 +761,9 @@ public class NonSequentialPDFParser exte
try
{
if (!pdfFile.delete())
+ {
LOG.warn("Temporary file '" + pdfFile.getName() + "' can't be deleted");
+ }
}
catch (SecurityException e)
{
@@ -750,7 +800,9 @@ public class NonSequentialPDFParser exte
{
PDDocument pdDocument = super.getPDDocument();
if (securityHandler != null)
+ {
pdDocument.setSecurityHandler(securityHandler);
+ }
return pdDocument;
}
@@ -1171,7 +1223,8 @@ public class NonSequentialPDFParser exte
// this is not legal
// the combination of a dict and the stream/endstream
// forms a complete stream object
- throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
+ throw new IOException("Stream not preceded by dictionary (offset: "
+ + offsetOrObjstmObNr + ").");
}
skipSpaces();
endObjectKey = readLine();
@@ -1271,7 +1324,14 @@ public class NonSequentialPDFParser exte
}
// ------------------------------------------------------------------------
- /** Decrypts given COSString. */
+ /**
+ * Decrypts given COSString.
+ *
+ * @param str the string to be decrypted
+ * @param objNr the object number
+ * @param objGenNr the object generation number
+ * @throws IOException if something went wrong
+ */
protected final void decrypt(COSString str, long objNr, long objGenNr) throws IOException
{
try
@@ -1439,7 +1499,10 @@ public class NonSequentialPDFParser exte
int bytesRead = 0;
boolean unexpectedEndOfStream = false;
if (remainBytes == 35090)
+ {
+ // TODO debug system out, to be removed??
System.out.println();
+ }
while (remainBytes > 0)
{
final int readBytes = pdfSource.read(streamCopyBuf, 0,
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1486413&r1=1486412&r2=1486413&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Sun May 26 13:26:16 2013
@@ -1247,7 +1247,40 @@ public class PDDocument implements Pagea
}
/**
- * This will save this document to the filesystem.
+ * Parses PDF with non sequential parser.
+ *
+ * @param input stream that contains the document.
+ * @param scratchFile location to store temp PDFBox data for this document
+ *
+ * @return loaded document
+ *
+ * @throws IOException in case of a file reading or parsing error
+ */
+ public static PDDocument loadNonSeq( InputStream input, RandomAccess scratchFile) throws IOException
+ {
+ return loadNonSeq(input, scratchFile, "");
+ }
+
+ /**
+ * Parses PDF with non sequential parser.
+ *
+ * @param input stream that contains the document.
+ * @param scratchFile location to store temp PDFBox data for this document
+ * @param password password to be used for decryption
+ *
+ * @return loaded document
+ *
+ * @throws IOException in case of a file reading or parsing error
+ */
+ public static PDDocument loadNonSeq( InputStream input, RandomAccess scratchFile, String password ) throws IOException
+ {
+ NonSequentialPDFParser parser = new NonSequentialPDFParser( input, scratchFile, password );
+ parser.parse();
+ return parser.getPDDocument();
+ }
+
+ /**
+ * Save the document to a file.
*
* @param fileName The file to save as.
*
@@ -1256,7 +1289,20 @@ public class PDDocument implements Pagea
*/
public void save( String fileName ) throws IOException, COSVisitorException
{
- save( new FileOutputStream( fileName ) );
+ save( new File( fileName ) );
+ }
+
+ /**
+ * Save the document to a file.
+ *
+ * @param file The file to save as.
+ *
+ * @throws IOException If there is an error saving the document.
+ * @throws COSVisitorException If an error occurs while generating the data.
+ */
+ public void save( File file ) throws IOException, COSVisitorException
+ {
+ save( new FileOutputStream( file ) );
}
/**
Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java?rev=1486413&r1=1486412&r2=1486413&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/TestAll.java Sun May 26 13:26:16 2013
@@ -29,6 +29,7 @@ import org.apache.pdfbox.io.TestRandomAc
import org.apache.pdfbox.io.ccitt.TestCCITTFaxG31DDecodeInputStream;
import org.apache.pdfbox.io.ccitt.TestPackedBitArray;
import org.apache.pdfbox.pdmodel.TestFDF;
+import org.apache.pdfbox.pdmodel.TestPDDocument;
import org.apache.pdfbox.pdmodel.TestPDDocumentCatalog;
import org.apache.pdfbox.pdmodel.TestPDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.TestPDNameTreeNode;
@@ -85,6 +86,7 @@ public class TestAll extends TestCase
suite.addTest( TestCOSString.suite() );
suite.addTest( TestCOSInteger.suite() );
suite.addTest( TestCOSFloat.suite() );
+ suite.addTestSuite( TestPDDocument.class );
suite.addTestSuite( TestPDDocumentCatalog.class );
suite.addTestSuite( TestPDDocumentInformation.class );
suite.addTestSuite( org.apache.pdfbox.pdmodel.graphics.optionalcontent.TestOptionalContentGroups.class );
Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java?rev=1486413&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java (added)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/TestPDDocument.java Sun May 26 13:26:16 2013
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import org.apache.pdfbox.exceptions.COSVisitorException;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.io.RandomAccessBuffer;
+
+import junit.framework.TestCase;
+
+/**
+ * Testcase introduced with PDFBOX-1581.
+ *
+ */
+public class TestPDDocument extends TestCase
+{
+ private File testResultsDir = new File("target/test-output");
+
+ @Override
+ protected void setUp() throws Exception
+ {
+ super.setUp();
+ testResultsDir.mkdirs();
+ }
+
+ /**
+ * Test document save/load using a stream.
+ * @throws IOException if something went wrong
+ * @throws COSVisitorException if something went wrong
+ */
+ public void testSaveLoadStream() throws IOException, COSVisitorException
+ {
+ // Create PDF with one blank page
+ PDDocument document = new PDDocument();
+ document.addPage(new PDPage());
+
+ // Save
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ document.save(baos);
+ document.close();
+
+ // Verify content
+ byte[] pdf = baos.toByteArray();
+ assertTrue(pdf.length > 200);
+ assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+ assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+ // Load
+ PDDocument loadDoc = PDDocument.load(new ByteArrayInputStream(pdf), new RandomAccessBuffer());
+ assertEquals(1, loadDoc.getNumberOfPages());
+ loadDoc.close();
+ }
+
+ /**
+ * Test document save/load using a file.
+ * @throws IOException if something went wrong
+ * @throws COSVisitorException if something went wrong
+ */
+ public void testSaveLoadFile() throws IOException, COSVisitorException
+ {
+ // Create PDF with one blank page
+ PDDocument document = new PDDocument();
+ document.addPage(new PDPage());
+
+ // Save
+ File targetFile = new File(testResultsDir, "pddocument-saveloadfile.pdf");
+ document.save(targetFile);
+ document.close();
+
+ // Verify content
+ assertTrue(targetFile.length() > 200);
+ InputStream in = new FileInputStream(targetFile);
+ byte[] pdf = IOUtils.toByteArray(in);
+ in.close();
+ assertTrue(pdf.length > 200);
+ assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+ assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+ // Load
+ PDDocument loadDoc = PDDocument.load(targetFile, new RandomAccessBuffer());
+ assertEquals(1, loadDoc.getNumberOfPages());
+ loadDoc.close();
+ }
+
+ /**
+ * Test document save/loadNonSeq using a stream.
+ * @throws IOException if something went wrong
+ * @throws COSVisitorException if something went wrong
+ */
+public void testSaveLoadNonSeqStream() throws IOException, COSVisitorException
+ {
+ // Create PDF with one blank page
+ PDDocument document = new PDDocument();
+ document.addPage(new PDPage());
+
+ // Save
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ document.save(baos);
+ document.close();
+
+ // Verify content
+ byte[] pdf = baos.toByteArray();
+ assertTrue(pdf.length > 200);
+ assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+ assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+ // Load
+ PDDocument loadDoc = PDDocument.loadNonSeq(new ByteArrayInputStream(pdf), new RandomAccessBuffer());
+ assertEquals(1, loadDoc.getNumberOfPages());
+ loadDoc.close();
+ }
+
+ /**
+ * Test document save/loadNonSeq using a file.
+ * @throws IOException if something went wrong
+ * @throws COSVisitorException if something went wrong
+ */
+ public void testSaveLoadNonSeqFile() throws IOException, COSVisitorException
+ {
+ // Create PDF with one blank page
+ PDDocument document = new PDDocument();
+ document.addPage(new PDPage());
+
+ // Save
+ File targetFile = new File(testResultsDir, "pddocument-saveloadnonseqfile.pdf");
+ document.save(targetFile);
+ document.close();
+
+ // Verify content
+ assertTrue(targetFile.length() > 200);
+ InputStream in = new FileInputStream(targetFile);
+ byte[] pdf = IOUtils.toByteArray(in);
+ in.close();
+ assertTrue(pdf.length > 200);
+ assertEquals("%PDF-1.4", new String(Arrays.copyOfRange(pdf, 0, 8), "UTF-8"));
+ assertEquals("%%EOF\n", new String(Arrays.copyOfRange(pdf, pdf.length - 6, pdf.length), "UTF-8"));
+
+ // Load
+ PDDocument loadDoc = PDDocument.loadNonSeq(targetFile, new RandomAccessBuffer());
+ assertEquals(1, loadDoc.getNumberOfPages());
+ loadDoc.close();
+ }
+}