You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ad...@apache.org on 2010/07/16 19:46:29 UTC
svn commit: r964874 -
/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PageExtractor.java
Author: adam
Date: Fri Jul 16 17:46:29 2010
New Revision: 964874
URL: http://svn.apache.org/viewvc?rev=964874&view=rev
Log:
PDFBOX-777: Add utility class to easily extract a range of pages from a PDF
Added:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PageExtractor.java
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PageExtractor.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PageExtractor.java?rev=964874&view=auto
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PageExtractor.java (added)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PageExtractor.java Fri Jul 16 17:46:29 2010
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.util;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+
+/**
+ * This class will extract one or more sequential pages and create a new document.
+ * Page numbers are 1-based, and both ends of the requested range are inclusive.
+ * @author Adam Nichols (adam@apache.org)
+ */
+public class PageExtractor {
+    /** The document that pages are extracted from. */
+    protected PDDocument sourceDocument;
+    /** First page to extract (1-based, inclusive); page 1 by default. */
+    protected int startPage = 1;
+    /** Last page to extract (1-based, inclusive); both constructors assign it. */
+    protected int endPage = 0;
+
+    /**
+     * Creates a new instance of PageExtractor that covers every page, from
+     * page 1 up to the number of pages the document reports at construction time.
+     * @param sourceDocument The document to split.
+     */
+    public PageExtractor(PDDocument sourceDocument) {
+        this.sourceDocument = sourceDocument;
+        endPage = sourceDocument.getNumberOfPages();
+    }
+
+    /**
+     * Creates a new instance of PageExtractor for an explicit page range.
+     * @param sourceDocument The document to split.
+     * @param startPage The first page you want extracted (inclusive)
+     * @param endPage The last page you want extracted (inclusive)
+     */
+    public PageExtractor(PDDocument sourceDocument, int startPage, int endPage) {
+        this(sourceDocument);
+        this.startPage = startPage;
+        this.endPage = endPage;
+    }
+
+    /**
+     * This will take a document and extract the desired pages into a new
+     * document. Both startPage and endPage are included in the extracted
+     * document. If the endPage is greater than the number of pages in the
+     * source document, it will go to the end of the document. If startPage is
+     * less than 1, it'll start with page 1. If startPage is greater than
+     * endPage or greater than the number of pages in the source document, a
+     * blank document will be returned.
+     * <p>
+     * The source document's information and viewer preferences are also set
+     * on the new document.
+     *
+     * @return The extracted document
+     * @throws IOException If there is an IOError
+     */
+    public PDDocument extract() throws IOException {
+        PDDocument extractedDocument = new PDDocument();
+        // NOTE(review): these objects are handed over as-is rather than copied here;
+        // confirm whether the two documents are meant to share them.
+        extractedDocument.setDocumentInformation(sourceDocument.getDocumentInformation());
+        extractedDocument.getDocumentCatalog().setViewerPreferences(
+                sourceDocument.getDocumentCatalog().getViewerPreferences());
+
+        // The cast is unchecked; the page list is presumed to hold only PDPage
+        // instances (verify against getAllPages()).
+        @SuppressWarnings("unchecked")
+        List<PDPage> pages = (List<PDPage>) sourceDocument.getDocumentCatalog().getAllPages();
+        int pageCounter = 1; // 1-based number of the page currently being visited
+        for (PDPage page : pages) {
+            if (pageCounter > endPage) {
+                // Past the requested range: no later page can match, so stop early.
+                break;
+            }
+            if (pageCounter >= startPage) {
+                PDPage imported = extractedDocument.importPage(page);
+                // Re-apply the source page's boxes, resources and rotation
+                // explicitly on the imported page.
+                imported.setCropBox(page.findCropBox());
+                imported.setMediaBox(page.findMediaBox());
+                imported.setResources(page.findResources());
+                imported.setRotation(page.findRotation());
+            }
+            pageCounter++;
+        }
+
+        return extractedDocument;
+    }
+
+    /**
+     * Gets the first page number to be extracted.
+     * @return the first page number which should be extracted (1-based, inclusive)
+     */
+    public int getStartPage() {
+        return startPage;
+    }
+
+    /**
+     * Sets the first page number to be extracted.
+     * @param startPage the first page number which should be extracted (1-based, inclusive)
+     */
+    public void setStartPage(int startPage) {
+        this.startPage = startPage;
+    }
+
+    /**
+     * Gets the last page number to be extracted.
+     * @return the last page number which should be extracted (1-based, inclusive)
+     */
+    public int getEndPage() {
+        return endPage;
+    }
+
+    /**
+     * Sets the last page number to be extracted.
+     * @param endPage the last page number which should be extracted (1-based, inclusive)
+     */
+    public void setEndPage(int endPage) {
+        this.endPage = endPage;
+    }
+}