You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by gb...@apache.org on 2011/12/06 21:15:20 UTC
svn commit: r1211081 [3/3] - in /pdfbox/trunk: ./ examples/ examples/src/
examples/src/main/ examples/src/main/java/ examples/src/main/java/org/
examples/src/main/java/org/apache/
examples/src/main/java/org/apache/pdfbox/ examples/src/main/java/org/apa...
Added: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java?rev=1211081&view=auto
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java (added)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java Tue Dec 6 20:15:18 2011
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.examples.signature;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+
+import java.security.cert.CertificateFactory;
+
+import java.util.Collection;
+
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.cos.COSString;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+
+/**
+ * This will read a document from the filesystem, decrypt it if it is encrypted and print the certificates of any signature fields it contains.
+ *
+ * usage: java org.apache.pdfbox.examples.signature.ShowSignature <password> <inputfile>
+ *
+ *
+ * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @version $Revision: 1.9 $
+ */
+public class ShowSignature
+{
+
+ private ShowSignature()
+ {
+ }
+ /**
+ * This is the entry point for the application.
+ *
+ * @param args The command-line arguments.
+ *
+ * @throws Exception If there is an error reading the file.
+ */
+ public static void main( String[] args ) throws Exception
+ {
+     // All of the work happens in the instance method; main only forwards the arguments.
+     new ShowSignature().showSignature( args );
+ }
+
+ private void showSignature( String[] args ) throws Exception
+ {
+     // Expects exactly two arguments, <password> and <inputfile>; otherwise only the usage text is printed.
+     if( args.length != 2 )
+     {
+         usage();
+         return;
+     }
+     String password = args[0];
+     String infile = args[1];
+     PDDocument document = null;
+     try
+     {
+         document = PDDocument.load( infile );
+
+         if( document.isEncrypted() )
+         {
+             document.decrypt( password );
+         }
+         else
+         {
+             System.err.println( "Warning: Document is not encrypted." );
+         }
+
+         // Walk trailer -> Root -> AcroForm -> Fields. A document without a form has no
+         // signature fields, so report that instead of failing with a NullPointerException.
+         COSDictionary trailer = document.getDocument().getTrailer();
+         COSDictionary root = (COSDictionary)trailer.getDictionaryObject( COSName.ROOT );
+         COSDictionary acroForm = root == null ? null :
+             (COSDictionary)root.getDictionaryObject( COSName.ACRO_FORM );
+         COSArray fields = acroForm == null ? null :
+             (COSArray)acroForm.getDictionaryObject( COSName.FIELDS );
+         if( fields == null )
+         {
+             System.err.println( "Warning: Document does not contain any form fields." );
+             return;
+         }
+         for( int i=0; i<fields.size(); i++ )
+         {
+             COSDictionary field = (COSDictionary)fields.getObject( i );
+             if( !"Sig".equals( field.getNameAsString( "FT" ) ) )
+             {
+                 continue;
+             }
+             COSDictionary cert = (COSDictionary)field.getDictionaryObject( COSName.V );
+             if( cert == null )
+             {
+                 System.out.println( "Signature found, but no certificate" );
+                 continue;
+             }
+             System.out.println( "Certificate found" );
+             System.out.println( "Name=" + cert.getDictionaryObject( COSName.NAME ) );
+             System.out.println( "Modified=" + cert.getDictionaryObject( COSName.getPDFName( "M" ) ) );
+             COSName subFilter = (COSName)cert.getDictionaryObject( COSName.getPDFName( "SubFilter" ) );
+             if( subFilter == null )
+             {
+                 throw new IOException( "Missing subfilter for cert dictionary" );
+             }
+             // The certificate bytes are stored under a different key for each supported sub filter.
+             COSName certKey = null;
+             if( "adbe.x509.rsa_sha1".equals( subFilter.getName() ) )
+             {
+                 certKey = COSName.getPDFName( "Cert" );
+             }
+             else if( "adbe.pkcs7.sha1".equals( subFilter.getName() ) )
+             {
+                 certKey = COSName.CONTENTS;
+             }
+             if( certKey == null )
+             {
+                 System.err.println( "Unknown certificate type:" + subFilter );
+                 continue;
+             }
+             COSString certString = (COSString)cert.getDictionaryObject( certKey );
+             byte[] certData = certString.getBytes();
+             CertificateFactory factory = CertificateFactory.getInstance( "X.509" );
+             ByteArrayInputStream certStream = new ByteArrayInputStream( certData );
+             Collection certs = factory.generateCertificates( certStream );
+             System.out.println( "certs=" + certs );
+         }
+     }
+     finally
+     {
+         if( document != null )
+         {
+             document.close();
+         }
+     }
+ }
+
+ /**
+ * This will print a usage message.
+ */
+ private static void usage()
+ {
+     // Goes to stderr, keeping it apart from the certificate details printed on stdout.
+     System.err.println( "usage: java org.apache.pdfbox.examples.signature.ShowSignature <password> <inputfile>" );
+ }
+
+}
Propchange: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/ShowSignature.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html?rev=1211081&view=auto
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html (added)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html Tue Dec 6 20:15:18 2011
@@ -0,0 +1,25 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one or more
+ ! contributor license agreements. See the NOTICE file distributed with
+ ! this work for additional information regarding copyright ownership.
+ ! The ASF licenses this file to You under the Apache License, Version 2.0
+ ! (the "License"); you may not use this file except in compliance with
+ ! the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing, software
+ ! distributed under the License is distributed on an "AS IS" BASIS,
+ ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ! See the License for the specific language governing permissions and
+ ! limitations under the License.
+ !-->
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head>
+
+</head>
+<body>
+These examples will show how to gain access to the PDF signature.
+</body>
+</html>
Propchange: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/signature/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextByArea.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextByArea.java?rev=1211081&view=auto
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextByArea.java (added)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextByArea.java Tue Dec 6 20:15:18 2011
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.examples.util;
+
+import org.apache.pdfbox.exceptions.InvalidPasswordException;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.util.PDFTextStripperByArea;
+
+import java.awt.Rectangle;
+
+import java.util.List;
+
+/**
+ * This is an example on how to extract text from a specific area on the PDF document.
+ *
+ * Usage: java org.apache.pdfbox.examples.util.ExtractTextByArea <input-pdf>
+ *
+ * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @version $Revision: 1.2 $
+ */
+public class ExtractTextByArea
+{
+ private ExtractTextByArea()
+ {
+ //utility class and should not be constructed.
+ }
+
+
+ /**
+ * This will print the documents text in a certain area.
+ *
+ * @param args The command line arguments.
+ *
+ * @throws Exception If there is an error parsing the document.
+ */
+ public static void main( String[] args ) throws Exception
+ {
+     // Exactly one argument, the PDF to read, is required.
+     if( args.length != 1 )
+     {
+         usage();
+         return;
+     }
+     PDDocument pdf = null;
+     try
+     {
+         pdf = PDDocument.load( args[0] );
+         if( pdf.isEncrypted() )
+         {
+             try
+             {
+                 // Only the empty password is attempted.
+                 pdf.decrypt( "" );
+             }
+             catch( InvalidPasswordException e )
+             {
+                 System.err.println( "Error: Document is encrypted with a password." );
+                 System.exit( 1 );
+             }
+         }
+         // Register one named region, then extract it from the first page only.
+         PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
+         areaStripper.setSortByPosition( true );
+         Rectangle area = new Rectangle( 10, 280, 275, 60 );
+         areaStripper.addRegion( "class1", area );
+         List pages = pdf.getDocumentCatalog().getAllPages();
+         PDPage firstPage = (PDPage)pages.get( 0 );
+         areaStripper.extractRegions( firstPage );
+         System.out.println( "Text in the area:" + area );
+         System.out.println( areaStripper.getTextForRegion( "class1" ) );
+     }
+     finally
+     {
+         if( pdf != null )
+         {
+             pdf.close();
+         }
+     }
+ }
+
+ /**
+ * This will print the usage for this document.
+ */
+ private static void usage()
+ {
+     System.err.println( "Usage: java " + ExtractTextByArea.class.getName() + " <input-pdf>" );
+ }
+
+}
Propchange: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/ExtractTextByArea.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java?rev=1211081&view=auto
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java (added)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java Tue Dec 6 20:15:18 2011
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.examples.util;
+
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.exceptions.InvalidPasswordException;
+import org.apache.pdfbox.exceptions.WrappedIOException;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObject;
+import org.apache.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
+import org.apache.pdfbox.util.Matrix;
+import org.apache.pdfbox.util.PDFOperator;
+import org.apache.pdfbox.util.PDFStreamEngine;
+import org.apache.pdfbox.util.ResourceLoader;
+
+import java.awt.geom.AffineTransform;
+import java.awt.geom.NoninvertibleTransformException;
+import java.io.IOException;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This is an example on how to get the x/y coordinates of image locations.
+ *
+ * Usage: java org.apache.pdfbox.examples.util.PrintImageLocations <input-pdf>
+ *
+ * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @version $Revision: 1.5 $
+ */
+public class PrintImageLocations extends PDFStreamEngine
+{
+ /**
+ * Default constructor.
+ *
+ * @throws IOException If there is an error loading text stripper properties.
+ */
+ public PrintImageLocations() throws IOException
+ {
+ super( ResourceLoader.loadProperties(
+ "org/apache/pdfbox/resources/PDFTextStripper.properties", true ) );
+ }
+
+ /**
+ * This will print the documents data.
+ *
+ * @param args The command line arguments.
+ *
+ * @throws Exception If there is an error parsing the document.
+ */
+ public static void main( String[] args ) throws Exception
+ {
+     if( args.length != 1 )
+     {
+         usage();
+         return;
+     }
+     // Load before entering the try block: if loading fails there is nothing to close.
+     PDDocument document = PDDocument.load( args[0] );
+     try
+     {
+         if( document.isEncrypted() )
+         {
+             try
+             {
+                 document.decrypt( "" );
+             }
+             catch( InvalidPasswordException e )
+             {
+                 System.err.println( "Error: Document is encrypted with a password." );
+                 System.exit( 1 );
+             }
+         }
+         PrintImageLocations printer = new PrintImageLocations();
+         List allPages = document.getDocumentCatalog().getAllPages();
+         for( int i=0; i<allPages.size(); i++ )
+         {
+             PDPage page = (PDPage)allPages.get( i );
+             System.out.println( "Processing page: " + i );
+             // getContents() may return null for a page without a content stream;
+             // skip such pages rather than fail with a NullPointerException.
+             if( page.getContents() != null )
+             {
+                 printer.processStream( page, page.findResources(), page.getContents().getStream() );
+             }
+         }
+     }
+     finally
+     {
+         document.close();
+     }
+ }
+
+ /**
+ * This is used to handle an operation.
+ *
+ * @param operator The operation to perform.
+ * @param arguments The list of arguments.
+ *
+ * @throws IOException If there is an error processing the operation.
+ */
+ protected void processOperator( PDFOperator operator, List arguments ) throws IOException
+ {
+     String operation = operator.getOperation();
+     if( operation.equals( "Do" ) )
+     {
+         // "Do" names an XObject in the current resources; only image XObjects are reported.
+         COSName objectName = (COSName)arguments.get( 0 );
+         Map xobjects = getResources().getXObjects();
+         PDXObject xobject = (PDXObject)xobjects.get( objectName.getName() );
+         if( xobject instanceof PDXObjectImage )
+         {
+             try
+             {
+                 PDXObjectImage image = (PDXObjectImage)xobject;
+                 PDPage page = getCurrentPage();
+                 Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();
+                 double rotationInRadians =(page.findRotation() * Math.PI)/180;
+
+                 // Undo the page rotation by multiplying the current transformation
+                 // matrix with the inverse of the rotation transform.
+                 AffineTransform rotation = new AffineTransform();
+                 rotation.setToRotation( rotationInRadians );
+                 AffineTransform rotationInverse = rotation.createInverse();
+                 Matrix rotationInverseMatrix = new Matrix();
+                 rotationInverseMatrix.setFromAffineTransform( rotationInverse );
+
+                 Matrix unrotatedCTM = ctm.multiply( rotationInverseMatrix );
+                 float xScale = unrotatedCTM.getXScale();
+                 float yScale = unrotatedCTM.getYScale();
+
+                 System.out.println( "Found image[" + objectName.getName() + "] " +
+                     "at " + unrotatedCTM.getXPosition() + "," + unrotatedCTM.getYPosition() +
+                     " size=" + (xScale/100f*image.getWidth()) + "," + (yScale/100f*image.getHeight() ));
+             }
+             catch( NoninvertibleTransformException e )
+             {
+                 throw new WrappedIOException( e );
+             }
+         }
+     }
+     else
+     {
+         super.processOperator( operator, arguments );
+     }
+ }
+
+ /**
+ * This will print the usage for this document.
+ */
+ private static void usage()
+ {
+     System.err.println( "Usage: java " + PrintImageLocations.class.getName() + " <input-pdf>" );
+ }
+
+}
Propchange: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintImageLocations.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java?rev=1211081&view=auto
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java (added)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java Tue Dec 6 20:15:18 2011
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.examples.util;
+
+import org.apache.pdfbox.exceptions.InvalidPasswordException;
+
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.common.PDStream;
+import org.apache.pdfbox.util.PDFTextStripper;
+import org.apache.pdfbox.util.TextPosition;
+
+import java.io.IOException;
+
+import java.util.List;
+
+/**
+ * This is an example on how to get some x/y coordinates of text.
+ *
+ * Usage: java org.apache.pdfbox.examples.util.PrintTextLocations <input-pdf>
+ *
+ * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @version $Revision: 1.7 $
+ */
+public class PrintTextLocations extends PDFTextStripper
+{
+ /**
+ * Default constructor.
+ *
+ * @throws IOException If there is an error loading text stripper properties.
+ */
+ public PrintTextLocations() throws IOException
+ {
+ super.setSortByPosition( true );
+ }
+
+ /**
+ * This will print the documents data.
+ *
+ * @param args The command line arguments.
+ *
+ * @throws Exception If there is an error parsing the document.
+ */
+ public static void main( String[] args ) throws Exception
+ {
+     // Exactly one argument is expected: the PDF file to inspect.
+     if( args.length != 1 )
+     {
+         usage();
+         return;
+     }
+     PDDocument document = null;
+     try
+     {
+         document = PDDocument.load( args[0] );
+         if( document.isEncrypted() )
+         {
+             try
+             {
+                 document.decrypt( "" );
+             }
+             catch( InvalidPasswordException e )
+             {
+                 System.err.println( "Error: Document is encrypted with a password." );
+                 System.exit( 1 );
+             }
+         }
+         PrintTextLocations printer = new PrintTextLocations();
+         List allPages = document.getDocumentCatalog().getAllPages();
+         for( int i=0; i<allPages.size(); i++ )
+         {
+             PDPage page = (PDPage)allPages.get( i );
+             System.out.println( "Processing page: " + i );
+             PDStream contents = page.getContents();
+             if( contents != null )
+             {
+                 // Use the stream that was just null-checked instead of fetching it a second time.
+                 printer.processStream( page, page.findResources(), contents.getStream() );
+             }
+         }
+     }
+     finally
+     {
+         if( document != null )
+         {
+             document.close();
+         }
+     }
+ }
+
+ /**
+ * A method provided as an event interface to allow a subclass to perform
+ * some specific functionality when text needs to be processed.
+ *
+ * @param text The text to be processed
+ */
+ protected void processTextPosition( TextPosition text )
+ {
+     String position = text.getXDirAdj() + "," + text.getYDirAdj();
+     String metrics = " fs=" + text.getFontSize() + " xscale=" + text.getXScale() +
+         " height=" + text.getHeightDir() + " space=" + text.getWidthOfSpace() +
+         " width=" + text.getWidthDirAdj();
+     System.out.println( "String[" + position + metrics + "]" + text.getCharacter() );
+ }
+
+ /**
+ * This will print the usage for this document.
+ */
+ private static void usage()
+ {
+     System.err.println( "Usage: java " + PrintTextLocations.class.getName() + " <input-pdf>" );
+ }
+
+}
Propchange: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/PrintTextLocations.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java?rev=1211081&view=auto
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java (added)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java Tue Dec 6 20:15:18 2011
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.examples.util;
+
+import org.apache.pdfbox.pdfparser.PDFStreamParser;
+import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.common.PDStream;
+import org.apache.pdfbox.util.PDFOperator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This is an example on how to remove all text from PDF document.
+ *
+ * Usage: java org.apache.pdfbox.examples.util.RemoveAllText <input-pdf> <output-pdf>
+ *
+ * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
+ * @version $Revision: 1.2 $
+ */
+public class RemoveAllText
+{
+ /**
+ * Default constructor.
+ */
+ private RemoveAllText()
+ {
+ //example class should not be instantiated
+ }
+
+ /**
+ * This will remove all text from a PDF document.
+ *
+ * @param args The command line arguments.
+ *
+ * @throws Exception If there is an error parsing the document.
+ */
+ public static void main( String[] args ) throws Exception
+ {
+     if( args.length != 2 )
+     {
+         usage();
+         return;
+     }
+     PDDocument document = PDDocument.load( args[0] );
+     try
+     {
+         if( document.isEncrypted() )
+         {
+             System.err.println( "Error: Encrypted documents are not supported for this example." );
+             System.exit( 1 );
+         }
+         List allPages = document.getDocumentCatalog().getAllPages();
+         for( int i=0; i<allPages.size(); i++ )
+         {
+             PDPage page = (PDPage)allPages.get( i );
+             PDStream contents = page.getContents();
+             if( contents == null )
+             {
+                 // A page without a content stream has no text to remove; leave it untouched.
+                 continue;
+             }
+             PDFStreamParser parser = new PDFStreamParser( contents );
+             parser.parse();
+             List tokens = parser.getTokens();
+             List newTokens = new ArrayList();
+             for( int j=0; j<tokens.size(); j++)
+             {
+                 Object token = tokens.get( j );
+                 if( token instanceof PDFOperator )
+                 {
+                     PDFOperator op = (PDFOperator)token;
+                     if( op.getOperation().equals( "TJ") || op.getOperation().equals( "Tj" ))
+                     {
+                         // Skip the operator and drop its one operand, which was copied just before it.
+                         newTokens.remove( newTokens.size() -1 );
+                         continue;
+                     }
+                 }
+                 newTokens.add( token );
+             }
+             // Write the filtered tokens to a fresh compressed stream and make it the page content.
+             PDStream newContents = new PDStream( document );
+             ContentStreamWriter writer = new ContentStreamWriter( newContents.createOutputStream() );
+             writer.writeTokens( newTokens );
+             newContents.addCompression();
+             page.setContents( newContents );
+         }
+         document.save( args[1] );
+     }
+     finally
+     {
+         document.close();
+     }
+ }
+
+ /**
+ * This will print the usage for this document.
+ */
+ private static void usage()
+ {
+     System.err.println( "Usage: java " + RemoveAllText.class.getName() + " <input-pdf> <output-pdf>" );
+ }
+
+}
Propchange: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/RemoveAllText.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/package.html
URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/package.html?rev=1211081&view=auto
==============================================================================
--- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/package.html (added)
+++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/package.html Tue Dec 6 20:15:18 2011
@@ -0,0 +1,25 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one or more
+ ! contributor license agreements. See the NOTICE file distributed with
+ ! this work for additional information regarding copyright ownership.
+ ! The ASF licenses this file to You under the Apache License, Version 2.0
+ ! (the "License"); you may not use this file except in compliance with
+ ! the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing, software
+ ! distributed under the License is distributed on an "AS IS" BASIS,
+ ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ! See the License for the specific language governing permissions and
+ ! limitations under the License.
+ !-->
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head>
+
+</head>
+<body>
+The examples in this package show how to use the PDFBox util API.
+</body>
+</html>
Propchange: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/util/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: pdfbox/trunk/pom.xml
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pom.xml?rev=1211081&r1=1211080&r2=1211081&view=diff
==============================================================================
--- pdfbox/trunk/pom.xml (original)
+++ pdfbox/trunk/pom.xml Tue Dec 6 20:15:18 2011
@@ -53,6 +53,7 @@
<module>ant</module>
<module>war</module>
<module>app</module>
+ <module>examples</module>
</modules>
<build>