You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by je...@apache.org on 2009/10/24 14:44:40 UTC
svn commit: r829357 -
/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java
Author: jeremias
Date: Sat Oct 24 12:44:40 2009
New Revision: 829357
URL: http://svn.apache.org/viewvc?rev=829357&view=rev
Log:
Added an example on how to extract (document-level) XMP metadata.
Added:
incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java (with props)
Added: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java
URL: http://svn.apache.org/viewvc/incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java?rev=829357&view=auto
==============================================================================
--- incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java (added)
+++ incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java Sat Oct 24 12:44:40 2009
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.examples.pdmodel;
+
+import org.apache.jempbox.xmp.XMPMetadata;
+import org.apache.jempbox.xmp.XMPSchemaBasic;
+import org.apache.jempbox.xmp.XMPSchemaDublinCore;
+import org.apache.jempbox.xmp.XMPSchemaPDF;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
+import org.apache.pdfbox.pdmodel.common.PDMetadata;
+
+import java.text.DateFormat;
+import java.util.Calendar;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * This is an example on how to extract document-level XMP metadata from a PDF document.
+ * <p>
+ * Usage: java org.apache.pdfbox.examples.pdmodel.ExtractMetadata &lt;input-pdf&gt;
+ *
+ * @version $Revision$
+ */
+public class ExtractMetadata
+{
+    private ExtractMetadata()
+    {
+        //utility class
+    }
+
+    /**
+     * Prints selected Dublin Core, PDF and XMP Basic properties of the given PDF to standard output.
+     *
+     * @param args The command line arguments; exactly one, the input PDF, is expected.
+     *
+     * @throws Exception If there is an error parsing the document.
+     */
+    public static void main( String[] args ) throws Exception
+    {
+        if( args.length != 1 )
+        {
+            usage();
+            System.exit(1);
+        }
+        else
+        {
+            PDDocument document = null;
+            try
+            {
+                document = PDDocument.load( args[0] );
+                PDDocumentCatalog catalog = document.getDocumentCatalog();
+                PDMetadata meta = catalog.getMetadata();
+                if (meta == null)
+                {
+                    // No metadata stream in the catalog; the finally block still closes the document.
+                    System.err.println( "The document does not contain any XMP metadata." );
+                    return;
+                }
+                XMPMetadata metadata = meta.exportXMPMetadata();
+
+                XMPSchemaDublinCore dc = metadata.getDublinCoreSchema();
+                if (dc != null)
+                {
+                    display("Title:", dc.getTitle());
+                    display("Description:", dc.getDescription());
+                    list("Creators: ", dc.getCreators());
+                    list("Dates:", dc.getDates());
+                }
+
+                XMPSchemaPDF pdf = metadata.getPDFSchema();
+                if (pdf != null)
+                {
+                    display("Keywords:", pdf.getKeywords());
+                    display("PDF Version:", pdf.getPDFVersion());
+                    display("PDF Producer:", pdf.getProducer());
+                }
+
+                XMPSchemaBasic basic = metadata.getBasicSchema();
+                if (basic != null)
+                {
+                    display("Create Date:", basic.getCreateDate());
+                    display("Modify Date:", basic.getModifyDate());
+                    display("Creator Tool:", basic.getCreatorTool());
+                }
+            }
+            finally
+            {
+                if( document != null )
+                {
+                    document.close();
+                }
+            }
+        }
+    }
+
+    /** Prints the title followed by one formatted line per element; does nothing for a null list. */
+    private static void list(String title, List list)
+    {
+        if (list == null)
+        {
+            return;
+        }
+        System.out.println(title);
+        for (Iterator iter = list.iterator(); iter.hasNext();)
+        {
+            System.out.println(" " + format(iter.next()));
+        }
+    }
+
+    /** Renders Calendar values as a locale-default date and everything else (null included) as text. */
+    private static String format(Object o)
+    {
+        if (o instanceof Calendar)
+        {
+            return DateFormat.getDateInstance().format(((Calendar)o).getTime());
+        }
+        return String.valueOf(o);
+    }
+
+    /** Prints the title and the formatted value on one line; null values are skipped. */
+    private static void display(String title, Object value)
+    {
+        if (value != null)
+        {
+            System.out.println(title + " " + format(value));
+        }
+    }
+
+    /** This will print the usage for this program. */
+    private static void usage()
+    {
+        System.err.println( "Usage: java " + ExtractMetadata.class.getName() + " <input-pdf>" );
+    }
+}
Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/pdfbox/trunk/src/main/java/org/apache/pdfbox/examples/pdmodel/ExtractMetadata.java
------------------------------------------------------------------------------
svn:keywords = Id