You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2023/05/18 17:30:33 UTC

svn commit: r1909913 - in /pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools: ExtractXMP.java PDFBox.java

Author: tilman
Date: Thu May 18 17:30:33 2023
New Revision: 1909913

URL: http://svn.apache.org/viewvc?rev=1909913&view=rev
Log:
PDFBOX-5598: create a command line utility to extract document XMP content or page XMP content

Added:
    pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java   (with props)
Modified:
    pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java

Added: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java?rev=1909913&view=auto
==============================================================================
--- pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java (added)
+++ pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java Thu May 18 17:30:33 2023
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.tools;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintStream;
+
+import org.apache.pdfbox.io.IOUtils;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
+import org.apache.pdfbox.pdmodel.common.PDMetadata;
+
+/**
+ * Command line tool that extracts the XMP metadata of a PDF document, or of one of its
+ * pages, and writes it to an XML file or to the console.
+ *
+ * @author Tilman Hausherr
+ */
+public class ExtractXMP
+{
+    @SuppressWarnings({"squid:S2068"})
+    private static final String PASSWORD = "-password";
+    private static final String CONSOLE = "-console";
+    private static final String PAGE = "-page";
+
+    /**
+     * Default constructor; instances hold no state.
+     */
+    public ExtractXMP()
+    {
+        // nothing to initialize
+    }
+
+    /**
+     * Command line entry point.
+     *
+     * @param args Command line arguments: options, the input PDF and an optional output file.
+     *
+     * @throws IOException if there is an error reading the document or extracting the XMP data.
+     */
+    public static void main( String[] args ) throws IOException
+    {
+        // suppress the Dock icon on OS X
+        System.setProperty("apple.awt.UIElement", "true");
+
+        ExtractXMP extractor = new ExtractXMP();
+        extractor.startExtraction(args);
+    }
+
+    /**
+     * Parses the arguments, loads the PDF and writes the document or page XMP metadata.
+     * Calls System.exit(): 0 on completion, 1 on bad arguments or missing page/metadata.
+     *
+     * @param args Command line arguments.
+     *
+     * @throws IOException if the document cannot be read or the XMP data cannot be written.
+     */
+    public void startExtraction(String[] args) throws IOException
+    {
+        boolean toConsole = false;
+        @SuppressWarnings({"squid:S2068"})
+        String password = "";
+        int page = 0;
+        String pdfFile = null;
+        String outputFile = null;
+
+        for (int i = 0; i < args.length; i++)
+        {
+            if (args[i].equals(PASSWORD))
+            {
+                i++;
+                if (i >= args.length)
+                {
+                    usage();
+                }
+                password = args[i];
+            }
+            else if (args[i].equals(PAGE))
+            {
+                i++;
+                if (i >= args.length)
+                {
+                    usage();
+                }
+                try
+                {
+                    page = Integer.parseInt(args[i]);
+                }
+                catch (NumberFormatException ex)
+                {
+                    // a non-numeric page is a malformed command line, not a crash
+                    System.err.println("Invalid page number: " + args[i]);
+                    usage();
+                }
+            }
+            else if (args[i].equals(CONSOLE))
+            {
+                toConsole = true;
+            }
+            else if (pdfFile == null)
+            {
+                pdfFile = args[i];
+            }
+            else
+            {
+                outputFile = args[i];
+            }
+        }
+
+        if (pdfFile == null)
+        {
+            usage();
+        }
+        else
+        {
+            PrintStream output = null;
+            PDDocument document = null;
+            try
+            {
+                if (outputFile == null)
+                {
+                    // swap a 4-character extension such as ".pdf" for ".xml"; names too
+                    // short to carry one just get ".xml" appended instead of staying null
+                    String base = pdfFile.length() > 4
+                            ? pdfFile.substring(0, pdfFile.length() - 4) : pdfFile;
+                    outputFile = new File(base + ".xml").getAbsolutePath();
+                }
+                document = PDDocument.load(new File(pdfFile), password);
+
+                PDDocumentCatalog catalog = document.getDocumentCatalog();
+                PDMetadata meta;
+                if (page == 0)
+                {
+                    meta = catalog.getMetadata();
+                }
+                else
+                {
+                    // also reject negative numbers, which would index before the first page
+                    if (page < 0 || page > document.getNumberOfPages())
+                    {
+                        System.err.println("Page " + page + " doesn't exist");
+                        System.exit(1);
+                    }
+                    meta = document.getPage(page - 1).getMetadata();
+                }
+                if (meta == null)
+                {
+                    System.err.println("No XMP metadata available");
+                    System.exit(1);
+                }
+                output = toConsole ? System.out : new PrintStream(outputFile);
+                output.write(meta.toByteArray());
+                // PrintStream swallows write failures; checkError() flushes and reports them
+                if (output.checkError())
+                {
+                    throw new IOException("Error writing XMP data");
+                }
+            }
+            finally
+            {
+                // System.out is not ours to close, it was flushed by checkError()
+                if (output != System.out)
+                {
+                    IOUtils.closeQuietly(output);
+                }
+                IOUtils.closeQuietly(document);
+            }
+        }
+        System.exit(0);
+    }
+
+    /**
+     * This will print the usage requirements and exit.
+     */
+    private static void usage()
+    {
+        String message = "Usage: java -jar pdfbox-app-x.y.z.jar ExtractXMP [options] <inputfile> [output-xml-file]\n"
+            + "\nOptions:\n"
+            + "  -password <password>        : Password to decrypt document\n"
+            + "  -console                    : Send XMP to console instead of file\n"
+            + "  -page <number>              : The optional page to extract XMP (1 based)\n"
+            + "  <inputfile>                 : The PDF document to use\n"
+            + "  [output-xml-file]           : The file to write the XMP to";
+        System.err.println(message);
+        System.exit( 1 );
+    }
+}

Propchange: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/ExtractXMP.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java?rev=1909913&r1=1909912&r2=1909913&view=diff
==============================================================================
--- pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java (original)
+++ pdfbox/branches/2.0/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java Thu May 18 17:30:33 2023
@@ -59,6 +59,10 @@ public final class PDFBox
             {
                 ExtractText.main(arguments);
             }
+            else if (command.equals("ExtractXMP"))
+            {
+                ExtractXMP.main(arguments);
+            }
             else if (command.equals("ExtractImages"))
             {
                 ExtractImages.main(arguments);
@@ -123,6 +127,7 @@ public final class PDFBox
                 + "  Decrypt\n"
                 + "  Encrypt\n"
                 + "  ExtractText\n"
+                + "  ExtractXMP\n"
                 + "  ExtractImages\n"
                 + "  OverlayPDF\n"
                 + "  PrintPDF\n"