You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2020/08/30 09:22:31 UTC

svn commit: r1881320 - in /pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools: ExtractImages.java imageio/ImageIOUtil.java

Author: tilman
Date: Sun Aug 30 09:22:31 2020
New Revision: 1881320

URL: http://svn.apache.org/viewvc?rev=1881320&view=rev
Log:
PDFBOX-4847: extend the ExtractImages utility with a new "-noColorConvert" option, by Emmeran Seehuber

Modified:
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
    pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java

Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java?rev=1881320&r1=1881319&r2=1881320&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Sun Aug 30 09:22:31 2020
@@ -64,12 +64,14 @@ public final class ExtractImages
     private static final String PASSWORD = "-password";
     private static final String PREFIX = "-prefix";
     private static final String DIRECTJPEG = "-directJPEG";
+    private static final String NOCOLORCONVERT = "-noColorConvert";
 
     private static final List<String> JPEG = Arrays.asList(
             COSName.DCT_DECODE.getName(),
             COSName.DCT_DECODE_ABBREVIATION.getName());
 
     private boolean useDirectJPEG;
+    private boolean noColorConvert;
     private String prefix;
 
     private final Set<COSStream> seen = new HashSet<>();
@@ -128,6 +130,9 @@ public final class ExtractImages
                     case DIRECTJPEG:
                         useDirectJPEG = true;
                         break;
+                    case NOCOLORCONVERT:
+                        noColorConvert = true;
+                        break;
                     default:
                         if (pdfFile == null)
                         {
@@ -161,10 +166,12 @@ public final class ExtractImages
                 + "\nOptions:\n"
                 + "  -password <password>   : Password to decrypt document\n"
                 + "  -prefix <image-prefix> : Image prefix (default to pdf name)\n"
-                + "  -directJPEG            : Forces the direct extraction of JPEG/JPX images "
+                + "  -directJPEG            : Forces the direct extraction of JPEG/JPX images \n"
                 + "                           regardless of colorspace or masking\n"
+                + "  -noColorConvert        : Images are extracted with their \n"
+                + "                           original colorspace if possible.\n"
                 + "  <inputfile>            : The PDF document to use\n";
-        
+
         System.err.println(message);
         System.exit(1);
     }
@@ -249,7 +256,7 @@ public final class ExtractImages
             imageCounter++;
 
             System.out.println("Writing image: " + name);
-            write2file(pdImage, name, useDirectJPEG);
+            write2file(pdImage, name, useDirectJPEG, noColorConvert);
         }
 
         @Override
@@ -364,9 +371,11 @@ public final class ExtractImages
      * @param pdImage the image.
      * @param prefix the filename prefix.
      * @param directJPEG if true, force saving JPEG/JPX streams as they are in the PDF file. 
+     * @param noColorConvert if true, images are extracted with their original colorspace if possible.
      * @throws IOException When something is wrong with the corresponding file.
      */
-    private void write2file(PDImage pdImage, String prefix, boolean directJPEG) throws IOException
+    private void write2file(PDImage pdImage, String prefix, boolean directJPEG,
+            boolean noColorConvert) throws IOException
     {
         String suffix = pdImage.getSuffix();
         if (suffix == null || "jb2".equals(suffix))
@@ -385,6 +394,28 @@ public final class ExtractImages
             suffix = "png";
         }
 
+        if (noColorConvert)
+        {
+            // We write the raw image if in any way possible.
+            // But we have no alpha information here.
+            BufferedImage image = pdImage.getRawImage();
+            if (image != null)
+            {
+                int elements = image.getRaster().getNumDataElements();
+                suffix = "png";
+                if (elements > 3)
+                {
+                    // More than 3 channels: That's likely CMYK. We use tiff here,
+                    // but a TIFF codec must be in the class path for this to work.
+                    suffix = "tiff";
+                }
+                try (FileOutputStream out = new FileOutputStream(prefix + "." + suffix))
+                {
+                    ImageIOUtil.writeImage(image, suffix, out);
+                }
+                return;
+            }
+        }
         try (FileOutputStream out = new FileOutputStream(prefix + "." + suffix))
         {
             if ("jpg".equals(suffix))

Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java?rev=1881320&r1=1881319&r2=1881320&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java Sun Aug 30 09:22:31 2020
@@ -16,13 +16,20 @@
  */
 package org.apache.pdfbox.tools.imageio;
 
+import java.awt.color.ColorSpace;
+import java.awt.color.ICC_ColorSpace;
+import java.awt.color.ICC_Profile;
 import java.awt.image.BufferedImage;
+
 import java.io.BufferedOutputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+
 import java.util.Arrays;
 import java.util.Iterator;
+import java.util.zip.DeflaterOutputStream;
 
 import javax.imageio.IIOImage;
 import javax.imageio.ImageIO;
@@ -33,8 +40,11 @@ import javax.imageio.metadata.IIOInvalid
 import javax.imageio.metadata.IIOMetadata;
 import javax.imageio.metadata.IIOMetadataNode;
 import javax.imageio.stream.ImageOutputStream;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+
+import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 /**
@@ -283,6 +293,20 @@ public final class ImageIOUtil
                 }
             }
 
+            if (metadata != null && formatName.equalsIgnoreCase("png") && hasICCProfile(image))
+            {
+                // add ICC profile
+                IIOMetadataNode iccp = new IIOMetadataNode("iCCP");
+                ICC_Profile profile = ((ICC_ColorSpace) image.getColorModel().getColorSpace())
+                        .getProfile();
+                iccp.setUserObject(getAsDeflatedBytes(profile));
+                iccp.setAttribute("profileName", "unknown");
+                iccp.setAttribute("compressionMethod", "deflate");
+                Node nativeTree = metadata.getAsTree(metadata.getNativeMetadataFormatName());
+                nativeTree.appendChild(iccp);
+                metadata.mergeTree(metadata.getNativeMetadataFormatName(), nativeTree);
+            }
+
             // write
             imageOutput = ImageIO.createImageOutputStream(output);
             writer.setOutput(imageOutput);
@@ -303,6 +327,37 @@ public final class ImageIOUtil
     }
 
     /**
+     * Determine if the given image has an ICC profile that should be embedded.
+     * @param image the image to analyse
+     * @return true if the image has an ICC profile other than sRGB or the built-in gray.
+     */
+    private static boolean hasICCProfile(BufferedImage image)
+    {
+        ColorSpace colorSpace = image.getColorModel().getColorSpace();
+        // We can only export ICC color spaces
+        if (!(colorSpace instanceof ICC_ColorSpace))
+        {
+            return false;
+        }
+
+        // The colorspace should not be sRGB and not be the builtin gray colorspace
+        return !colorSpace.isCS_sRGB() && colorSpace != ColorSpace.getInstance(ColorSpace.CS_GRAY);
+    }
+
+    private static byte[] getAsDeflatedBytes(ICC_Profile profile) throws IOException
+    {
+        byte[] data = profile.getData();
+
+        ByteArrayOutputStream deflated = new ByteArrayOutputStream();
+        try (DeflaterOutputStream deflater = new DeflaterOutputStream(deflated))
+        {
+            deflater.write(data);
+        }
+
+        return deflated.toByteArray();
+    }
+
+    /**
      * Gets the named child node, or creates and attaches it.
      *
      * @param parentNode the parent node