You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2020/08/30 09:22:31 UTC
svn commit: r1881320 - in
/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools:
ExtractImages.java imageio/ImageIOUtil.java
Author: tilman
Date: Sun Aug 30 09:22:31 2020
New Revision: 1881320
URL: http://svn.apache.org/viewvc?rev=1881320&view=rev
Log:
PDFBOX-4847: extend the ExtractImages utility with a new "-noColorConvert" option, by Emmeran Seehuber
Modified:
pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java
Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java?rev=1881320&r1=1881319&r2=1881320&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Sun Aug 30 09:22:31 2020
@@ -64,12 +64,14 @@ public final class ExtractImages
private static final String PASSWORD = "-password";
private static final String PREFIX = "-prefix";
private static final String DIRECTJPEG = "-directJPEG";
+ private static final String NOCOLORCONVERT = "-noColorConvert";
private static final List<String> JPEG = Arrays.asList(
COSName.DCT_DECODE.getName(),
COSName.DCT_DECODE_ABBREVIATION.getName());
private boolean useDirectJPEG;
+ private boolean noColorConvert;
private String prefix;
private final Set<COSStream> seen = new HashSet<>();
@@ -128,6 +130,9 @@ public final class ExtractImages
case DIRECTJPEG:
useDirectJPEG = true;
break;
+ case NOCOLORCONVERT:
+ noColorConvert = true;
+ break;
default:
if (pdfFile == null)
{
@@ -161,10 +166,12 @@ public final class ExtractImages
+ "\nOptions:\n"
+ " -password <password> : Password to decrypt document\n"
+ " -prefix <image-prefix> : Image prefix (default to pdf name)\n"
- + " -directJPEG : Forces the direct extraction of JPEG/JPX images "
+ + " -directJPEG : Forces the direct extraction of JPEG/JPX images \n"
+ " regardless of colorspace or masking\n"
+ + " -noColorConvert : Images are extracted with their \n"
+ + " original colorspace if possible.\n"
+ " <inputfile> : The PDF document to use\n";
-
+
System.err.println(message);
System.exit(1);
}
@@ -249,7 +256,7 @@ public final class ExtractImages
imageCounter++;
System.out.println("Writing image: " + name);
- write2file(pdImage, name, useDirectJPEG);
+ write2file(pdImage, name, useDirectJPEG, noColorConvert);
}
@Override
@@ -364,9 +371,11 @@ public final class ExtractImages
* @param pdImage the image.
* @param prefix the filename prefix.
* @param directJPEG if true, force saving JPEG/JPX streams as they are in the PDF file.
+ * @param noColorConvert if true, images are extracted with their original colorspace if possible.
* @throws IOException When something is wrong with the corresponding file.
*/
- private void write2file(PDImage pdImage, String prefix, boolean directJPEG) throws IOException
+ private void write2file(PDImage pdImage, String prefix, boolean directJPEG,
+ boolean noColorConvert) throws IOException
{
String suffix = pdImage.getSuffix();
if (suffix == null || "jb2".equals(suffix))
@@ -385,6 +394,28 @@ public final class ExtractImages
suffix = "png";
}
+ if (noColorConvert)
+ {
+ // We write the raw image if in any way possible.
+ // But we have no alpha information here.
+ BufferedImage image = pdImage.getRawImage();
+ if (image != null)
+ {
+ int elements = image.getRaster().getNumDataElements();
+ suffix = "png";
+ if (elements > 3)
+ {
+ // More than 3 channels: that's likely CMYK. We use TIFF here,
+ // but a TIFF codec must be in the class path for this to work.
+ suffix = "tiff";
+ }
+ try (FileOutputStream out = new FileOutputStream(prefix + "." + suffix))
+ {
+ ImageIOUtil.writeImage(image, suffix, out);
+ }
+ return;
+ }
+ }
try (FileOutputStream out = new FileOutputStream(prefix + "." + suffix))
{
if ("jpg".equals(suffix))
Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java?rev=1881320&r1=1881319&r2=1881320&view=diff
==============================================================================
--- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java (original)
+++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/imageio/ImageIOUtil.java Sun Aug 30 09:22:31 2020
@@ -16,13 +16,20 @@
*/
package org.apache.pdfbox.tools.imageio;
+import java.awt.color.ColorSpace;
+import java.awt.color.ICC_ColorSpace;
+import java.awt.color.ICC_Profile;
import java.awt.image.BufferedImage;
+
import java.io.BufferedOutputStream;
+import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+
import java.util.Arrays;
import java.util.Iterator;
+import java.util.zip.DeflaterOutputStream;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
@@ -33,8 +40,11 @@ import javax.imageio.metadata.IIOInvalid
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.metadata.IIOMetadataNode;
import javax.imageio.stream.ImageOutputStream;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+
+import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
@@ -283,6 +293,20 @@ public final class ImageIOUtil
}
}
+ if (metadata != null && formatName.equalsIgnoreCase("png") && hasICCProfile(image))
+ {
+ // add ICC profile
+ IIOMetadataNode iccp = new IIOMetadataNode("iCCP");
+ ICC_Profile profile = ((ICC_ColorSpace) image.getColorModel().getColorSpace())
+ .getProfile();
+ iccp.setUserObject(getAsDeflatedBytes(profile));
+ iccp.setAttribute("profileName", "unknown");
+ iccp.setAttribute("compressionMethod", "deflate");
+ Node nativeTree = metadata.getAsTree(metadata.getNativeMetadataFormatName());
+ nativeTree.appendChild(iccp);
+ metadata.mergeTree(metadata.getNativeMetadataFormatName(), nativeTree);
+ }
+
// write
imageOutput = ImageIO.createImageOutputStream(output);
writer.setOutput(imageOutput);
@@ -303,6 +327,37 @@ public final class ImageIOUtil
}
/**
+ * Determine if the given image has an ICC profile that should be embedded.
+ * @param image the image to analyse
+ * @return true if this image has an ICC profile that is neither sRGB nor the built-in gray colorspace.
+ */
+ private static boolean hasICCProfile(BufferedImage image)
+ {
+ ColorSpace colorSpace = image.getColorModel().getColorSpace();
+ // We can only export ICC color spaces
+ if (!(colorSpace instanceof ICC_ColorSpace))
+ {
+ return false;
+ }
+
+ // The colorspace should not be sRGB and not be the builtin gray colorspace
+ return !colorSpace.isCS_sRGB() && colorSpace != ColorSpace.getInstance(ColorSpace.CS_GRAY);
+ }
+
+ private static byte[] getAsDeflatedBytes(ICC_Profile profile) throws IOException
+ {
+ byte[] data = profile.getData();
+
+ ByteArrayOutputStream deflated = new ByteArrayOutputStream();
+ try (DeflaterOutputStream deflater = new DeflaterOutputStream(deflated))
+ {
+ deflater.write(data);
+ }
+
+ return deflated.toByteArray();
+ }
+
+ /**
* Gets the named child node, or creates and attaches it.
*
* @param parentNode the parent node