You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2018/03/07 21:40:42 UTC

svn commit: r1826161 [2/2] - in /pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/cos/ main/java/org/apache/pdfbox/filter/ main/java/org/apache/pdfbox/pdmodel/common/ main/java/org/apache/pdfbox/pdmodel/graphics/image/ main/java/org/apache/pdfbox/r...

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java?rev=1826161&r1=1826160&r2=1826161&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/SampledImageReader.java Wed Mar  7 21:40:42 2018
@@ -1,491 +1,600 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.pdfbox.pdmodel.graphics.image;
-
-import java.awt.Graphics2D;
-import java.awt.Paint;
-import java.awt.Point;
-import java.awt.image.BufferedImage;
-import java.awt.image.DataBuffer;
-import java.awt.image.DataBufferByte;
-import java.awt.image.Raster;
-import java.awt.image.WritableRaster;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import javax.imageio.stream.ImageInputStream;
-import javax.imageio.stream.MemoryCacheImageInputStream;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.pdfbox.cos.COSArray;
-import org.apache.pdfbox.cos.COSNumber;
-import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
-import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
-import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed;
-
-/**
- * Reads a sampled image from a PDF file.
- * @author John Hewson
- */
-final class SampledImageReader
-{
-    private static final Log LOG = LogFactory.getLog(SampledImageReader.class);
-    
-    private SampledImageReader()
-    {
-    }
-
-    /**
-     * Returns an ARGB image filled with the given paint and using the given image as a mask.
-     * @param paint the paint to fill the visible portions of the image with
-     * @return a masked image filled with the given paint
-     * @throws IOException if the image cannot be read
-     * @throws IllegalStateException if the image is not a stencil.
-     */
-    public static BufferedImage getStencilImage(PDImage pdImage, Paint paint) throws IOException
-    {
-        int width = pdImage.getWidth();
-        int height = pdImage.getHeight();
-
-        // compose to ARGB
-        BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
-        Graphics2D g = masked.createGraphics();
-
-        // draw the mask
-        //g.drawImage(mask, 0, 0, null);
-
-        // fill with paint using src-in
-        //g.setComposite(AlphaComposite.SrcIn);
-        g.setPaint(paint);
-        g.fillRect(0, 0, width, height);
-        g.dispose();
-
-        // set the alpha
-        WritableRaster raster = masked.getRaster();
-
-        final int[] transparent = new int[4];
-
-        // avoid getting a BufferedImage for the mask to lessen memory footprint.
-        // Such masks are always bpc=1 and have no colorspace, but have a decode.
-        // (see 8.9.6.2 Stencil Masking)
-        try (InputStream iis = pdImage.createInputStream())
-        {
-            final float[] decode = getDecodeArray(pdImage);
-            int value = decode[0] < decode[1] ? 1 : 0;
-            int rowLen = width / 8;
-            if (width % 8 > 0)
-            {
-                rowLen++;
-            }
-            byte[] buff = new byte[rowLen];
-            for (int y = 0; y < height; y++)
-            {
-                int x = 0;
-                int readLen = iis.read(buff);
-                for (int r = 0; r < rowLen && r < readLen; r++)
-                {
-                    int byteValue = buff[r];
-                    int mask = 128;
-                    int shift = 7;
-                    for (int i = 0; i < 8; i++)
-                    {
-                        int bit = (byteValue & mask) >> shift;
-                        mask >>= 1;
-                        --shift;
-                        if (bit == value)
-                        {
-                            raster.setPixel(x, y, transparent);
-                        }
-                        x++;
-                        if (x == width)
-                        {
-                            break;
-                        }
-                    }
-                }
-                if (readLen != rowLen)
-                {
-                    LOG.warn("premature EOF, image will be incomplete");
-                    break;
-                }
-            }            
-        }
-
-        return masked;
-    }
-
-    /**
-     * Returns the content of the given image as an AWT buffered image with an RGB color space.
-     * If a color key mask is provided then an ARGB image is returned instead.
-     * This method never returns null.
-     * @param pdImage the image to read
-     * @param colorKey an optional color key mask
-     * @return content of this image as an RGB buffered image
-     * @throws IOException if the image cannot be read
-     */
-    public static BufferedImage getRGBImage(PDImage pdImage, COSArray colorKey) throws IOException
-    {
-        if (pdImage.isEmpty())
-        {
-            throw new IOException("Image stream is empty");
-        }
-
-        // get parameters, they must be valid or have been repaired
-        final PDColorSpace colorSpace = pdImage.getColorSpace();
-        final int numComponents = colorSpace.getNumberOfComponents();
-        final int width = pdImage.getWidth();
-        final int height = pdImage.getHeight();
-        final int bitsPerComponent = pdImage.getBitsPerComponent();
-        final float[] decode = getDecodeArray(pdImage);
-
-        if (width <= 0 || height <= 0)
-        {
-            throw new IOException("image width and height must be positive");
-        }
-
-        if (bitsPerComponent == 1 && colorKey == null && numComponents == 1)
-        {
-            return from1Bit(pdImage);
-        }
-
-        //
-        // An AWT raster must use 8/16/32 bits per component. Images with < 8bpc
-        // will be unpacked into a byte-backed raster. Images with 16bpc will be reduced
-        // in depth to 8bpc as they will be drawn to TYPE_INT_RGB images anyway. All code
-        // in PDColorSpace#toRGBImage expects an 8-bit range, i.e. 0-255.
-        //
-        WritableRaster raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height,
-                numComponents, new Point(0, 0));
-        final float[] defaultDecode = pdImage.getColorSpace().getDefaultDecode(8);
-        if (bitsPerComponent == 8 && Arrays.equals(decode, defaultDecode) && colorKey == null)
-        {
-            // convert image, faster path for non-decoded, non-colormasked 8-bit images
-            return from8bit(pdImage, raster);
-        }
-        return fromAny(pdImage, raster, colorKey);
-    }
-
-    private static BufferedImage from1Bit(PDImage pdImage) throws IOException
-    {
-        final PDColorSpace colorSpace = pdImage.getColorSpace();
-        final int width = pdImage.getWidth();
-        final int height = pdImage.getHeight();
-        final float[] decode = getDecodeArray(pdImage);
-        BufferedImage bim = null;
-        WritableRaster raster;
-        byte[] output;
-        if (colorSpace instanceof PDDeviceGray)
-        {
-            // TYPE_BYTE_GRAY and not TYPE_BYTE_BINARY because this one is handled
-            // without conversion to RGB by Graphics.drawImage
-            // this reduces the memory footprint, only one byte per pixel instead of three.
-            bim = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
-            raster = bim.getRaster();
-        }
-        else
-        {
-            raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height, 1, new Point(0, 0));
-        }
-        output = ((DataBufferByte) raster.getDataBuffer()).getData();
-
-        // read bit stream
-        try (InputStream iis = pdImage.createInputStream())
-        {
-            final boolean isIndexed = colorSpace instanceof PDIndexed;
-
-            int rowLen = width / 8;
-            if (width % 8 > 0)
-            {
-                rowLen++;
-            }
-
-            // read stream
-            byte value0;
-            byte value1;
-            if (isIndexed || decode[0] < decode[1])
-            {
-                value0 = 0;
-                value1 = (byte) 255;
-            }
-            else
-            {
-                value0 = (byte) 255;
-                value1 = 0;
-            }
-            byte[] buff = new byte[rowLen];
-            int idx = 0;
-            for (int y = 0; y < height; y++)
-            {
-                int x = 0;
-                int readLen = iis.read(buff);
-                for (int r = 0; r < rowLen && r < readLen; r++)
-                {
-                    int value = buff[r];
-                    int mask = 128;
-                    for (int i = 0; i < 8; i++)
-                    {
-                        int bit = value & mask;
-                        mask >>= 1;
-                        output[idx++] = bit == 0 ? value0 : value1;
-                        x++;
-                        if (x == width)
-                        {
-                            break;
-                        }
-                    }
-                }
-                if (readLen != rowLen)
-                {
-                    LOG.warn("premature EOF, image will be incomplete");
-                    break;
-                }
-            }
-
-            if (bim != null)
-            {
-                return bim;
-            }
-
-            // use the color space to convert the image to RGB
-            return colorSpace.toRGBImage(raster);
-        }
-    }
-
-    // faster, 8-bit non-decoded, non-colormasked image conversion
-    private static BufferedImage from8bit(PDImage pdImage, WritableRaster raster)
-            throws IOException
-    {
-        try (InputStream input = pdImage.createInputStream())
-        {
-            // get the raster's underlying byte buffer
-            byte[][] banks = ((DataBufferByte) raster.getDataBuffer()).getBankData();
-            final int width = pdImage.getWidth();
-            final int height = pdImage.getHeight();
-            final int numComponents = pdImage.getColorSpace().getNumberOfComponents();
-            byte[] tempBytes = new byte[numComponents * width];
-            // compromise between memory and time usage:
-            // reading the whole image consumes too much memory
-            // reading one pixel at a time makes it slow in our buffering infrastructure 
-            int i = 0;
-            for (int y = 0; y < height; ++y)
-            {
-                long inputResult = input.read(tempBytes);
-
-                if (Long.compare(inputResult, tempBytes.length) != 0)
-                {
-                    LOG.debug("Tried reading " + tempBytes.length + " bytes but only " + inputResult + " bytes read");
-                }
-
-                for (int x = 0; x < width; ++x)
-                {
-                    for (int c = 0; c < numComponents; c++)
-                    {
-                        banks[c][i] = tempBytes[x * numComponents + c];
-                    }
-                    ++i;
-                }
-            }
-            // use the color space to convert the image to RGB
-            return pdImage.getColorSpace().toRGBImage(raster);
-        }
-    }
-
-    // slower, general-purpose image conversion from any image format
-    private static BufferedImage fromAny(PDImage pdImage, WritableRaster raster, COSArray colorKey)
-            throws IOException
-    {
-        final PDColorSpace colorSpace = pdImage.getColorSpace();
-        final int numComponents = colorSpace.getNumberOfComponents();
-        final int width = pdImage.getWidth();
-        final int height = pdImage.getHeight();
-        final int bitsPerComponent = pdImage.getBitsPerComponent();
-        final float[] decode = getDecodeArray(pdImage);
-
-        // read bit stream
-        try (ImageInputStream iis = new MemoryCacheImageInputStream(pdImage.createInputStream()))
-        {
-            final float sampleMax = (float) Math.pow(2, bitsPerComponent) - 1f;
-            final boolean isIndexed = colorSpace instanceof PDIndexed;
-
-            // init color key mask
-            float[] colorKeyRanges = null;
-            BufferedImage colorKeyMask = null;
-            if (colorKey != null)
-            {
-                colorKeyRanges = colorKey.toFloatArray();
-                colorKeyMask = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
-            }
-
-            // calculate row padding
-            int padding = 0;
-            if (width * numComponents * bitsPerComponent % 8 > 0)
-            {
-                padding = 8 - (width * numComponents * bitsPerComponent % 8);
-            }
-
-            // read stream
-            byte[] srcColorValues = new byte[numComponents];
-            byte[] alpha = new byte[1];
-            for (int y = 0; y < height; y++)
-            {
-                for (int x = 0; x < width; x++)
-                {
-                    boolean isMasked = true;
-                    for (int c = 0; c < numComponents; c++)
-                    {
-                        int value = (int)iis.readBits(bitsPerComponent);
-
-                        // color key mask requires values before they are decoded
-                        if (colorKeyRanges != null)
-                        {
-                            isMasked &= value >= colorKeyRanges[c * 2] &&
-                                        value <= colorKeyRanges[c * 2 + 1];
-                        }
-
-                        // decode array
-                        final float dMin = decode[c * 2];
-                        final float dMax = decode[(c * 2) + 1];
-
-                        // interpolate to domain
-                        float output = dMin + (value * ((dMax - dMin) / sampleMax));
-
-                        if (isIndexed)
-                        {
-                            // indexed color spaces get the raw value, because the TYPE_BYTE
-                            // below cannot be reversed by the color space without it having
-                            // knowledge of the number of bits per component
-                            srcColorValues[c] = (byte)Math.round(output);
-                        }
-                        else
-                        {
-                            // interpolate to TYPE_BYTE
-                            int outputByte = Math.round(((output - Math.min(dMin, dMax)) /
-                                    Math.abs(dMax - dMin)) * 255f);
-
-                            srcColorValues[c] = (byte)outputByte;
-                        }
-                    }
-                    raster.setDataElements(x, y, srcColorValues);
-
-                    // set alpha channel in color key mask, if any
-                    if (colorKeyMask != null)
-                    {
-                        alpha[0] = (byte)(isMasked ? 255 : 0);
-                        colorKeyMask.getRaster().setDataElements(x, y, alpha);
-                    }
-                }
-
-                // rows are padded to the nearest byte
-                iis.readBits(padding);
-            }
-
-            // use the color space to convert the image to RGB
-            BufferedImage rgbImage = colorSpace.toRGBImage(raster);
-
-            // apply color mask, if any
-            if (colorKeyMask != null)
-            {
-                return applyColorKeyMask(rgbImage, colorKeyMask);
-            }
-            else
-            {
-                return rgbImage;
-            }
-        }
-    }
-
-    // color key mask: RGB + Binary -> ARGB
-    private static BufferedImage applyColorKeyMask(BufferedImage image, BufferedImage mask)
-            throws IOException
-    {
-        int width = image.getWidth();
-        int height = image.getHeight();
-
-        // compose to ARGB
-        BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
-
-        WritableRaster src = image.getRaster();
-        WritableRaster dest = masked.getRaster();
-        WritableRaster alpha = mask.getRaster();
-
-        float[] rgb = new float[3];
-        float[] rgba = new float[4];
-        float[] alphaPixel = null;
-        for (int y = 0; y < height; y++)
-        {
-            for (int x = 0; x < width; x++)
-            {
-                src.getPixel(x, y, rgb);
-
-                rgba[0] = rgb[0];
-                rgba[1] = rgb[1];
-                rgba[2] = rgb[2];
-                alphaPixel = alpha.getPixel(x, y, alphaPixel);
-                rgba[3] = 255 - alphaPixel[0];
-
-                dest.setPixel(x, y, rgba);
-            }
-        }
-
-        return masked;
-    }
-
-    // gets decode array from dictionary or returns default
-    private static float[] getDecodeArray(PDImage pdImage) throws IOException
-    {
-        final COSArray cosDecode = pdImage.getDecode();
-        float[] decode = null;
-
-        if (cosDecode != null)
-        {
-            int numberOfComponents = pdImage.getColorSpace().getNumberOfComponents();
-            if (cosDecode.size() != numberOfComponents * 2)
-            {
-                if (pdImage.isStencil() && cosDecode.size() >= 2
-                        && cosDecode.get(0) instanceof COSNumber
-                        && cosDecode.get(1) instanceof COSNumber)
-                {
-                    float decode0 = ((COSNumber) cosDecode.get(0)).floatValue();
-                    float decode1 = ((COSNumber) cosDecode.get(1)).floatValue();
-                    if (decode0 >= 0 && decode0 <= 1 && decode1 >= 0 && decode1 <= 1)
-                    {
-                        LOG.warn("decode array " + cosDecode
-                                + " not compatible with color space, using the first two entries");
-                        return new float[]
-                        {
-                            decode0, decode1
-                        };
-                    }
-                }
-                LOG.error("decode array " + cosDecode
-                        + " not compatible with color space, using default");
-            }
-            else
-            {
-                decode = cosDecode.toFloatArray();
-            }
-        }
-
-        // use color space default
-        if (decode == null)
-        {
-            return pdImage.getColorSpace().getDefaultDecode(pdImage.getBitsPerComponent());
-        }
-
-        return decode;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.graphics.image;
+
+import java.awt.Graphics2D;
+import java.awt.Paint;
+import java.awt.Point;
+import java.awt.Rectangle;
+import java.awt.image.BufferedImage;
+import java.awt.image.DataBuffer;
+import java.awt.image.DataBufferByte;
+import java.awt.image.Raster;
+import java.awt.image.WritableRaster;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import javax.imageio.stream.ImageInputStream;
+import javax.imageio.stream.MemoryCacheImageInputStream;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.pdfbox.cos.COSArray;
+import org.apache.pdfbox.cos.COSNumber;
+import org.apache.pdfbox.filter.DecodeOptions;
+import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
+import org.apache.pdfbox.pdmodel.graphics.color.PDIndexed;
+
+/**
+ * Reads a sampled image from a PDF file.
+ * @author John Hewson
+ */
+final class SampledImageReader
+{
+    private static final Log LOG = LogFactory.getLog(SampledImageReader.class);
+    
+    private SampledImageReader()
+    {
+    }
+
+    /**
+     * Returns an ARGB image filled with the given paint and using the given image as a mask.
+     * @param paint the paint to fill the visible portions of the image with
+     * @return a masked image filled with the given paint
+     * @throws IOException if the image cannot be read
+     * @throws IllegalStateException if the image is not a stencil.
+     */
+    public static BufferedImage getStencilImage(PDImage pdImage, Paint paint) throws IOException
+    {
+        int width = pdImage.getWidth();
+        int height = pdImage.getHeight();
+
+        // compose to ARGB
+        BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
+        Graphics2D g = masked.createGraphics();
+
+        // draw the mask
+        //g.drawImage(mask, 0, 0, null);
+
+        // fill with paint using src-in
+        //g.setComposite(AlphaComposite.SrcIn);
+        g.setPaint(paint);
+        g.fillRect(0, 0, width, height);
+        g.dispose();
+
+        // set the alpha
+        WritableRaster raster = masked.getRaster();
+
+        final int[] transparent = new int[4];
+
+        // avoid getting a BufferedImage for the mask to lessen memory footprint.
+        // Such masks are always bpc=1 and have no colorspace, but have a decode.
+        // (see 8.9.6.2 Stencil Masking)
+        try (InputStream iis = pdImage.createInputStream())
+        {
+            final float[] decode = getDecodeArray(pdImage);
+            int value = decode[0] < decode[1] ? 1 : 0;
+            int rowLen = width / 8;
+            if (width % 8 > 0)
+            {
+                rowLen++;
+            }
+            byte[] buff = new byte[rowLen];
+            for (int y = 0; y < height; y++)
+            {
+                int x = 0;
+                int readLen = iis.read(buff);
+                for (int r = 0; r < rowLen && r < readLen; r++)
+                {
+                    int byteValue = buff[r];
+                    int mask = 128;
+                    int shift = 7;
+                    for (int i = 0; i < 8; i++)
+                    {
+                        int bit = (byteValue & mask) >> shift;
+                        mask >>= 1;
+                        --shift;
+                        if (bit == value)
+                        {
+                            raster.setPixel(x, y, transparent);
+                        }
+                        x++;
+                        if (x == width)
+                        {
+                            break;
+                        }
+                    }
+                }
+                if (readLen != rowLen)
+                {
+                    LOG.warn("premature EOF, image will be incomplete");
+                    break;
+                }
+            }            
+        }
+
+        return masked;
+    }
+
+    /**
+     * Returns the content of the given image as an AWT buffered image with an RGB color space.
+     * If a color key mask is provided then an ARGB image is returned instead.
+     * This method never returns null.
+     * @param pdImage the image to read
+     * @param colorKey an optional color key mask
+     * @return content of this image as an RGB buffered image
+     * @throws IOException if the image cannot be read
+     */
+    public static BufferedImage getRGBImage(PDImage pdImage, COSArray colorKey) throws IOException
+    {
+        return getRGBImage(pdImage, null, 1, colorKey);
+    }
+
+    private static Rectangle clipRegion(PDImage pdImage, Rectangle region)
+    {
+        if (region == null)
+        {
+            return new Rectangle(0, 0, pdImage.getWidth(), pdImage.getHeight());
+        }
+        else
+        {
+            int x = Math.max(0, region.x);
+            int y = Math.max(0, region.y);
+            int width = Math.min(region.width, pdImage.getWidth() - x);
+            int height = Math.min(region.height, pdImage.getHeight() - y);
+            return new Rectangle(x, y, width, height);
+        }
+    }
+
+    public static BufferedImage getRGBImage(PDImage pdImage, Rectangle region, int subsampling,
+                                            COSArray colorKey) throws IOException
+    {
+        if (pdImage.isEmpty())
+        {
+            throw new IOException("Image stream is empty");
+        }
+        Rectangle clipped = clipRegion(pdImage, region);
+
+        // get parameters, they must be valid or have been repaired
+        final PDColorSpace colorSpace = pdImage.getColorSpace();
+        final int numComponents = colorSpace.getNumberOfComponents();
+        final int width = (int) Math.ceil(clipped.getWidth() / subsampling);
+        final int height = (int) Math.ceil(clipped.getHeight() / subsampling);
+        final int bitsPerComponent = pdImage.getBitsPerComponent();
+        final float[] decode = getDecodeArray(pdImage);
+
+        if (width <= 0 || height <= 0 || pdImage.getWidth() <= 0 || pdImage.getHeight() <= 0)
+        {
+            throw new IOException("image width and height must be positive");
+        }
+
+        if (bitsPerComponent == 1 && colorKey == null && numComponents == 1)
+        {
+            return from1Bit(pdImage, clipped, subsampling, width, height);
+        }
+
+        //
+        // An AWT raster must use 8/16/32 bits per component. Images with < 8bpc
+        // will be unpacked into a byte-backed raster. Images with 16bpc will be reduced
+        // in depth to 8bpc as they will be drawn to TYPE_INT_RGB images anyway. All code
+        // in PDColorSpace#toRGBImage expects an 8-bit range, i.e. 0-255.
+        //
+        WritableRaster raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height,
+                numComponents, new Point(0, 0));
+        final float[] defaultDecode = pdImage.getColorSpace().getDefaultDecode(8);
+        if (bitsPerComponent == 8 && Arrays.equals(decode, defaultDecode) && colorKey == null)
+        {
+            // convert image, faster path for non-decoded, non-colormasked 8-bit images
+            return from8bit(pdImage, raster, clipped, subsampling, width, height);
+        }
+        return fromAny(pdImage, raster, colorKey, clipped, subsampling, width, height);
+    }
+
+    private static BufferedImage from1Bit(PDImage pdImage, Rectangle clipped, int subsampling,
+                                          final int width, final int height) throws IOException
+    {
+        final PDColorSpace colorSpace = pdImage.getColorSpace();
+        final float[] decode = getDecodeArray(pdImage);
+        BufferedImage bim = null;
+        WritableRaster raster;
+        byte[] output;
+
+        DecodeOptions options = new DecodeOptions(subsampling);
+        options.setSourceRegion(clipped);
+        // read bit stream
+        try (InputStream iis = pdImage.createInputStream(options))
+        {
+            final int inputWidth, inputHeight, startx, starty, scanWidth, scanHeight;
+            if (options.isFilterSubsampled())
+            {
+                // Decode options were honored, and so there is no need for additional clipping or subsampling
+                inputWidth = width;
+                inputHeight = height;
+                startx = 0;
+                starty = 0;
+                scanWidth = width;
+                scanHeight = height;
+                subsampling = 1;
+            }
+            else
+            {
+                // Decode options not honored, so we need to clip and subsample ourselves.
+                inputWidth = pdImage.getWidth();
+                inputHeight = pdImage.getHeight();
+                startx = clipped.x;
+                starty = clipped.y;
+                scanWidth = clipped.width;
+                scanHeight = clipped.height;
+            }
+            if (colorSpace instanceof PDDeviceGray)
+            {
+                // TYPE_BYTE_GRAY and not TYPE_BYTE_BINARY because this one is handled
+                // without conversion to RGB by Graphics.drawImage
+                // this reduces the memory footprint, only one byte per pixel instead of three.
+                bim = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
+                raster = bim.getRaster();
+            }
+            else
+            {
+                raster = Raster.createBandedRaster(DataBuffer.TYPE_BYTE, width, height, 1, new Point(0, 0));
+            }
+            output = ((DataBufferByte) raster.getDataBuffer()).getData();
+            final boolean isIndexed = colorSpace instanceof PDIndexed;
+
+            int rowLen = inputWidth / 8;
+            if (inputWidth % 8 > 0)
+            {
+                rowLen++;
+            }
+
+            // read stream
+            byte value0;
+            byte value1;
+            if (isIndexed || decode[0] < decode[1])
+            {
+                value0 = 0;
+                value1 = (byte) 255;
+            }
+            else
+            {
+                value0 = (byte) 255;
+                value1 = 0;
+            }
+            byte[] buff = new byte[rowLen];
+            int idx = 0;
+            for (int y = 0; y < starty + scanHeight; y++)
+            {
+                int x = 0;
+                int readLen = iis.read(buff);
+                if (y < starty || y % subsampling > 0)
+                {
+                    continue;
+                }
+                for (int r = 0; r < rowLen && r < readLen; r++)
+                {
+                    int value = buff[r];
+                    int mask = 128;
+                    for (int i = 0; i < 8; i++)
+                    {
+                        if (x >= startx + scanWidth)
+                        {
+                            break;
+                        }
+                        int bit = value & mask;
+                        mask >>= 1;
+                        if (x >= startx && x % subsampling == 0)
+                        {
+                            output[idx++] = bit == 0 ? value0 : value1;
+                        }
+                        x++;
+                    }
+                }
+                if (readLen != rowLen)
+                {
+                    LOG.warn("premature EOF, image will be incomplete");
+                    break;
+                }
+            }
+
+            if (bim != null)
+            {
+                return bim;
+            }
+
+            // use the color space to convert the image to RGB
+            return colorSpace.toRGBImage(raster);
+        }
+    }
+
+    /**
+     * Fast conversion path for 8 bits-per-component images that need neither a decode array nor a
+     * color key mask. The samples of the requested region are copied row by row into the banks of
+     * the given raster, which is then converted to RGB by the color space of the image.
+     *
+     * @param pdImage the image to read
+     * @param raster destination raster; its data buffer must be a DataBufferByte holding at least
+     * one bank per color component
+     * @param clipped the source region to read, in image pixel coordinates
+     * @param subsampling the subsampling step that is applied here if the filter did not apply it
+     * @param width width of the output image
+     * @param height height of the output image
+     * @return the image converted to RGB by the color space
+     * @throws IOException if the image stream cannot be read
+     */
+    private static BufferedImage from8bit(PDImage pdImage, WritableRaster raster, Rectangle clipped, int subsampling,
+                                          final int width, final int height) throws IOException
+    {
+        DecodeOptions options = new DecodeOptions(subsampling);
+        options.setSourceRegion(clipped);
+        try (InputStream input = pdImage.createInputStream(options))
+        {
+            final int inputWidth, startx, starty, scanWidth, scanHeight;
+            if (options.isFilterSubsampled())
+            {
+                // Decode options were honored, and so there is no need for additional clipping or subsampling
+                inputWidth = width;
+                startx = 0;
+                starty = 0;
+                scanWidth = width;
+                scanHeight = height;
+                subsampling = 1;
+            }
+            else
+            {
+                // Decode options not honored, so we need to clip and subsample ourselves.
+                inputWidth = pdImage.getWidth();
+                startx = clipped.x;
+                starty = clipped.y;
+                scanWidth = clipped.width;
+                scanHeight = clipped.height;
+            }
+            final int numComponents = pdImage.getColorSpace().getNumberOfComponents();
+            // get the raster's underlying byte buffer
+            byte[][] banks = ((DataBufferByte) raster.getDataBuffer()).getBankData();
+            byte[] tempBytes = new byte[numComponents * inputWidth];
+            // compromise between memory and time usage:
+            // reading the whole image consumes too much memory
+            // reading one pixel at a time makes it slow in our buffering infrastructure
+            int i = 0;
+            for (int y = 0; y < starty + scanHeight; ++y)
+            {
+                // a single read() may deliver fewer bytes than a full row without being at the end
+                // of the stream, so keep reading until the row is complete; otherwise all
+                // following rows would be read misaligned
+                int bytesRead = 0;
+                while (bytesRead < tempBytes.length)
+                {
+                    int count = input.read(tempBytes, bytesRead, tempBytes.length - bytesRead);
+                    if (count < 0)
+                    {
+                        // end of stream
+                        break;
+                    }
+                    bytesRead += count;
+                }
+
+                if (bytesRead != tempBytes.length)
+                {
+                    LOG.debug("Tried reading " + tempBytes.length + " bytes but only " + bytesRead + " bytes read");
+                }
+
+                // rows above the region or dropped by subsampling are read but not copied
+                if (y < starty || y % subsampling > 0)
+                {
+                    continue;
+                }
+
+                for (int x = startx; x < startx + scanWidth; x += subsampling)
+                {
+                    for (int c = 0; c < numComponents; c++)
+                    {
+                        banks[c][i] = tempBytes[x * numComponents + c];
+                    }
+                    ++i;
+                }
+            }
+            // use the color space to convert the image to RGB
+            return pdImage.getColorSpace().toRGBImage(raster);
+        }
+    }
+
+    // slower, general-purpose image conversion from any image format
+    private static BufferedImage fromAny(PDImage pdImage, WritableRaster raster, COSArray colorKey, Rectangle clipped,
+                                         int subsampling, final int width, final int height)
+            throws IOException
+    {
+        final PDColorSpace colorSpace = pdImage.getColorSpace();
+        final int numComponents = colorSpace.getNumberOfComponents();
+        final int bitsPerComponent = pdImage.getBitsPerComponent();
+        final float[] decode = getDecodeArray(pdImage);
+
+        DecodeOptions options = new DecodeOptions(subsampling);
+        options.setSourceRegion(clipped);
+        // read bit stream
+        try (ImageInputStream iis = new MemoryCacheImageInputStream(pdImage.createInputStream(options)))
+        {
+            final int inputWidth, inputHeight, startx, starty, scanWidth, scanHeight;
+            if (options.isFilterSubsampled())
+            {
+                // Decode options were honored, and so there is no need for additional clipping or subsampling
+                inputWidth = width;
+                inputHeight = height;
+                startx = 0;
+                starty = 0;
+                scanWidth = width;
+                scanHeight = height;
+                subsampling = 1;
+            }
+            else
+            {
+                // Decode options not honored, so we need to clip and subsample ourselves.
+                inputWidth = pdImage.getWidth();
+                inputHeight = pdImage.getHeight();
+                startx = clipped.x;
+                starty = clipped.y;
+                scanWidth = clipped.width;
+                scanHeight = clipped.height;
+            }
+            final float sampleMax = (float) Math.pow(2, bitsPerComponent) - 1f;
+            final boolean isIndexed = colorSpace instanceof PDIndexed;
+
+            // init color key mask
+            float[] colorKeyRanges = null;
+            BufferedImage colorKeyMask = null;
+            if (colorKey != null)
+            {
+                colorKeyRanges = colorKey.toFloatArray();
+                colorKeyMask = new BufferedImage(width, height, BufferedImage.TYPE_BYTE_GRAY);
+            }
+
+            // calculate row padding
+            int padding = 0;
+            if (inputWidth * numComponents * bitsPerComponent % 8 > 0)
+            {
+                padding = 8 - (inputWidth * numComponents * bitsPerComponent % 8);
+            }
+
+            // read stream
+            byte[] srcColorValues = new byte[numComponents];
+            byte[] alpha = new byte[1];
+            for (int y = 0; y < starty + scanHeight; y++)
+            {
+                for (int x = 0; x < startx + scanWidth; x++)
+                {
+                    boolean isMasked = true;
+                    for (int c = 0; c < numComponents; c++)
+                    {
+                        int value = (int)iis.readBits(bitsPerComponent);
+
+                        // color key mask requires values before they are decoded
+                        if (colorKeyRanges != null)
+                        {
+                            isMasked &= value >= colorKeyRanges[c * 2] &&
+                                        value <= colorKeyRanges[c * 2 + 1];
+                        }
+
+                        // decode array
+                        final float dMin = decode[c * 2];
+                        final float dMax = decode[(c * 2) + 1];
+
+                        // interpolate to domain
+                        float output = dMin + (value * ((dMax - dMin) / sampleMax));
+
+                        if (isIndexed)
+                        {
+                            // indexed color spaces get the raw value, because the TYPE_BYTE
+                            // below cannot be reversed by the color space without it having
+                            // knowledge of the number of bits per component
+                            srcColorValues[c] = (byte)Math.round(output);
+                        }
+                        else
+                        {
+                            // interpolate to TYPE_BYTE
+                            int outputByte = Math.round(((output - Math.min(dMin, dMax)) /
+                                    Math.abs(dMax - dMin)) * 255f);
+
+                            srcColorValues[c] = (byte)outputByte;
+                        }
+                    }
+                    // only write to output if within requested region and subsample.
+                    if (x >= startx && y >= starty && x % subsampling == 0 && y % subsampling == 0)
+                    {
+                        raster.setDataElements((x - startx) / subsampling, (y - starty) / subsampling, srcColorValues);
+
+                        // set alpha channel in color key mask, if any
+                        if (colorKeyMask != null)
+                        {
+                            alpha[0] = (byte)(isMasked ? 255 : 0);
+                            colorKeyMask.getRaster().setDataElements((x - startx) / subsampling, (y - starty) / subsampling, alpha);
+                        }
+                    }
+                }
+
+                // rows are padded to the nearest byte
+                iis.readBits(padding);
+            }
+
+            // use the color space to convert the image to RGB
+            BufferedImage rgbImage = colorSpace.toRGBImage(raster);
+
+            // apply color mask, if any
+            if (colorKeyMask != null)
+            {
+                return applyColorKeyMask(rgbImage, colorKeyMask);
+            }
+            else
+            {
+                return rgbImage;
+            }
+        }
+    }
+
+    // color key mask: RGB + Binary -> ARGB
+    private static BufferedImage applyColorKeyMask(BufferedImage image, BufferedImage mask)
+            throws IOException
+    {
+        int width = image.getWidth();
+        int height = image.getHeight();
+
+        // compose to ARGB
+        BufferedImage masked = new BufferedImage(width, height, BufferedImage.TYPE_INT_ARGB);
+
+        WritableRaster src = image.getRaster();
+        WritableRaster dest = masked.getRaster();
+        WritableRaster alpha = mask.getRaster();
+
+        float[] rgb = new float[3];
+        float[] rgba = new float[4];
+        float[] alphaPixel = null;
+        for (int y = 0; y < height; y++)
+        {
+            for (int x = 0; x < width; x++)
+            {
+                src.getPixel(x, y, rgb);
+
+                rgba[0] = rgb[0];
+                rgba[1] = rgb[1];
+                rgba[2] = rgb[2];
+                alphaPixel = alpha.getPixel(x, y, alphaPixel);
+                rgba[3] = 255 - alphaPixel[0];
+
+                dest.setPixel(x, y, rgba);
+            }
+        }
+
+        return masked;
+    }
+
+    // gets decode array from dictionary or returns default
+    private static float[] getDecodeArray(PDImage pdImage) throws IOException
+    {
+        final COSArray cosDecode = pdImage.getDecode();
+        float[] decode = null;
+
+        if (cosDecode != null)
+        {
+            int numberOfComponents = pdImage.getColorSpace().getNumberOfComponents();
+            if (cosDecode.size() != numberOfComponents * 2)
+            {
+                if (pdImage.isStencil() && cosDecode.size() >= 2
+                        && cosDecode.get(0) instanceof COSNumber
+                        && cosDecode.get(1) instanceof COSNumber)
+                {
+                    float decode0 = ((COSNumber) cosDecode.get(0)).floatValue();
+                    float decode1 = ((COSNumber) cosDecode.get(1)).floatValue();
+                    if (decode0 >= 0 && decode0 <= 1 && decode1 >= 0 && decode1 <= 1)
+                    {
+                        LOG.warn("decode array " + cosDecode
+                                + " not compatible with color space, using the first two entries");
+                        return new float[]
+                        {
+                            decode0, decode1
+                        };
+                    }
+                }
+                LOG.error("decode array " + cosDecode
+                        + " not compatible with color space, using default");
+            }
+            else
+            {
+                decode = cosDecode.toFloatArray();
+            }
+        }
+
+        // use color space default
+        if (decode == null)
+        {
+            return pdImage.getColorSpace().getDefaultDecode(pdImage.getBitsPerComponent());
+        }
+
+        return decode;
+    }
+}

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java?rev=1826161&r1=1826160&r2=1826161&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PDFRenderer.java Wed Mar  7 21:40:42 2018
@@ -1,318 +1,348 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.pdfbox.rendering;
-
-import java.awt.Color;
-import java.awt.Graphics2D;
-import java.awt.image.BufferedImage;
-import java.io.IOException;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.PDPage;
-import org.apache.pdfbox.pdmodel.PDResources;
-import org.apache.pdfbox.pdmodel.common.PDRectangle;
-import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
-import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
-import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
-import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
-
-/**
- * Renders a PDF document to an AWT BufferedImage.
- * This class may be overridden in order to perform custom rendering.
- *
- * @author John Hewson
- */
-public class PDFRenderer
-{
-    protected final PDDocument document;
-    // TODO keep rendering state such as caches here
-    
-    /**
-    * Default annotations filter, returns all annotations
-    */
-    private AnnotationFilter annotationFilter = new AnnotationFilter()
-    {
-        @Override
-        public boolean accept(PDAnnotation annotation)
-        {
-            return true;
-        }
-    };
-
-    /**
-     * Creates a new PDFRenderer.
-     * @param document the document to render
-     */
-    public PDFRenderer(PDDocument document)
-    {
-        this.document = document;
-    }
-    
-    /**
-     * Return the AnnotationFilter.
-     * 
-     * @return the AnnotationFilter
-     */
-    public AnnotationFilter getAnnotationsFilter()
-    {
-        return annotationFilter;
-    }
-
-    /**
-     * Set the AnnotationFilter.
-     * 
-     * <p>Allows to only render annotation accepted by the filter.
-     * 
-     * @param annotationsFilter the AnnotationFilter
-     */
-    public void setAnnotationsFilter(AnnotationFilter annotationsFilter)
-    {
-        this.annotationFilter = annotationsFilter;
-    }
-
-    /**
-     * Returns the given page as an RGB image at 72 DPI
-     * @param pageIndex the zero-based index of the page to be converted.
-     * @return the rendered page image
-     * @throws IOException if the PDF cannot be read
-     */
-    public BufferedImage renderImage(int pageIndex) throws IOException
-    {
-        return renderImage(pageIndex, 1);
-    }
-
-    /**
-     * Returns the given page as an RGB image at the given scale.
-     * A scale of 1 will render at 72 DPI.
-     * @param pageIndex the zero-based index of the page to be converted
-     * @param scale the scaling factor, where 1 = 72 DPI
-     * @return the rendered page image
-     * @throws IOException if the PDF cannot be read
-     */
-    public BufferedImage renderImage(int pageIndex, float scale) throws IOException
-    {
-        return renderImage(pageIndex, scale, ImageType.RGB);
-    }
-
-    /**
-     * Returns the given page as an RGB image at the given DPI.
-     * @param pageIndex the zero-based index of the page to be converted
-     * @param dpi the DPI (dots per inch) to render at
-     * @return the rendered page image
-     * @throws IOException if the PDF cannot be read
-     */
-    public BufferedImage renderImageWithDPI(int pageIndex, float dpi) throws IOException
-    {
-        return renderImage(pageIndex, dpi / 72f, ImageType.RGB);
-    }
-
-    /**
-     * Returns the given page as an RGB image at the given DPI.
-     * @param pageIndex the zero-based index of the page to be converted
-     * @param dpi the DPI (dots per inch) to render at
-     * @param imageType the type of image to return
-     * @return the rendered page image
-     * @throws IOException if the PDF cannot be read
-     */
-    public BufferedImage renderImageWithDPI(int pageIndex, float dpi, ImageType imageType)
-            throws IOException
-    {
-        return renderImage(pageIndex, dpi / 72f, imageType);
-    }
-
-    /**
-     * Returns the given page as an RGB or ARGB image at the given scale.
-     * @param pageIndex the zero-based index of the page to be converted
-     * @param scale the scaling factor, where 1 = 72 DPI
-     * @param imageType the type of image to return
-     * @return the rendered page image
-     * @throws IOException if the PDF cannot be read
-     */
-    public BufferedImage renderImage(int pageIndex, float scale, ImageType imageType)
-            throws IOException
-    {
-        PDPage page = document.getPage(pageIndex);
-
-        PDRectangle cropbBox = page.getCropBox();
-        float widthPt = cropbBox.getWidth();
-        float heightPt = cropbBox.getHeight();
-        int widthPx = Math.round(widthPt * scale);
-        int heightPx = Math.round(heightPt * scale);
-        int rotationAngle = page.getRotation();
-
-        int bimType = imageType.toBufferedImageType();
-        if (imageType != ImageType.ARGB && hasBlendMode(page))
-        {
-            // PDFBOX-4095: if the PDF has blending on the top level, draw on transparent background
-            // Inpired from PDF.js: if a PDF page uses any blend modes other than Normal, 
-            // PDF.js renders everything on a fully transparent RGBA canvas. 
-            // Finally when the page has been rendered, PDF.js draws the RGBA canvas on a white canvas.
-            bimType = BufferedImage.TYPE_INT_ARGB;
-        }
-
-        // swap width and height
-        BufferedImage image;
-        if (rotationAngle == 90 || rotationAngle == 270)
-        {
-            image = new BufferedImage(heightPx, widthPx, bimType);
-        }
-        else
-        {
-            image = new BufferedImage(widthPx, heightPx, bimType);
-        }
-
-        // use a transparent background if the image type supports alpha
-        Graphics2D g = image.createGraphics();
-        if (image.getType() == BufferedImage.TYPE_INT_ARGB)
-        {
-            g.setBackground(new Color(0, 0, 0, 0));
-        }
-        else
-        {
-            g.setBackground(Color.WHITE);
-        }
-        g.clearRect(0, 0, image.getWidth(), image.getHeight());
-        
-        transform(g, page, scale);
-
-        // the end-user may provide a custom PageDrawer
-        PageDrawerParameters parameters = new PageDrawerParameters(this, page);
-        PageDrawer drawer = createPageDrawer(parameters);
-        drawer.drawPage(g, page.getCropBox());       
-        
-        g.dispose();
-
-        if (image.getType() != imageType.toBufferedImageType())
-        {
-            // PDFBOX-4095: draw temporary transparent image on white background
-            BufferedImage newImage = 
-                    new BufferedImage(image.getWidth(), image.getHeight(), imageType.toBufferedImageType());
-            Graphics2D dstGraphics = newImage.createGraphics();
-            dstGraphics.setBackground(Color.WHITE);
-            dstGraphics.clearRect(0, 0, image.getWidth(), image.getHeight());
-            dstGraphics.drawImage(image, 0, 0, null);
-            dstGraphics.dispose();
-            image = newImage;
-        }
-
-        return image;
-    }
-
-    /**
-     * Renders a given page to an AWT Graphics2D instance.
-     * @param pageIndex the zero-based index of the page to be converted
-     * @param graphics the Graphics2D on which to draw the page
-     * @throws IOException if the PDF cannot be read
-     */
-    public void renderPageToGraphics(int pageIndex, Graphics2D graphics) throws IOException
-    {
-        renderPageToGraphics(pageIndex, graphics, 1);
-    }
-
-    /**
-     * Renders a given page to an AWT Graphics2D instance.
-     * @param pageIndex the zero-based index of the page to be converted
-     * @param graphics the Graphics2D on which to draw the page
-     * @param scale the scale to draw the page at
-     * @throws IOException if the PDF cannot be read
-     */
-    public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scale)
-            throws IOException
-    {
-        PDPage page = document.getPage(pageIndex);
-        // TODO need width/wight calculations? should these be in PageDrawer?
-
-        transform(graphics, page, scale);
-
-        PDRectangle cropBox = page.getCropBox();
-        graphics.clearRect(0, 0, (int) cropBox.getWidth(), (int) cropBox.getHeight());
-
-        // the end-user may provide a custom PageDrawer
-        PageDrawerParameters parameters = new PageDrawerParameters(this, page);
-        PageDrawer drawer = createPageDrawer(parameters);
-        drawer.drawPage(graphics, cropBox);
-    }
-
-    // scale rotate translate
-    private void transform(Graphics2D graphics, PDPage page, float scale)
-    {
-        graphics.scale(scale, scale);
-
-        // TODO should we be passing the scale to PageDrawer rather than messing with Graphics?
-        int rotationAngle = page.getRotation();
-        PDRectangle cropBox = page.getCropBox();
-
-        if (rotationAngle != 0)
-        {
-            float translateX = 0;
-            float translateY = 0;
-            switch (rotationAngle)
-            {
-                case 90:
-                    translateX = cropBox.getHeight();
-                    break;
-                case 270:
-                    translateY = cropBox.getWidth();
-                    break;
-                case 180:
-                    translateX = cropBox.getWidth();
-                    translateY = cropBox.getHeight();
-                    break;
-                default:
-                    break;
-            }
-            graphics.translate(translateX, translateY);
-            graphics.rotate((float) Math.toRadians(rotationAngle));
-        }
-    }
-
-    /**
-     * Returns a new PageDrawer instance, using the given parameters. May be overridden.
-     */
-    protected PageDrawer createPageDrawer(PageDrawerParameters parameters) throws IOException
-    {
-        PageDrawer pageDrawer = new PageDrawer(parameters);
-        pageDrawer.setAnnotationFilter(annotationFilter);
-        return pageDrawer;
-    }
-
-    private boolean hasBlendMode(PDPage page)
-    {
-        // check the current resources for blend modes
-        PDResources resources = page.getResources();
-        if (resources == null)
-        {
-            return false;
-        }
-        for (COSName name : resources.getExtGStateNames())
-        {
-            PDExtendedGraphicsState extGState = resources.getExtGState(name);
-            if (extGState == null)
-            {
-                // can happen if key exists but no value 
-                // see PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf
-                continue;
-            }
-            BlendMode blendMode = extGState.getBlendMode();
-            if (blendMode != BlendMode.NORMAL)
-            {
-                return true;
-            }
-        }
-        return false;
-    }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.rendering;
+
+import java.awt.Color;
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.IOException;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDResources;
+import org.apache.pdfbox.pdmodel.common.PDRectangle;
+import org.apache.pdfbox.pdmodel.graphics.blend.BlendMode;
+import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
+import org.apache.pdfbox.pdmodel.interactive.annotation.AnnotationFilter;
+import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
+
+/**
+ * Renders a PDF document to an AWT BufferedImage.
+ * This class may be overridden in order to perform custom rendering.
+ *
+ * @author John Hewson
+ */
+public class PDFRenderer
+{
+    /** The document whose pages are rendered; accessible to subclasses. */
+    protected final PDDocument document;
+    // TODO keep rendering state such as caches here
+    
+    /**
+     * Annotation filter in use. The default implementation accepts every annotation.
+     */
+    private AnnotationFilter annotationFilter = new AnnotationFilter()
+    {
+        @Override
+        public boolean accept(PDAnnotation annotation)
+        {
+            return true;
+        }
+    };
+
+    /** Whether images may be subsampled before drawing; disabled by default. */
+    private boolean subsamplingAllowed = false;
+
+    /**
+     * Creates a new PDFRenderer for the given document. The document is stored by reference; it
+     * is neither copied nor validated here.
+     *
+     * @param document the document to render
+     */
+    public PDFRenderer(PDDocument document)
+    {
+        this.document = document;
+    }
+    
+    /**
+     * Returns the annotation filter currently set on this renderer.
+     *
+     * @return the AnnotationFilter; the accept-all default unless another one has been set
+     */
+    public AnnotationFilter getAnnotationsFilter()
+    {
+        return annotationFilter;
+    }
+
+    /**
+     * Sets the annotation filter.
+     *
+     * <p>This lets callers restrict rendering to the annotations accepted by the filter.
+     *
+     * @param annotationsFilter the AnnotationFilter to use; stored as given, without a null check
+     */
+    public void setAnnotationsFilter(AnnotationFilter annotationsFilter)
+    {
+        this.annotationFilter = annotationsFilter;
+    }
+
+    /**
+     * Indicates whether the renderer is allowed to subsample images before drawing, according to
+     * image dimensions and requested scale.
+     *
+     * <p>Subsampling may be faster and less memory-intensive in some cases, but it may also lead to
+     * loss of quality, especially in images with high spatial frequency.
+     *
+     * @return true if subsampling of images is allowed, false otherwise (the default).
+     */
+    public boolean isSubsamplingAllowed()
+    {
+        return subsamplingAllowed;
+    }
+
+    /**
+     * Sets whether the renderer is allowed to subsample images before drawing. The subsampling
+     * frequency is determined according to image size and requested scale.
+     *
+     * <p>Subsampling may be faster and less memory-intensive in some cases, but it may also lead to
+     * loss of quality, especially in images with high spatial frequency.
+     *
+     * @param subsamplingAllowed true to allow subsampling, false to forbid it.
+     */
+    public void setSubsamplingAllowed(boolean subsamplingAllowed)
+    {
+        this.subsamplingAllowed = subsamplingAllowed;
+    }
+
+    /**
+     * Returns the given page as an RGB image at 72 DPI. Delegates to the overload that takes a
+     * scale, passing a scale of 1.
+     *
+     * @param pageIndex the zero-based index of the page to be converted.
+     * @return the rendered page image
+     * @throws IOException if the PDF cannot be read
+     */
+    public BufferedImage renderImage(int pageIndex) throws IOException
+    {
+        return renderImage(pageIndex, 1);
+    }
+
+    /**
+     * Renders the given page into an RGB image, scaled by the given factor.
+     * A factor of 1 corresponds to 72 DPI.
+     *
+     * @param pageIndex the zero-based index of the page to be converted
+     * @param scale the scaling factor, where 1 = 72 DPI
+     * @return the rendered page image
+     * @throws IOException if the PDF cannot be read
+     */
+    public BufferedImage renderImage(int pageIndex, float scale) throws IOException
+    {
+        // this overload always produces an image without alpha channel
+        final ImageType defaultType = ImageType.RGB;
+        return renderImage(pageIndex, scale, defaultType);
+    }
+
+    /**
+     * Renders the given page into an RGB image at the requested resolution.
+     *
+     * @param pageIndex the zero-based index of the page to be converted
+     * @param dpi the DPI (dots per inch) to render at
+     * @return the rendered page image
+     * @throws IOException if the PDF cannot be read
+     */
+    public BufferedImage renderImageWithDPI(int pageIndex, float dpi) throws IOException
+    {
+        // a scale of 1 corresponds to 72 DPI
+        final float scale = dpi / 72f;
+        return renderImage(pageIndex, scale, ImageType.RGB);
+    }
+
+    /**
+     * Renders the given page into an image of the requested type at the requested resolution.
+     *
+     * @param pageIndex the zero-based index of the page to be converted
+     * @param dpi the DPI (dots per inch) to render at
+     * @param imageType the type of image to return
+     * @return the rendered page image
+     * @throws IOException if the PDF cannot be read
+     */
+    public BufferedImage renderImageWithDPI(int pageIndex, float dpi, ImageType imageType)
+            throws IOException
+    {
+        // a scale of 1 corresponds to 72 DPI
+        final float scale = dpi / 72f;
+        return renderImage(pageIndex, scale, imageType);
+    }
+
+    /**
+     * Returns the given page as an RGB or ARGB image at the given scale.
+     *
+     * <p>The image size is the crop box size multiplied by the scale, rounded to whole pixels and
+     * never smaller than one pixel per side. Width and height are swapped for pages rotated by 90
+     * or 270 degrees. If the page uses a blend mode and the requested type is not ARGB, the page is
+     * first drawn on a transparent ARGB image which is then painted onto a white image of the
+     * requested type.
+     *
+     * @param pageIndex the zero-based index of the page to be converted
+     * @param scale the scaling factor, where 1 = 72 DPI
+     * @param imageType the type of image to return
+     * @return the rendered page image
+     * @throws IOException if the PDF cannot be read
+     */
+    public BufferedImage renderImage(int pageIndex, float scale, ImageType imageType)
+            throws IOException
+    {
+        PDPage page = document.getPage(pageIndex);
+
+        PDRectangle cropBox = page.getCropBox();
+        float widthPt = cropBox.getWidth();
+        float heightPt = cropBox.getHeight();
+        // BufferedImage rejects dimensions that are not positive, so a very small page or scale
+        // must not round down to zero pixels
+        int widthPx = Math.max(Math.round(widthPt * scale), 1);
+        int heightPx = Math.max(Math.round(heightPt * scale), 1);
+        int rotationAngle = page.getRotation();
+
+        int bimType = imageType.toBufferedImageType();
+        if (imageType != ImageType.ARGB && hasBlendMode(page))
+        {
+            // PDFBOX-4095: if the PDF has blending on the top level, draw on transparent background
+            // Inspired from PDF.js: if a PDF page uses any blend modes other than Normal,
+            // PDF.js renders everything on a fully transparent RGBA canvas.
+            // Finally when the page has been rendered, PDF.js draws the RGBA canvas on a white canvas.
+            bimType = BufferedImage.TYPE_INT_ARGB;
+        }
+
+        // swap width and height
+        BufferedImage image;
+        if (rotationAngle == 90 || rotationAngle == 270)
+        {
+            image = new BufferedImage(heightPx, widthPx, bimType);
+        }
+        else
+        {
+            image = new BufferedImage(widthPx, heightPx, bimType);
+        }
+
+        Graphics2D g = image.createGraphics();
+        try
+        {
+            // use a transparent background if the image type supports alpha
+            if (image.getType() == BufferedImage.TYPE_INT_ARGB)
+            {
+                g.setBackground(new Color(0, 0, 0, 0));
+            }
+            else
+            {
+                g.setBackground(Color.WHITE);
+            }
+            g.clearRect(0, 0, image.getWidth(), image.getHeight());
+
+            transform(g, page, scale);
+
+            // the end-user may provide a custom PageDrawer
+            PageDrawerParameters parameters = new PageDrawerParameters(this, page, subsamplingAllowed);
+            PageDrawer drawer = createPageDrawer(parameters);
+            drawer.drawPage(g, page.getCropBox());
+        }
+        finally
+        {
+            // release the graphics context even if drawing the page fails
+            g.dispose();
+        }
+
+        if (image.getType() != imageType.toBufferedImageType())
+        {
+            // PDFBOX-4095: draw temporary transparent image on white background
+            BufferedImage newImage =
+                    new BufferedImage(image.getWidth(), image.getHeight(), imageType.toBufferedImageType());
+            Graphics2D dstGraphics = newImage.createGraphics();
+            dstGraphics.setBackground(Color.WHITE);
+            dstGraphics.clearRect(0, 0, image.getWidth(), image.getHeight());
+            dstGraphics.drawImage(image, 0, 0, null);
+            dstGraphics.dispose();
+            image = newImage;
+        }
+
+        return image;
+    }
+
+    /**
+     * Draws the given page onto an AWT Graphics2D instance without additional scaling.
+     * Delegates to {@link #renderPageToGraphics(int, Graphics2D, float)} with a scale of 1.
+     *
+     * @param pageIndex the zero-based index of the page to be converted
+     * @param graphics the Graphics2D on which to draw the page
+     * @throws IOException if the PDF cannot be read
+     */
+    public void renderPageToGraphics(int pageIndex, Graphics2D graphics) throws IOException
+    {
+        final float unscaled = 1f;
+        renderPageToGraphics(pageIndex, graphics, unscaled);
+    }
+
+    /**
+     * Renders a given page to an AWT Graphics2D instance.
+     * @param pageIndex the zero-based index of the page to be converted
+     * @param graphics the Graphics2D on which to draw the page
+     * @param scale the scale to draw the page at
+     * @throws IOException if the PDF cannot be read
+     */
+    public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scale)
+            throws IOException
+    {
+        PDPage page = document.getPage(pageIndex);
+        // TODO need width/wight calculations? should these be in PageDrawer?
+
+        transform(graphics, page, scale);
+
+        PDRectangle cropBox = page.getCropBox();
+        graphics.clearRect(0, 0, (int) cropBox.getWidth(), (int) cropBox.getHeight());
+
+        // the end-user may provide a custom PageDrawer
+        PageDrawerParameters parameters = new PageDrawerParameters(this, page, subsamplingAllowed);
+        PageDrawer drawer = createPageDrawer(parameters);
+        drawer.drawPage(graphics, cropBox);
+    }
+
+    /**
+     * Applies the scale and, for rotated pages, a translation followed by the rotation to the
+     * given graphics context.
+     */
+    private void transform(Graphics2D graphics, PDPage page, float scale)
+    {
+        graphics.scale(scale, scale);
+
+        // TODO should we be passing the scale to PageDrawer rather than messing with Graphics?
+        int angle = page.getRotation();
+        PDRectangle box = page.getCropBox();
+
+        if (angle == 0)
+        {
+            // unrotated page: scaling is all that is needed
+            return;
+        }
+
+        // offset applied before the rotation; it stays (0, 0) for any other angle
+        float shiftX = 0;
+        float shiftY = 0;
+        if (angle == 90)
+        {
+            shiftX = box.getHeight();
+        }
+        else if (angle == 270)
+        {
+            shiftY = box.getWidth();
+        }
+        else if (angle == 180)
+        {
+            shiftX = box.getWidth();
+            shiftY = box.getHeight();
+        }
+        graphics.translate(shiftX, shiftY);
+        graphics.rotate((float) Math.toRadians(angle));
+    }
+
+    /**
+     * Creates the PageDrawer used to draw a page and hands it this renderer's annotation filter.
+     * May be overridden to supply a custom PageDrawer.
+     *
+     * @param parameters the parameters the PageDrawer is constructed with
+     * @return a new PageDrawer instance
+     * @throws IOException if the PageDrawer cannot be created
+     */
+    protected PageDrawer createPageDrawer(PageDrawerParameters parameters) throws IOException
+    {
+        PageDrawer drawer = new PageDrawer(parameters);
+        drawer.setAnnotationFilter(annotationFilter);
+        return drawer;
+    }
+
+    /**
+     * Tells whether the resources of the given page contain an extended graphics state whose
+     * blend mode is not NORMAL.
+     */
+    private boolean hasBlendMode(PDPage page)
+    {
+        PDResources pageResources = page.getResources();
+        if (pageResources != null)
+        {
+            for (COSName key : pageResources.getExtGStateNames())
+            {
+                PDExtendedGraphicsState state = pageResources.getExtGState(key);
+                // the state can be null if the key exists but has no value
+                // see PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf
+                if (state != null && state.getBlendMode() != BlendMode.NORMAL)
+                {
+                    return true;
+                }
+            }
+        }
+        return false;
+    }
+}

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java?rev=1826161&r1=1826160&r2=1826161&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawer.java Wed Mar  7 21:40:42 2018
@@ -95,6 +95,8 @@ public class PageDrawer extends PDFGraph
     // parent document renderer - note: this is needed for not-yet-implemented resource caching
     private final PDFRenderer renderer;
     
+    private final boolean subsamplingAllowed;
+    
     // the graphics device to draw to, xform is the initial transform of the device (i.e. DPI)
     private Graphics2D graphics;
     private AffineTransform xform;
@@ -145,6 +147,7 @@ public class PageDrawer extends PDFGraph
     {
         super(parameters.getPage());
         this.renderer = parameters.getRenderer();
+        this.subsamplingAllowed = parameters.isSubsamplingAllowed();
     }
 
     /**
@@ -954,8 +957,17 @@ public class PageDrawer extends PDFGraph
         }
         else
         {
-            // draw the image
-            drawBufferedImage(pdImage.getImage(), at);
+            if (subsamplingAllowed)
+            {
+                int subsampling = getSubsampling(pdImage, at);
+                // draw the subsampled image
+                drawBufferedImage(pdImage.getImage(null, subsampling), at);
+            }
+            else
+            {
+                // subsampling not allowed, draw the image
+                drawBufferedImage(pdImage.getImage(), at);
+            }
         }
 
         if (!pdImage.getInterpolate())
@@ -966,6 +978,38 @@ public class PageDrawer extends PDFGraph
         }
     }
 
+    /**
+     * Calculates the subsampling frequency for a given PDImage from the area it covers on the
+     * device, i.e. from the image transform combined with the initial device transform.
+     *
+     * @param pdImage PDImage to be drawn
+     * @param at Transform that will be applied to the image when drawing
+     * @return The rounded-down ratio of image pixels to drawn pixels, always between 1 and 8.
+     */
+    private int getSubsampling(PDImage pdImage, AffineTransform at)
+    {
+        // calculate subsampling according to the resulting image size
+        double scale = Math.abs(at.getDeterminant() * xform.getDeterminant());
+        // multiply as double: width * height overflows int for images above 2^31 - 1 pixels
+        double pixels = (double) pdImage.getWidth() * pdImage.getHeight();
+        int subsampling = (int) Math.floor(Math.sqrt(pixels / scale));
+        if (subsampling > 8)
+        {
+            subsampling = 8;
+        }
+        if (subsampling < 1)
+        {
+            subsampling = 1;
+        }
+        if (subsampling > pdImage.getWidth() || subsampling > pdImage.getHeight())
+        {
+            // For very small images it is possible that the subsampling would imply 0 size.
+            // It is capped at the smallest dimension, but never below 1 (zero-sized image).
+            subsampling = Math.max(1, Math.min(pdImage.getWidth(), pdImage.getHeight()));
+        }
+        return subsampling;
+    }
+
     private void drawBufferedImage(BufferedImage image, AffineTransform at) throws IOException
     {
         graphics.setComposite(getGraphicsState().getNonStrokingJavaComposite());

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java?rev=1826161&r1=1826160&r2=1826161&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/rendering/PageDrawerParameters.java Wed Mar  7 21:40:42 2018
@@ -30,14 +30,16 @@ public final class PageDrawerParameters
 {
     private final PDFRenderer renderer;
     private final PDPage page;
+    private final boolean subsamplingAllowed;
 
     /**
      * Package-private constructor.
      */
-    PageDrawerParameters(PDFRenderer renderer, PDPage page)
+    PageDrawerParameters(PDFRenderer renderer, PDPage page, boolean subsamplingAllowed)
     {
         this.renderer = renderer;
         this.page = page;
+        this.subsamplingAllowed = subsamplingAllowed;
     }
 
     /**
@@ -55,4 +57,12 @@ public final class PageDrawerParameters
     {
         return renderer;
     }
+
+    /**
+     * Returns whether to allow subsampling of images.
+     *
+     * @return the subsampling flag these parameters were constructed with
+     */
+    public boolean isSubsamplingAllowed()
+    {
+        return this.subsamplingAllowed;
+    }
 }

Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java?rev=1826161&r1=1826160&r2=1826161&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java (original)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/common/PDStreamTest.java Wed Mar  7 21:40:42 2018
@@ -92,7 +92,7 @@ public class PDStreamTest
             PDStream pdStream = new PDStream(doc, is, new COSArray());
             Assert.assertEquals(0, pdStream.getFilters().size());
             
-            is = pdStream.createInputStream(null);
+            is = pdStream.createInputStream((List<String>) null);
             Assert.assertEquals(12, is.read());
             Assert.assertEquals(34, is.read());
             Assert.assertEquals(56, is.read());