You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2016/11/30 21:14:21 UTC
svn commit: r1772120 - in /pdfbox/branches/2.0/pdfbox/src:
main/java/org/apache/pdfbox/pdmodel/graphics/image/
test/java/org/apache/pdfbox/pdmodel/graphics/image/
test/resources/org/apache/pdfbox/pdmodel/graphics/image/
Author: tilman
Date: Wed Nov 30 21:14:20 2016
New Revision: 1772120
URL: http://svn.apache.org/viewvc?rev=1772120&view=rev
Log:
PDFBOX-3608: ignore garbage in big endian byte/short tiff tags, as suggested by \u0160t?p�n Schejbal
Added:
pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields-bigendian.tif (with props)
pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields.tif (with props)
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java
pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java?rev=1772120&r1=1772119&r2=1772120&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactory.java Wed Nov 30 21:14:20 2016
@@ -1,490 +1,481 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.pdfbox.pdmodel.graphics.image;
-
-import java.awt.image.BufferedImage;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import javax.imageio.stream.MemoryCacheImageOutputStream;
-import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.filter.Filter;
-import org.apache.pdfbox.filter.FilterFactory;
-import org.apache.pdfbox.io.RandomAccess;
-import org.apache.pdfbox.io.RandomAccessFile;
-import org.apache.pdfbox.pdmodel.PDDocument;
-import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
-import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
-
-/**
- * Factory for creating a PDImageXObject containing a CCITT Fax compressed TIFF image.
- *
- * @author Ben Litchfield
- * @author Paul King
- */
-public final class CCITTFactory
-{
- private CCITTFactory()
- {
- }
-
- /**
- * Creates a new CCITT group 4 (T6) compressed image XObject from a b/w BufferedImage. This
- * compression technique usually results in smaller images than those produced by {@link LosslessFactory#createFromImage(PDDocument, BufferedImage)
- * }.
- *
- * @param document the document to create the image as part of.
- * @param image the image.
- * @return a new image XObject.
- * @throws IOException if there is an error creating the image.
- * @throws IllegalArgumentException if the BufferedImage is not a b/w image.
- */
- public static PDImageXObject createFromImage(PDDocument document, BufferedImage image)
- throws IOException
- {
- if (image.getType() != BufferedImage.TYPE_BYTE_BINARY && image.getColorModel().getPixelSize() != 1)
- {
- throw new IllegalArgumentException("Only 1-bit b/w images supported");
- }
-
- int height = image.getHeight();
- int width = image.getWidth();
-
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos);
-
- for (int y = 0; y < height; ++y)
- {
- for (int x = 0; x < width; ++x)
- {
- // flip bit to avoid having to set /BlackIs1
- mcios.writeBits(~(image.getRGB(x, y) & 1), 1);
- }
- while (mcios.getBitOffset() != 0)
- {
- mcios.writeBit(0);
- }
- }
- mcios.flush();
- mcios.close();
-
- return prepareImageXObject(document, bos.toByteArray(), width, height, PDDeviceGray.INSTANCE);
- }
-
- private static PDImageXObject prepareImageXObject(PDDocument document,
- byte[] byteArray, int width, int height,
- PDColorSpace initColorSpace) throws IOException
- {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
- Filter filter = FilterFactory.INSTANCE.getFilter(COSName.CCITTFAX_DECODE);
- COSDictionary dict = new COSDictionary();
- dict.setInt(COSName.COLUMNS, width);
- dict.setInt(COSName.ROWS, height);
- filter.encode(new ByteArrayInputStream(byteArray), baos, dict, 0);
-
- ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(baos.toByteArray());
- PDImageXObject image = new PDImageXObject(document, encodedByteStream, COSName.CCITTFAX_DECODE,
- width, height, 1, initColorSpace);
- dict.setInt(COSName.K, -1);
- image.getCOSObject().setItem(COSName.DECODE_PARMS, dict);
- return image;
- }
-
- /**
- * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file.
- *
- * @param document the document to create the image as part of.
- * @param reader the random access TIFF file which contains a suitable CCITT
- * compressed image
- * @return a new image XObject
- * @throws IOException if there is an error reading the TIFF data.
- *
- * @deprecated Use {@link #createFromFile(PDDocument, File)} instead.
- */
- @Deprecated
- public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader)
- throws IOException
- {
- return createFromRandomAccessImpl(document, reader, 0);
- }
-
- /**
- * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file.
- *
- * @param document the document to create the image as part of.
- * @param reader the random access TIFF file which contains a suitable CCITT
- * compressed image
- * @param number TIFF image number, starting from 0
- * @return a new image XObject, or null if no such page
- * @throws IOException if there is an error reading the TIFF data.
- *
- * @deprecated Use {@link #createFromFile(PDDocument, File, int)} instead.
- */
- @Deprecated
- public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader,
- int number) throws IOException
- {
- return createFromRandomAccessImpl(document, reader, number);
- }
-
- /**
- * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file. Only
- * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF
- * files you have, use
- * {@link LosslessFactory#createFromImage(org.apache.pdfbox.pdmodel.PDDocument, java.awt.image.BufferedImage)}
- * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
- * instead.
- *
- * @param document the document to create the image as part of.
- * @param file the TIFF file which contains a suitable CCITT compressed image
- * @return a new Image XObject
- * @throws IOException if there is an error reading the TIFF data.
- */
- public static PDImageXObject createFromFile(PDDocument document, File file)
- throws IOException
- {
- RandomAccessFile raf = new RandomAccessFile(file, "r");
- try
- {
- return createFromRandomAccessImpl(document, raf, 0);
- }
- finally
- {
- raf.close();
- }
- }
-
- /**
- * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file. Only
- * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF
- * files you have, use
- * {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) }
- * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
- * instead.
- *
- * @param document the document to create the image as part of.
- * @param file the TIFF file which contains a suitable CCITT compressed image
- * @param number TIFF image number, starting from 0
- * @return a new Image XObject
- * @throws IOException if there is an error reading the TIFF data.
- */
- public static PDImageXObject createFromFile(PDDocument document, File file, int number)
- throws IOException
- {
- RandomAccessFile raf = new RandomAccessFile(file, "r");
- try
- {
- return createFromRandomAccessImpl(document, raf, number);
- }
- finally
- {
- raf.close();
- }
- }
-
- /**
- * Creates a new CCITT Fax compressed image XObject from a TIFF file.
- *
- * @param document the document to create the image as part of.
- * @param reader the random access TIFF file which contains a suitable CCITT
- * compressed image
- * @param number TIFF image number, starting from 0
- * @return a new Image XObject, or null if no such page
- * @throws IOException if there is an error reading the TIFF data.
- */
- private static PDImageXObject createFromRandomAccessImpl(PDDocument document,
- RandomAccess reader,
- int number) throws IOException
- {
- COSDictionary decodeParms = new COSDictionary();
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- extractFromTiff(reader, bos, decodeParms, number);
- if (bos.size() == 0)
- {
- return null;
- }
- ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(bos.toByteArray());
- PDImageXObject pdImage = new PDImageXObject(document,
- encodedByteStream,
- COSName.CCITTFAX_DECODE,
- decodeParms.getInt(COSName.COLUMNS),
- decodeParms.getInt(COSName.ROWS),
- 1,
- PDDeviceGray.INSTANCE);
-
- COSDictionary dict = pdImage.getCOSObject();
- dict.setItem(COSName.DECODE_PARMS, decodeParms);
- return pdImage;
- }
-
- // extracts the CCITT stream from the TIFF file
- private static void extractFromTiff(RandomAccess reader, OutputStream os,
- COSDictionary params, int number) throws IOException
- {
- try
- {
- // First check the basic tiff header
- reader.seek(0);
- char endianess = (char) reader.read();
- if ((char) reader.read() != endianess)
- {
- throw new IOException("Not a valid tiff file");
- }
- // ensure that endianess is either M or I
- if (endianess != 'M' && endianess != 'I')
- {
- throw new IOException("Not a valid tiff file");
- }
- int magicNumber = readshort(endianess, reader);
- if (magicNumber != 42)
- {
- throw new IOException("Not a valid tiff file");
- }
-
- // Relocate to the first set of tags
- int address = readlong(endianess, reader);
- reader.seek(address);
-
- // If some higher page number is required, skip this page's tags,
- // then read the next page's address
- for (int i = 0; i < number; i++)
- {
- int numtags = readshort(endianess, reader);
- if (numtags > 50)
- {
- throw new IOException("Not a valid tiff file");
- }
- reader.seek(address + 2 + numtags * 12);
- address = readlong(endianess, reader);
- if (address == 0)
- {
- return;
- }
- reader.seek(address);
- }
-
- int numtags = readshort(endianess, reader);
-
- // The number 50 is somewhat arbitary, it just stops us load up junk from somewhere
- // and tramping on
- if (numtags > 50)
- {
- throw new IOException("Not a valid tiff file");
- }
-
- // Loop through the tags, some will convert to items in the params dictionary
- // Other point us to where to find the data stream.
- // The only param which might change as a result of other TIFF tags is K, so
- // we'll deal with that differently.
-
- // Default value to detect error
- int k = -1000;
-
- int dataoffset = 0;
- int datalength = 0;
-
- for (int i = 0; i < numtags; i++)
- {
- int tag = readshort(endianess, reader);
- int type = readshort(endianess, reader);
- int count = readlong(endianess, reader);
- int val = readlong(endianess, reader); // See note
-
- // Note, we treated that value as a long. The value always occupies 4 bytes
- // But it might only use the first byte or two. Depending on endianess we might
- // need to correct.
- // Note we ignore all other types, they are of little interest for PDFs/CCITT Fax
- if (endianess == 'M')
- {
- switch (type)
- {
- case 1:
- {
- val = val >> 24;
- break; // byte value
- }
- case 3:
- {
- val = val >> 16;
- break; // short value
- }
- case 4:
- {
- break; // long value
- }
- default:
- {
- // do nothing
- }
- }
- }
- switch (tag)
- {
- case 256:
- {
- params.setInt(COSName.COLUMNS, val);
- break;
- }
- case 257:
- {
- params.setInt(COSName.ROWS, val);
- break;
- }
- case 259:
- {
- if (val == 4)
- {
- k = -1;
- }
- if (val == 3)
- {
- k = 0;
- }
- break; // T6/T4 Compression
- }
- case 262:
- {
- if (val == 1)
- {
- params.setBoolean(COSName.BLACK_IS_1, true);
- }
- break;
- }
- case 266:
- {
- if (val != 1)
- {
- throw new IOException("FillOrder " + val + " is not supported");
- }
- break;
- }
- case 273:
- {
- if (count == 1)
- {
- dataoffset = val;
- }
- break;
- }
- case 274:
- {
- // http://www.awaresystems.be/imaging/tiff/tifftags/orientation.html
- if (val != 1)
- {
- throw new IOException("Orientation " + val + " is not supported");
- }
- break;
- }
- case 279:
- {
- if (count == 1)
- {
- datalength = val;
- }
- break;
- }
- case 292:
- {
- if ((val & 1) != 0)
- {
- // T4 2D - arbitary positive K value
- k = 50;
- }
- // http://www.awaresystems.be/imaging/tiff/tifftags/t4options.html
- if ((val & 4) != 0)
- {
- throw new IOException("CCITT Group 3 'uncompressed mode' is not supported");
- }
- if ((val & 2) != 0)
- {
- throw new IOException("CCITT Group 3 'fill bits before EOL' is not supported");
- }
- break;
- }
- case 324:
- {
- if (count == 1)
- {
- dataoffset = val;
- }
- break;
- }
- case 325:
- {
- if (count == 1)
- {
- datalength = val;
- }
- break;
- }
- default:
- {
- // do nothing
- }
- }
- }
-
- if (k == -1000)
- {
- throw new IOException("First image in tiff is not CCITT T4 or T6 compressed");
- }
- if (dataoffset == 0)
- {
- throw new IOException("First image in tiff is not a single tile/strip");
- }
-
- params.setInt(COSName.K, k);
-
- reader.seek(dataoffset);
-
- byte[] buf = new byte[8192];
- int amountRead;
- while ((amountRead = reader.read(buf, 0, Math.min(8192, datalength))) > 0)
- {
- datalength -= amountRead;
- os.write(buf, 0, amountRead);
- }
-
- }
- finally
- {
- os.close();
- }
- }
-
- private static int readshort(char endianess, RandomAccess raf) throws IOException
- {
- if (endianess == 'I')
- {
- return raf.read() | (raf.read() << 8);
- }
- return (raf.read() << 8) | raf.read();
- }
-
- private static int readlong(char endianess, RandomAccess raf) throws IOException
- {
- if (endianess == 'I')
- {
- return raf.read() | (raf.read() << 8) | (raf.read() << 16) | (raf.read() << 24);
- }
- return (raf.read() << 24) | (raf.read() << 16) | (raf.read() << 8) | raf.read();
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdmodel.graphics.image;
+
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import javax.imageio.stream.MemoryCacheImageOutputStream;
+import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.filter.Filter;
+import org.apache.pdfbox.filter.FilterFactory;
+import org.apache.pdfbox.io.RandomAccess;
+import org.apache.pdfbox.io.RandomAccessFile;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
+
+/**
+ * Factory for creating a PDImageXObject containing a CCITT Fax compressed TIFF image.
+ *
+ * @author Ben Litchfield
+ * @author Paul King
+ */
+public final class CCITTFactory
+{
+ private CCITTFactory()
+ {
+ }
+
+ /**
+ * Creates a new CCITT group 4 (T6) compressed image XObject from a b/w BufferedImage. This
+ * compression technique usually results in smaller images than those produced by {@link LosslessFactory#createFromImage(PDDocument, BufferedImage)
+ * }.
+ *
+ * @param document the document to create the image as part of.
+ * @param image the image.
+ * @return a new image XObject.
+ * @throws IOException if there is an error creating the image.
+ * @throws IllegalArgumentException if the BufferedImage is not a b/w image.
+ */
+ public static PDImageXObject createFromImage(PDDocument document, BufferedImage image)
+ throws IOException
+ {
+ if (image.getType() != BufferedImage.TYPE_BYTE_BINARY && image.getColorModel().getPixelSize() != 1)
+ {
+ throw new IllegalArgumentException("Only 1-bit b/w images supported");
+ }
+
+ int height = image.getHeight();
+ int width = image.getWidth();
+
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos);
+
+ for (int y = 0; y < height; ++y)
+ {
+ for (int x = 0; x < width; ++x)
+ {
+ // flip bit to avoid having to set /BlackIs1
+ mcios.writeBits(~(image.getRGB(x, y) & 1), 1);
+ }
+ while (mcios.getBitOffset() != 0)
+ {
+ mcios.writeBit(0);
+ }
+ }
+ mcios.flush();
+ mcios.close();
+
+ return prepareImageXObject(document, bos.toByteArray(), width, height, PDDeviceGray.INSTANCE);
+ }
+
+ private static PDImageXObject prepareImageXObject(PDDocument document,
+ byte[] byteArray, int width, int height,
+ PDColorSpace initColorSpace) throws IOException
+ {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ Filter filter = FilterFactory.INSTANCE.getFilter(COSName.CCITTFAX_DECODE);
+ COSDictionary dict = new COSDictionary();
+ dict.setInt(COSName.COLUMNS, width);
+ dict.setInt(COSName.ROWS, height);
+ filter.encode(new ByteArrayInputStream(byteArray), baos, dict, 0);
+
+ ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(baos.toByteArray());
+ PDImageXObject image = new PDImageXObject(document, encodedByteStream, COSName.CCITTFAX_DECODE,
+ width, height, 1, initColorSpace);
+ dict.setInt(COSName.K, -1);
+ image.getCOSObject().setItem(COSName.DECODE_PARMS, dict);
+ return image;
+ }
+
+ /**
+ * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file.
+ *
+ * @param document the document to create the image as part of.
+ * @param reader the random access TIFF file which contains a suitable CCITT
+ * compressed image
+ * @return a new image XObject
+ * @throws IOException if there is an error reading the TIFF data.
+ *
+ * @deprecated Use {@link #createFromFile(PDDocument, File)} instead.
+ */
+ @Deprecated
+ public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader)
+ throws IOException
+ {
+ return createFromRandomAccessImpl(document, reader, 0);
+ }
+
+ /**
+ * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file.
+ *
+ * @param document the document to create the image as part of.
+ * @param reader the random access TIFF file which contains a suitable CCITT
+ * compressed image
+ * @param number TIFF image number, starting from 0
+ * @return a new image XObject, or null if no such page
+ * @throws IOException if there is an error reading the TIFF data.
+ *
+ * @deprecated Use {@link #createFromFile(PDDocument, File, int)} instead.
+ */
+ @Deprecated
+ public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader,
+ int number) throws IOException
+ {
+ return createFromRandomAccessImpl(document, reader, number);
+ }
+
+ /**
+ * Creates a new CCITT Fax compressed image XObject from the first image of a TIFF file. Only
+ * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF
+ * files you have, use
+ * {@link LosslessFactory#createFromImage(org.apache.pdfbox.pdmodel.PDDocument, java.awt.image.BufferedImage)}
+ * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
+ * instead.
+ *
+ * @param document the document to create the image as part of.
+ * @param file the TIFF file which contains a suitable CCITT compressed image
+ * @return a new Image XObject
+ * @throws IOException if there is an error reading the TIFF data.
+ */
+ public static PDImageXObject createFromFile(PDDocument document, File file)
+ throws IOException
+ {
+ RandomAccessFile raf = new RandomAccessFile(file, "r");
+ try
+ {
+ return createFromRandomAccessImpl(document, raf, 0);
+ }
+ finally
+ {
+ raf.close();
+ }
+ }
+
+ /**
+ * Creates a new CCITT Fax compressed image XObject from a specific image of a TIFF file. Only
+ * single-strip CCITT T4 or T6 compressed TIFF files are supported. If you're not sure what TIFF
+ * files you have, use
+ * {@link LosslessFactory#createFromImage(PDDocument, BufferedImage) }
+ * or {@link CCITTFactory#createFromImage(PDDocument, BufferedImage) }
+ * instead.
+ *
+ * @param document the document to create the image as part of.
+ * @param file the TIFF file which contains a suitable CCITT compressed image
+ * @param number TIFF image number, starting from 0
+ * @return a new Image XObject
+ * @throws IOException if there is an error reading the TIFF data.
+ */
+ public static PDImageXObject createFromFile(PDDocument document, File file, int number)
+ throws IOException
+ {
+ RandomAccessFile raf = new RandomAccessFile(file, "r");
+ try
+ {
+ return createFromRandomAccessImpl(document, raf, number);
+ }
+ finally
+ {
+ raf.close();
+ }
+ }
+
+ /**
+ * Creates a new CCITT Fax compressed image XObject from a TIFF file.
+ *
+ * @param document the document to create the image as part of.
+ * @param reader the random access TIFF file which contains a suitable CCITT
+ * compressed image
+ * @param number TIFF image number, starting from 0
+ * @return a new Image XObject, or null if no such page
+ * @throws IOException if there is an error reading the TIFF data.
+ */
+ private static PDImageXObject createFromRandomAccessImpl(PDDocument document,
+ RandomAccess reader,
+ int number) throws IOException
+ {
+ COSDictionary decodeParms = new COSDictionary();
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ extractFromTiff(reader, bos, decodeParms, number);
+ if (bos.size() == 0)
+ {
+ return null;
+ }
+ ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(bos.toByteArray());
+ PDImageXObject pdImage = new PDImageXObject(document,
+ encodedByteStream,
+ COSName.CCITTFAX_DECODE,
+ decodeParms.getInt(COSName.COLUMNS),
+ decodeParms.getInt(COSName.ROWS),
+ 1,
+ PDDeviceGray.INSTANCE);
+
+ COSDictionary dict = pdImage.getCOSObject();
+ dict.setItem(COSName.DECODE_PARMS, decodeParms);
+ return pdImage;
+ }
+
+ // extracts the CCITT stream from the TIFF file
+ private static void extractFromTiff(RandomAccess reader, OutputStream os,
+ COSDictionary params, int number) throws IOException
+ {
+ try
+ {
+ // First check the basic tiff header
+ reader.seek(0);
+ char endianess = (char) reader.read();
+ if ((char) reader.read() != endianess)
+ {
+ throw new IOException("Not a valid tiff file");
+ }
+ // ensure that endianess is either M or I
+ if (endianess != 'M' && endianess != 'I')
+ {
+ throw new IOException("Not a valid tiff file");
+ }
+ int magicNumber = readshort(endianess, reader);
+ if (magicNumber != 42)
+ {
+ throw new IOException("Not a valid tiff file");
+ }
+
+ // Relocate to the first set of tags
+ int address = readlong(endianess, reader);
+ reader.seek(address);
+
+ // If some higher page number is required, skip this page's tags,
+ // then read the next page's address
+ for (int i = 0; i < number; i++)
+ {
+ int numtags = readshort(endianess, reader);
+ if (numtags > 50)
+ {
+ throw new IOException("Not a valid tiff file");
+ }
+ reader.seek(address + 2 + numtags * 12);
+ address = readlong(endianess, reader);
+ if (address == 0)
+ {
+ return;
+ }
+ reader.seek(address);
+ }
+
+ int numtags = readshort(endianess, reader);
+
+ // The number 50 is somewhat arbitary, it just stops us load up junk from somewhere
+ // and tramping on
+ if (numtags > 50)
+ {
+ throw new IOException("Not a valid tiff file");
+ }
+
+ // Loop through the tags, some will convert to items in the params dictionary
+ // Other point us to where to find the data stream.
+ // The only param which might change as a result of other TIFF tags is K, so
+ // we'll deal with that differently.
+
+ // Default value to detect error
+ int k = -1000;
+
+ int dataoffset = 0;
+ int datalength = 0;
+
+ for (int i = 0; i < numtags; i++)
+ {
+ int tag = readshort(endianess, reader);
+ int type = readshort(endianess, reader);
+ int count = readlong(endianess, reader);
+ int val;
+ // Note that when the type is shorter than 4 bytes, the rest can be garbage
+ // and must be ignored. E.g. short (2 bytes) from "01 00 38 32" (little endian)
+ // is 1, not 842530817 (seen in a real-life TIFF image).
+ switch (type)
+ {
+ case 1: // byte value
+ val = reader.read();
+ reader.read();
+ reader.read();
+ reader.read();
+ break;
+ case 3: // short value
+ val = readshort(endianess, reader);
+ reader.read();
+ reader.read();
+ break;
+ default: // long and other types
+ val = readlong(endianess, reader);
+ break;
+ }
+ switch (tag)
+ {
+ case 256:
+ {
+ params.setInt(COSName.COLUMNS, val);
+ break;
+ }
+ case 257:
+ {
+ params.setInt(COSName.ROWS, val);
+ break;
+ }
+ case 259:
+ {
+ if (val == 4)
+ {
+ k = -1;
+ }
+ if (val == 3)
+ {
+ k = 0;
+ }
+ break; // T6/T4 Compression
+ }
+ case 262:
+ {
+ if (val == 1)
+ {
+ params.setBoolean(COSName.BLACK_IS_1, true);
+ }
+ break;
+ }
+ case 266:
+ {
+ if (val != 1)
+ {
+ throw new IOException("FillOrder " + val + " is not supported");
+ }
+ break;
+ }
+ case 273:
+ {
+ if (count == 1)
+ {
+ dataoffset = val;
+ }
+ break;
+ }
+ case 274:
+ {
+ // http://www.awaresystems.be/imaging/tiff/tifftags/orientation.html
+ if (val != 1)
+ {
+ throw new IOException("Orientation " + val + " is not supported");
+ }
+ break;
+ }
+ case 279:
+ {
+ if (count == 1)
+ {
+ datalength = val;
+ }
+ break;
+ }
+ case 292:
+ {
+ if ((val & 1) != 0)
+ {
+ // T4 2D - arbitary positive K value
+ k = 50;
+ }
+ // http://www.awaresystems.be/imaging/tiff/tifftags/t4options.html
+ if ((val & 4) != 0)
+ {
+ throw new IOException("CCITT Group 3 'uncompressed mode' is not supported");
+ }
+ if ((val & 2) != 0)
+ {
+ throw new IOException("CCITT Group 3 'fill bits before EOL' is not supported");
+ }
+ break;
+ }
+ case 324:
+ {
+ if (count == 1)
+ {
+ dataoffset = val;
+ }
+ break;
+ }
+ case 325:
+ {
+ if (count == 1)
+ {
+ datalength = val;
+ }
+ break;
+ }
+ default:
+ {
+ // do nothing
+ }
+ }
+ }
+
+ if (k == -1000)
+ {
+ throw new IOException("First image in tiff is not CCITT T4 or T6 compressed");
+ }
+ if (dataoffset == 0)
+ {
+ throw new IOException("First image in tiff is not a single tile/strip");
+ }
+
+ params.setInt(COSName.K, k);
+
+ reader.seek(dataoffset);
+
+ byte[] buf = new byte[8192];
+ int amountRead;
+ while ((amountRead = reader.read(buf, 0, Math.min(8192, datalength))) > 0)
+ {
+ datalength -= amountRead;
+ os.write(buf, 0, amountRead);
+ }
+
+ }
+ finally
+ {
+ os.close();
+ }
+ }
+
+ private static int readshort(char endianess, RandomAccess raf) throws IOException
+ {
+ if (endianess == 'I')
+ {
+ return raf.read() | (raf.read() << 8);
+ }
+ return (raf.read() << 8) | raf.read();
+ }
+
+ private static int readlong(char endianess, RandomAccess raf) throws IOException
+ {
+ if (endianess == 'I')
+ {
+ return raf.read() | (raf.read() << 8) | (raf.read() << 16) | (raf.read() << 24);
+ }
+ return (raf.read() << 24) | (raf.read() << 16) | (raf.read() << 8) | raf.read();
+ }
+}
Modified: pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java?rev=1772120&r1=1772119&r2=1772120&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/test/java/org/apache/pdfbox/pdmodel/graphics/image/CCITTFactoryTest.java Wed Nov 30 21:14:20 2016
@@ -22,6 +22,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.Arrays;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.ImageInputStream;
@@ -247,4 +248,22 @@ public class CCITTFactoryTest extends Te
os.close();
}
}
+
+ /**
+ * Tests that byte/short tag values are read correctly (ignoring possible garbage in remaining
+ * bytes).
+ */
+ public void testByteShortPaddedWithGarbage() throws IOException
+ {
+ PDDocument document = new PDDocument();
+ String basePath = "src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields";
+ for (String ext : Arrays.asList(".tif", "-bigendian.tif"))
+ {
+ String tiffPath = basePath + ext;
+ PDImageXObject ximage3 = CCITTFactory.createFromFile(document, new File(tiffPath));
+ validate(ximage3, 1, 344, 287, "tiff", PDDeviceGray.INSTANCE.getName());
+ }
+ document.close();
+ }
+
}
Added: pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields-bigendian.tif
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields-bigendian.tif?rev=1772120&view=auto
==============================================================================
Binary file - no diff available.
Propchange: pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields-bigendian.tif
------------------------------------------------------------------------------
svn:mime-type = image/tiff
Added: pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields.tif
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields.tif?rev=1772120&view=auto
==============================================================================
Binary file - no diff available.
Propchange: pdfbox/branches/2.0/pdfbox/src/test/resources/org/apache/pdfbox/pdmodel/graphics/image/ccittg3-garbage-padded-fields.tif
------------------------------------------------------------------------------
svn:mime-type = image/tiff