You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2020/12/26 14:37:13 UTC

svn commit: r1884818 - in /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter: CCITTFaxDecoderStream.java CCITTFaxFilter.java

Author: tilman
Date: Sat Dec 26 14:37:12 2020
New Revision: 1884818

URL: http://svn.apache.org/viewvc?rev=1884818&view=rev
Log:
PDFBOX-5057: update to recent changes in twelvemonkeys decoder by Oliver Schmidtmer; closes #82

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java?rev=1884818&r1=1884817&r2=1884818&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java Sat Dec 26 14:37:12 2020
@@ -4,28 +4,29 @@
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in the
- *       documentation and/or other materials provided with the distribution.
- *     * Neither the name "TwelveMonkeys" nor the
- *       names of its contributors may be used to endorse or promote products
- *       derived from this software without specific prior written permission.
  *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * * Redistributions of source code must retain the above copyright notice, this
+ *   list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ *
+ * * Neither the name of the copyright holder nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-
 package org.apache.pdfbox.filter;
 
 
@@ -43,10 +44,9 @@ import java.util.Arrays;
  * @author last modified by $Author: haraldk$
  * @version $Id: CCITTFaxDecoderStream.java,v 1.0 23.05.12 15:55 haraldk Exp$
  * 
- * Taken from commit fa0341f30237effe523e9905e672d709ffe9c6bd of 7.5.2016 from twelvemonkeys/imageio/plugins/tiff/CCITTFaxDecoderStream.java
+ * Taken from commit 24c6682236e5a02151359486aa4075ddc5ab1534 of 18.08.2018 from twelvemonkeys/imageio/plugins/tiff/CCITTFaxDecoderStream.java
  * 
  * Initial changes for PDFBox, discussed in PDFBOX-3338:
- * - added optionByteAligned to constructor and to each decodeRowType() method
  * - removed Validate() usages
  * - catch VALUE_EOL in decode1D()
  */
@@ -56,61 +56,94 @@ final class CCITTFaxDecoderStream extend
     private final int columns;
     private final byte[] decodedRow;
 
-    private int decodedLength;
-    private int decodedPos;
+    private final boolean optionG32D;
+    // Leading zeros for aligning EOL
+    private final boolean optionG3Fill;
+    private final boolean optionUncompressed;
+    private final boolean optionByteAligned;
 
     // Need to take fill order into account (?) (use flip table?)
     private final int fillOrder;
     private final int type;
 
+    private int decodedLength;
+    private int decodedPos;
+
     private int[] changesReferenceRow;
     private int[] changesCurrentRow;
     private int changesReferenceRowCount;
     private int changesCurrentRowCount;
-    private int lastChangingElement = 0;
-
-    private boolean optionG32D = false;
 
-    @SuppressWarnings("unused") // Leading zeros for aligning EOL
-    private boolean optionG3Fill = false;
-
-    private boolean optionUncompressed = false;
-    private boolean optionByteAligned = false;
+    private int lastChangingElement = 0;
 
-    CCITTFaxDecoderStream(final InputStream stream, final int columns, final int type, final int fillOrder,
-                                 final long options) {
+    /**
+     * Creates a CCITTFaxDecoderStream.
+     * This constructor may be used for CCITT streams embedded in PDF files,
+     * which use EncodedByteAlign.
+     *
+     * @param stream the compressed CCITT stream.
+     * @param columns the number of columns in the stream.
+     * @param type the type of stream, must be one of {@code COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE},
+     *             {@code COMPRESSION_CCITT_T4} or {@code COMPRESSION_CCITT_T6}.
+     * @param fillOrder fillOrder, must be {@code FILL_LEFT_TO_RIGHT} or
+     * {@code FILL_RIGHT_TO_LEFT}.
+     * @param options CCITT T.4 or T.6 options.
+     * @param byteAligned enable byte alignment used in PDF files (EncodedByteAlign).
+     */
+    public CCITTFaxDecoderStream(final InputStream stream, final int columns, final int type, final int fillOrder,
+                                 final long options, final boolean byteAligned) {
         super(stream);
 
         this.columns = columns;
-        // We know this is only used for b/w (1 bit)
-        this.decodedRow = new byte[(columns + 7) / 8];
         this.type = type;
-        
         this.fillOrder = fillOrder;
 
-        this.changesReferenceRow = new int[columns + 2];
-        this.changesCurrentRow = new int[columns + 2];
+        // We know this is only used for b/w (1 bit)
+        decodedRow = new byte[(columns + 7) / 8];
+        changesReferenceRow = new int[columns + 2];
+        changesCurrentRow = new int[columns + 2];
 
         switch (type) {
             case TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE:
-                optionByteAligned = (options & TIFFExtension.GROUP3OPT_BYTEALIGNED) != 0;
+                optionByteAligned = byteAligned;
+                optionG32D = false;
+                optionG3Fill = false;
+                optionUncompressed = false;
                 break;
             case TIFFExtension.COMPRESSION_CCITT_T4:
+                optionByteAligned = byteAligned;
                 optionG32D = (options & TIFFExtension.GROUP3OPT_2DENCODING) != 0;
                 optionG3Fill = (options & TIFFExtension.GROUP3OPT_FILLBITS) != 0;
                 optionUncompressed = (options & TIFFExtension.GROUP3OPT_UNCOMPRESSED) != 0;
-                optionByteAligned = (options & TIFFExtension.GROUP3OPT_BYTEALIGNED) != 0;
                 break;
             case TIFFExtension.COMPRESSION_CCITT_T6:
+                optionByteAligned = byteAligned;
+                optionG32D = false;
+                optionG3Fill = false;
                 optionUncompressed = (options & TIFFExtension.GROUP4OPT_UNCOMPRESSED) != 0;
-                optionByteAligned = (options & TIFFExtension.GROUP4OPT_BYTEALIGNED) != 0;
                 break;
             default:
-                break;
+                throw new AssertionError();
         }
 
     }
 
+    /**
+     * Creates a CCITTFaxDecoderStream.
+     *
+     * @param stream the compressed CCITT stream.
+     * @param columns the number of columns in the stream.
+     * @param type the type of stream, must be one of {@code COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE},
+     *             {@code COMPRESSION_CCITT_T4} or {@code COMPRESSION_CCITT_T6}.
+     * @param fillOrder fillOrder, must be {@code FILL_LEFT_TO_RIGHT} or
+     * {@code FILL_RIGHT_TO_LEFT}.
+     * @param options CCITT T.4 or T.6 options.
+     */
+    public CCITTFaxDecoderStream(final InputStream stream, final int columns, final int type, final int fillOrder,
+                                 final long options) {
+        this(stream, columns, type, fillOrder, options, type == TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE);
+    }
+
     private void fetch() throws IOException {
         if (decodedPos >= decodedLength) {
             decodedLength = 0;
@@ -124,8 +157,8 @@ final class CCITTFaxDecoderStream extend
                     throw e;
                 }
 
-                // ..otherwise, just client code trying to read past the end of
-                // stream
+                // ..otherwise, just let client code try to read past the
+                // end of stream
                 decodedLength = -1;
             }
 
@@ -147,11 +180,6 @@ final class CCITTFaxDecoderStream extend
             else {
                 completeRun = decodeRun(blackRunTree);
             }
-            
-            if (completeRun == VALUE_EOL)
-            {
-                continue;
-            }
 
             index += completeRun;
             changesCurrentRow[changesCurrentRowCount++] = index;
@@ -251,15 +279,14 @@ final class CCITTFaxDecoderStream extend
     }
 
     private void decodeRowType2() throws IOException {
-        if (optionByteAligned)
-        {
+        if (optionByteAligned) {
             resetBuffer();
         }
         decode1D();
     }
 
     private void decodeRowType4() throws IOException {
-        if(optionByteAligned) {
+        if (optionByteAligned) {
             resetBuffer();
         }
         eof: while (true) {
@@ -288,7 +315,7 @@ final class CCITTFaxDecoderStream extend
     }
 
     private void decodeRowType6() throws IOException {
-        if(optionByteAligned) {
+        if (optionByteAligned) {
             resetBuffer();
         }
         decode2D();
@@ -305,15 +332,13 @@ final class CCITTFaxDecoderStream extend
             case TIFFExtension.COMPRESSION_CCITT_T6:
                 decodeRowType6();
                 break;
-            default:
-                break;
         }
 
         int index = 0;
         boolean white = true;
 
-            lastChangingElement = 0;
-            for (int i = 0; i <= changesCurrentRowCount; i++) {
+        lastChangingElement = 0;
+        for (int i = 0; i <= changesCurrentRowCount; i++) {
             int nextChange = columns;
 
             if (i != changesCurrentRowCount) {
@@ -361,7 +386,7 @@ final class CCITTFaxDecoderStream extend
         decodedLength = (index + 7) / 8;
     }
 
-    private int decodeRun(final Tree tree) throws IOException {     
+    private int decodeRun(final Tree tree) throws IOException {
         int total = 0;
 
         Node n = tree.root;
@@ -376,18 +401,20 @@ final class CCITTFaxDecoderStream extend
 
             if (n.isLeaf) {
                 total += n.value;
-                if (n.value < 64) {
+                if (n.value >= 64) {
+                    n = tree.root;
+                }
+                else if (n.value >= 0) {
                     return total;
                 }
                 else {
-                    n = tree.root;
+                    return columns;
                 }
             }
         }
     }
 
-    private void resetBuffer()
-    {
+    private void resetBuffer() {
         bufferPos = -1;
     }
 
@@ -443,7 +470,6 @@ final class CCITTFaxDecoderStream extend
     @Override
     public int read(byte[] b, int off, int len) throws IOException {
         if (decodedLength < 0) {
-            //TODO better? Math.min(off + len, b.length)
             Arrays.fill(b, off, off + len, (byte) 0x0);
             return len;
         }
@@ -811,4 +837,3 @@ final class CCITTFaxDecoderStream extend
         }
     }
 }
-

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java?rev=1884818&r1=1884817&r2=1884818&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java Sat Dec 26 14:37:12 2020
@@ -19,6 +19,8 @@ package org.apache.pdfbox.filter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.PushbackInputStream;
+
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.io.IOUtils;
@@ -63,28 +65,43 @@ final class CCITTFaxFilter extends Filte
         byte[] decompressed = new byte[arraySize];
         CCITTFaxDecoderStream s;
         int type;
-        long tiffOptions;
+        long tiffOptions = 0;
         if (k == 0)
         {
-            tiffOptions = encodedByteAlign ? TIFFExtension.GROUP3OPT_BYTEALIGNED : 0;
-            type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
+            type = TIFFExtension.COMPRESSION_CCITT_T4; // Group 3 1D
+            byte[] streamData = new byte[20];
+            encoded.read(streamData);
+            encoded = new PushbackInputStream(encoded, streamData.length);
+            ((PushbackInputStream) encoded).unread(streamData);
+            if (streamData[0] != 0 || (streamData[1] >> 4 != 1 && streamData[1] != 1))
+            {
+                // leading EOL (0b000000000001) not found, search further and try RLE if not
+                // found
+                type = TIFFExtension.COMPRESSION_CCITT_MODIFIED_HUFFMAN_RLE;
+                short b = (short) (((streamData[0] << 8) + streamData[1]) >> 4);
+                for (int i = 12; i < 160; i++)
+                {
+                    b = (short) ((b << 1) + ((streamData[(i / 8)] >> (7 - (i % 8))) & 0x01));
+                    if ((b & 0xFFF) == 1)
+                    {
+                        type = TIFFExtension.COMPRESSION_CCITT_T4;
+                        break;
+                    }
+                }
+            }
+        }
+        else if (k > 0)
+        {
+            // Group 3 2D
+            type = TIFFExtension.COMPRESSION_CCITT_T4;
+            tiffOptions = TIFFExtension.GROUP3OPT_2DENCODING;
         }
         else
         {
-            if (k > 0)
-            {
-                tiffOptions = encodedByteAlign ? TIFFExtension.GROUP3OPT_BYTEALIGNED : 0;
-                tiffOptions |= TIFFExtension.GROUP3OPT_2DENCODING;
-                type = TIFFExtension.COMPRESSION_CCITT_T4;
-            }
-            else
-            {
-                // k < 0
-                tiffOptions = encodedByteAlign ? TIFFExtension.GROUP4OPT_BYTEALIGNED : 0;
-                type = TIFFExtension.COMPRESSION_CCITT_T6;
-            }
+            // Group 4
+            type = TIFFExtension.COMPRESSION_CCITT_T6;
         }
-        s = new CCITTFaxDecoderStream(encoded, cols, type, TIFFExtension.FILL_LEFT_TO_RIGHT, tiffOptions);
+        s = new CCITTFaxDecoderStream(encoded, cols, type, TIFFExtension.FILL_LEFT_TO_RIGHT, tiffOptions, encodedByteAlign);
         readFromDecoderStream(s, decompressed);
 
         // invert bitmap