You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2023/03/19 18:47:38 UTC

svn commit: r1908526 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java

Author: tilman
Date: Sun Mar 19 18:47:38 2023
New Revision: 1908526

URL: http://svn.apache.org/viewvc?rev=1908526&view=rev
Log:
PDFBOX-5575: optimize LZWFilter, by Axel Howind; fix javadoc

Modified:
    pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java

Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java?rev=1908526&r1=1908525&r2=1908526&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java Sun Mar 19 18:47:38 2023
@@ -66,18 +66,12 @@ public class LZWFilter extends Filter
             COSDictionary parameters, int index) throws IOException
     {
         COSDictionary decodeParams = getDecodeParams(parameters, index);
-        int earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1);
-
-        if (earlyChange != 0 && earlyChange != 1)
-        {
-            earlyChange = 1;
-        }
-
+        boolean earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1) != 0;
         doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange);
         return new DecodeResult(parameters);
     }
 
-    private void doLZWDecode(InputStream encoded, OutputStream decoded, int earlyChange) throws IOException
+    private void doLZWDecode(InputStream encoded, OutputStream decoded, boolean earlyChange) throws IOException
     {
         List<byte[]> codeTable = new ArrayList<byte[]>();
         int chunk = 9;
@@ -180,7 +174,7 @@ public class LZWFilter extends Filter
                 if (newFoundCode == -1)
                 {
                     // use previous
-                    chunk = calculateChunk(codeTable.size() - 1, 1);
+                    chunk = calculateChunk(codeTable.size() - 1, true);
                     out.writeBits(foundCode, chunk);
                     // create new table entry
                     codeTable.add(inputPattern);
@@ -203,7 +197,7 @@ public class LZWFilter extends Filter
         }
         if (foundCode != -1)
         {
-            chunk = calculateChunk(codeTable.size() - 1, 1);
+            chunk = calculateChunk(codeTable.size() - 1, true);
             out.writeBits(foundCode, chunk);
         }
 
@@ -212,7 +206,7 @@ public class LZWFilter extends Filter
         // possibly adjusted the chunk. Therefore, the encoder must behave as 
         // if the code table had just grown and thus it must be checked it is
         // needed to adjust the chunk, based on an increased table size parameter
-        chunk = calculateChunk(codeTable.size(), 1);
+        chunk = calculateChunk(codeTable.size(), true);
 
         out.writeBits(EOD, chunk);
         
@@ -225,50 +219,47 @@ public class LZWFilter extends Filter
     }
 
     /**
-     * Find the longest matching pattern in the code table.
+     * Find a matching pattern in the code table.
      *
      * @param codeTable The LZW code table.
      * @param pattern The pattern to be searched for.
-     * @return The index of the longest matching pattern or -1 if nothing is
-     * found.
+     * @return The index of the matching pattern or -1 if nothing is found.
      */
-    private int findPatternCode(List<byte[]> codeTable, byte[] pattern)
+    private static int findPatternCode(List<byte[]> codeTable, byte[] pattern)
     {
-        int foundCode = -1;
-        int foundLen = 0;
-        for (int i = codeTable.size() - 1; i >= 0; --i)
+        // for the first 256 entries, index matches value
+        if (pattern.length == 1)
         {
-            if (i <= EOD)
-            {
-                // we're in the single byte area
-                if (foundCode != -1)
-                {
-                    // we already found pattern with size > 1
-                    return foundCode; 
-                }
-                else if (pattern.length > 1)
-                {
-                    // we won't find anything here anyway
-                    return -1;
-                }
-            }
-            byte[] tryPattern = codeTable.get(i);
-            if ((foundCode != -1 || tryPattern.length > foundLen) && Arrays.equals(tryPattern, pattern))
+            return pattern[0];
+        }
+
+        // no need to test the first 256 + 2 entries against longer patterns
+        for (int i = 257; i < codeTable.size(); i++)
+        {
+            if (Arrays.equals(codeTable.get(i), pattern))
             {
-                foundCode = i;
-                foundLen = tryPattern.length;
+                return i;
             }
         }
-        return foundCode;
+
+        return -1;
     }
 
     /**
-     * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE
-     * markers.
+     * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE markers.
      */
-    private List<byte[]> createCodeTable()
+    private static List<byte[]> createCodeTable()
     {
         List<byte[]> codeTable = new ArrayList<byte[]>(4096);
+        codeTable.addAll(INITIAL_CODE_TABLE);
+        return codeTable;
+    }
+
+    private static final List<byte[]> INITIAL_CODE_TABLE = createInitialCodeTable();
+
+    private static List<byte[]> createInitialCodeTable()
+    {
+        List<byte[]> codeTable = new ArrayList<byte[]>(258);
         for (int i = 0; i < 256; ++i)
         {
             codeTable.add(new byte[] { (byte) (i & 0xFF) });
@@ -282,21 +273,22 @@ public class LZWFilter extends Filter
      * Calculate the appropriate chunk size
      *
      * @param tabSize the size of the code table
-     * @param earlyChange 0 or 1 for early chunk increase
+     * @param earlyChange true for early chunk increase
      *
      * @return a value between 9 and 12
      */
-    private int calculateChunk(int tabSize, int earlyChange)
+    private static int calculateChunk(int tabSize, boolean earlyChange)
     {
-        if (tabSize >= 2048 - earlyChange)
+        int i = tabSize + (earlyChange ? 1 : 0);
+        if (i >= 2048)
         {
             return 12;
         }
-        if (tabSize >= 1024 - earlyChange)
+        if (i >= 1024)
         {
             return 11;
         }
-        if (tabSize >= 512 - earlyChange)
+        if (i >= 512)
         {
             return 10;
         }