You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pdfbox.apache.org by ti...@apache.org on 2023/03/19 18:47:38 UTC
svn commit: r1908526 - /pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
Author: tilman
Date: Sun Mar 19 18:47:38 2023
New Revision: 1908526
URL: http://svn.apache.org/viewvc?rev=1908526&view=rev
Log:
PDFBOX-5575: optimize LZWFilter, by Axel Howind; fix javadoc
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
Modified: pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
URL: http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java?rev=1908526&r1=1908525&r2=1908526&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java Sun Mar 19 18:47:38 2023
@@ -66,18 +66,12 @@ public class LZWFilter extends Filter
COSDictionary parameters, int index) throws IOException
{
COSDictionary decodeParams = getDecodeParams(parameters, index);
- int earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1);
-
- if (earlyChange != 0 && earlyChange != 1)
- {
- earlyChange = 1;
- }
-
+ boolean earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1) != 0;
doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange);
return new DecodeResult(parameters);
}
- private void doLZWDecode(InputStream encoded, OutputStream decoded, int earlyChange) throws IOException
+ private void doLZWDecode(InputStream encoded, OutputStream decoded, boolean earlyChange) throws IOException
{
List<byte[]> codeTable = new ArrayList<byte[]>();
int chunk = 9;
@@ -180,7 +174,7 @@ public class LZWFilter extends Filter
if (newFoundCode == -1)
{
// use previous
- chunk = calculateChunk(codeTable.size() - 1, 1);
+ chunk = calculateChunk(codeTable.size() - 1, true);
out.writeBits(foundCode, chunk);
// create new table entry
codeTable.add(inputPattern);
@@ -203,7 +197,7 @@ public class LZWFilter extends Filter
}
if (foundCode != -1)
{
- chunk = calculateChunk(codeTable.size() - 1, 1);
+ chunk = calculateChunk(codeTable.size() - 1, true);
out.writeBits(foundCode, chunk);
}
@@ -212,7 +206,7 @@ public class LZWFilter extends Filter
// possibly adjusted the chunk. Therefore, the encoder must behave as
// if the code table had just grown and thus it must be checked it is
// needed to adjust the chunk, based on an increased table size parameter
- chunk = calculateChunk(codeTable.size(), 1);
+ chunk = calculateChunk(codeTable.size(), true);
out.writeBits(EOD, chunk);
@@ -225,50 +219,47 @@ public class LZWFilter extends Filter
}
/**
- * Find the longest matching pattern in the code table.
+ * Find a matching pattern in the code table.
*
* @param codeTable The LZW code table.
* @param pattern The pattern to be searched for.
- * @return The index of the longest matching pattern or -1 if nothing is
- * found.
+ * @return The index of the matching pattern or -1 if nothing is found.
*/
- private int findPatternCode(List<byte[]> codeTable, byte[] pattern)
+ private static int findPatternCode(List<byte[]> codeTable, byte[] pattern)
{
- int foundCode = -1;
- int foundLen = 0;
- for (int i = codeTable.size() - 1; i >= 0; --i)
+ // for the first 256 entries, index matches value
+ if (pattern.length == 1)
{
- if (i <= EOD)
- {
- // we're in the single byte area
- if (foundCode != -1)
- {
- // we already found pattern with size > 1
- return foundCode;
- }
- else if (pattern.length > 1)
- {
- // we won't find anything here anyway
- return -1;
- }
- }
- byte[] tryPattern = codeTable.get(i);
- if ((foundCode != -1 || tryPattern.length > foundLen) && Arrays.equals(tryPattern, pattern))
+ return pattern[0];
+ }
+
+ // no need to test the first 256 + 2 entries against longer patterns
+ for (int i = 257; i < codeTable.size(); i++)
+ {
+ if (Arrays.equals(codeTable.get(i), pattern))
{
- foundCode = i;
- foundLen = tryPattern.length;
+ return i;
}
}
- return foundCode;
+
+ return -1;
}
/**
- * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE
- * markers.
+ * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE markers.
*/
- private List<byte[]> createCodeTable()
+ private static List<byte[]> createCodeTable()
{
List<byte[]> codeTable = new ArrayList<byte[]>(4096);
+ codeTable.addAll(INITIAL_CODE_TABLE);
+ return codeTable;
+ }
+
+ private static final List<byte[]> INITIAL_CODE_TABLE = createInitialCodeTable();
+
+ private static List<byte[]> createInitialCodeTable()
+ {
+ List<byte[]> codeTable = new ArrayList<byte[]>(258);
for (int i = 0; i < 256; ++i)
{
codeTable.add(new byte[] { (byte) (i & 0xFF) });
@@ -282,21 +273,22 @@ public class LZWFilter extends Filter
* Calculate the appropriate chunk size
*
* @param tabSize the size of the code table
- * @param earlyChange 0 or 1 for early chunk increase
+ * @param earlyChange true for early chunk increase
*
* @return a value between 9 and 12
*/
- private int calculateChunk(int tabSize, int earlyChange)
+ private static int calculateChunk(int tabSize, boolean earlyChange)
{
- if (tabSize >= 2048 - earlyChange)
+ int i = tabSize + (earlyChange ? 1 : 0);
+ if (i >= 2048)
{
return 12;
}
- if (tabSize >= 1024 - earlyChange)
+ if (i >= 1024)
{
return 11;
}
- if (tabSize >= 512 - earlyChange)
+ if (i >= 512)
{
return 10;
}