You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by da...@apache.org on 2014/10/28 05:01:00 UTC

svn commit: r1634775 - in /commons/proper/compress/trunk/src/main/java/org/apache/commons/compress: archivers/zip/ compressors/lzw/ compressors/z/ compressors/z/_internal_/ utils/

Author: damjan
Date: Tue Oct 28 04:01:00 2014
New Revision: 1634775

URL: http://svn.apache.org/r1634775
Log:
Optimize the LZW implementation,
support reading bit patterns that are either big or little endian,
and make LZWInputStream public.


Added:
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java   (with props)
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html   (with props)
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java   (with props)
Removed:
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/_internal_/
Modified:
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java?rev=1634775&r1=1634774&r2=1634775&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/zip/UnshrinkingInputStream.java Tue Oct 28 04:01:00 2014
@@ -20,21 +20,22 @@ package org.apache.commons.compress.arch
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.ByteOrder;
 
-import org.apache.commons.compress.compressors.z._internal_.InternalLZWInputStream;
+import org.apache.commons.compress.compressors.lzw.LZWInputStream;
 
 /**
  * Input stream that decompresses ZIP method 1 (unshrinking). A variation of the LZW algorithm, with some twists.
  * @NotThreadSafe
  * @since 1.7
  */
-class UnshrinkingInputStream extends InternalLZWInputStream {
+class UnshrinkingInputStream extends LZWInputStream {
     private static final int MAX_CODE_SIZE = 13;
     private static final int MAX_TABLE_SIZE = 1 << MAX_CODE_SIZE;
     private final boolean[] isUsed;
     
     public UnshrinkingInputStream(InputStream inputStream) throws IOException {
-        super(inputStream);
+        super(inputStream, ByteOrder.LITTLE_ENDIAN);
         setClearCode(codeSize);
         initializeTables(MAX_CODE_SIZE);
         isUsed = new boolean[prefixes.length];

Added: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java?rev=1634775&view=auto
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java (added)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java Tue Oct 28 04:01:00 2014
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.lzw;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteOrder;
+
+import org.apache.commons.compress.compressors.CompressorInputStream;
+import org.apache.commons.compress.utils.BitInputStream;
+
+/**
+ * <p>Generic LZW implementation. It is used internally for
+ * the Z decompressor and the Unshrinking Zip file compression method,
+ * but may be useful for third-party projects in implementing their own LZW variations.</p>
+ *
+ * @NotThreadSafe
+ * @since 1.10
+ */
+public abstract class LZWInputStream extends CompressorInputStream {
+    private final byte[] oneByte = new byte[1];
+
+    protected final BitInputStream in;
+    protected int clearCode = -1;
+    protected int codeSize = 9;
+    protected byte previousCodeFirstChar;
+    protected int previousCode = -1;
+    protected int tableSize = 0;
+    protected int[] prefixes;
+    protected byte[] characters;
+    private byte[] outputStack;
+    private int outputStackLocation;
+
+    protected LZWInputStream(final InputStream inputStream, final ByteOrder byteOrder) {
+        this.in = new BitInputStream(inputStream, byteOrder);
+    }
+
+    @Override
+    public void close() throws IOException {
+        in.close();
+    }
+    
+    @Override
+    public int read() throws IOException {
+        int ret = read(oneByte);
+        if (ret < 0) {
+            return ret;
+        }
+        return 0xff & oneByte[0];
+    }
+    
+    @Override
+    public int read(byte[] b, int off, int len) throws IOException {
+        int bytesRead = readFromStack(b, off, len);
+        while (len - bytesRead > 0) {
+            int result = decompressNextSymbol();
+            if (result < 0) {
+                if (bytesRead > 0) {
+                    count(bytesRead);
+                    return bytesRead;
+                }
+                return result;
+            }
+            bytesRead += readFromStack(b, off + bytesRead, len - bytesRead);
+        }
+        count(bytesRead);
+        return bytesRead;
+    }
+
+    /**
+     * Read the next code and expand it.
+     */
+    protected abstract int decompressNextSymbol() throws IOException;
+
+    /**
+     * Add a new entry to the dictionary.
+     */
+    protected abstract int addEntry(int previousCode, byte character)
+        throws IOException;
+
+    /**
+     * Sets the clear code based on the code size.
+     */
+    protected void setClearCode(int codeSize) {
+        clearCode = (1 << (codeSize - 1));
+    }
+
+    /**
+     * Initializes the arrays based on the maximum code size.
+     */
+    protected void initializeTables(int maxCodeSize) {
+        final int maxTableSize = 1 << maxCodeSize;
+        prefixes = new int[maxTableSize];
+        characters = new byte[maxTableSize];
+        outputStack = new byte[maxTableSize];
+        outputStackLocation = maxTableSize;
+        final int max = 1 << 8;
+        for (int i = 0; i < max; i++) {
+            prefixes[i] = -1;
+            characters[i] = (byte) i;
+        }
+    }
+
+    /**
+     * Reads the next code from the stream.
+     */
+    protected int readNextCode() throws IOException {
+        return in.readBits(codeSize);
+    }
+    
+    /**
+     * Adds a new entry if the maximum table size hasn't been exceeded
+     * and returns the new index.
+     */
+    protected int addEntry(int previousCode, byte character, int maxTableSize) {
+        if (tableSize < maxTableSize) {
+            prefixes[tableSize] = previousCode;
+            characters[tableSize] = character;
+            return tableSize++;
+        }
+        return -1;
+    }
+
+    /**
+     * Add entry for repeat of previousCode we haven't added, yet.
+     */
+    protected int addRepeatOfPreviousCode() throws IOException {
+        if (previousCode == -1) {
+            // can't have a repeat for the very first code
+            throw new IOException("The first code can't be a reference to its preceding code");
+        }
+        return addEntry(previousCode, previousCodeFirstChar);
+    }
+
+    /**
+     * Expands the entry with index code to the output stack and may
+     * create a new entry
+     */
+    protected int expandCodeToOutputStack(int code, boolean addedUnfinishedEntry)
+        throws IOException {
+        for (int entry = code; entry >= 0; entry = prefixes[entry]) {
+            outputStack[--outputStackLocation] = characters[entry];
+        }
+        if (previousCode != -1 && !addedUnfinishedEntry) {
+            addEntry(previousCode, outputStack[outputStackLocation]);
+        }
+        previousCode = code;
+        previousCodeFirstChar = outputStack[outputStackLocation];
+        return outputStackLocation;
+    }
+
+    private int readFromStack(byte[] b, int off, int len) {
+        int remainingInStack = outputStack.length - outputStackLocation;
+        if (remainingInStack > 0) {
+            int maxLength = Math.min(remainingInStack, len);
+            System.arraycopy(outputStack, outputStackLocation, b, off, maxLength);
+            outputStackLocation += maxLength;
+            return maxLength;
+        }
+        return 0;
+    }
+}

Propchange: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/LZWInputStream.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html?rev=1634775&view=auto
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html (added)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html Tue Oct 28 04:01:00 2014
@@ -0,0 +1,23 @@
+<html>
+<!--
+
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+-->
+  <body>
+    <p>Generic LZW implementation.</p>
+  </body>
+</html>

Propchange: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/lzw/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java?rev=1634775&r1=1634774&r2=1634775&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/compressors/z/ZCompressorInputStream.java Tue Oct 28 04:01:00 2014
@@ -20,14 +20,16 @@ package org.apache.commons.compress.comp
 
 import java.io.IOException;
 import java.io.InputStream;
-import org.apache.commons.compress.compressors.z._internal_.InternalLZWInputStream;
+import java.nio.ByteOrder;
+
+import org.apache.commons.compress.compressors.lzw.LZWInputStream;
 
 /**
  * Input stream that decompresses .Z files.
  * @NotThreadSafe
  * @since 1.7
  */
-public class ZCompressorInputStream extends InternalLZWInputStream {
+public class ZCompressorInputStream extends LZWInputStream {
     private static final int MAGIC_1 = 0x1f;
     private static final int MAGIC_2 = 0x9d;
     private static final int BLOCK_MODE_MASK = 0x80;
@@ -37,10 +39,10 @@ public class ZCompressorInputStream exte
     private long totalCodesRead = 0;
     
     public ZCompressorInputStream(InputStream inputStream) throws IOException {
-        super(inputStream);
-        int firstByte = in.read();
-        int secondByte = in.read();
-        int thirdByte = in.read();
+        super(inputStream, ByteOrder.LITTLE_ENDIAN);
+        int firstByte = in.readBits(8);
+        int secondByte = in.readBits(8);
+        int thirdByte = in.readBits(8);
         if (firstByte != MAGIC_1 || secondByte != MAGIC_2 || thirdByte < 0) {
             throw new IOException("Input is not in .Z format");
         }
@@ -87,8 +89,7 @@ public class ZCompressorInputStream exte
         for (long i = 0; i < codeReadsToThrowAway; i++) {
             readNextCode();
         }
-        bitsCached = 0;
-        bitsCachedSize = 0;
+        in.clearBitCache();
     }
     
     /**

Added: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java?rev=1634775&view=auto
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java (added)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java Tue Oct 28 04:01:00 2014
@@ -0,0 +1,83 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.utils;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteOrder;
+
+/**
+ * Reads bits from an InputStream.
+ * @since 1.10
+ * @NotThreadSafe
+ */
+public class BitInputStream implements Closeable {
+    private final InputStream in;
+    private final ByteOrder byteOrder;
+    private int bitsCached = 0;
+    private int bitsCachedSize = 0;
+
+    /**
+     * Constructor taking an InputStream and its bit arrangement. 
+     * @param in the InputStream
+     * @param byteOrder the bit arrangement across byte boundaries,
+     *      either BIG_ENDIAN (aaaaabbb bb000000) or LITTLE_ENDIAN (bbbaaaaa 000000bb)
+     */
+    public BitInputStream(final InputStream in, final ByteOrder byteOrder) {
+        this.in = in;
+        this.byteOrder = byteOrder;
+    }
+    
+    public void close() throws IOException {
+        in.close();
+    }
+    
+    public void clearBitCache() {
+        bitsCached = 0;
+        bitsCachedSize = 0;
+    }
+    
+    public int readBits(final int count) throws IOException {
+        while (bitsCachedSize < count) {
+            final int nextByte = in.read();
+            if (nextByte < 0) {
+                return nextByte;
+            }
+            if (byteOrder == ByteOrder.LITTLE_ENDIAN) {
+                bitsCached |= (nextByte << bitsCachedSize);
+            } else {
+                bitsCached <<= 8;
+                bitsCached |= nextByte;
+            }
+            bitsCachedSize += 8;
+        }
+        
+        final int mask = (1 << count) - 1;
+        final int bitsOut;
+        if (byteOrder == ByteOrder.LITTLE_ENDIAN) {
+            bitsOut = (bitsCached & mask);
+            bitsCached >>>= count;
+        } else {
+            bitsOut = (bitsCached >> (bitsCachedSize - count)) & mask;
+        }
+        bitsCachedSize -= count;
+        return bitsOut;
+    }
+}

Propchange: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/utils/BitInputStream.java
------------------------------------------------------------------------------
    svn:eol-style = native