You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2010/12/21 06:18:35 UTC
svn commit: r1051377 - in /poi/trunk/src: java/org/apache/poi/util/ scratchpad/src/org/apache/poi/hdgf/ scratchpad/src/org/apache/poi/hdgf/streams/ scratchpad/testcases/org/apache/poi/hdgf/

Author: nick
Date: Tue Dec 21 05:18:34 2010
New Revision: 1051377

URL: http://svn.apache.org/viewvc?rev=1051377&view=rev
Log:
Refactor the common LZW decompression code out into utils

Added:
    poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java
      - copied, changed from r1051354, poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
Modified:
    poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java

Copied: poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java (from r1051354, poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java)
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java?p2=poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java&p1=poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java&r1=1051354&r2=1051377&rev=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java (original)
+++ poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java Tue Dec 21 05:18:34 2010
@@ -14,7 +14,7 @@
    See the License for the specific language governing permissions and
    limitations under the License.
 ==================================================================== */
-package org.apache.poi.hdgf;
+package org.apache.poi.util;
 
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
@@ -22,57 +22,44 @@ import java.io.InputStream;
 import java.io.OutputStream;
 
 /**
- * A decoder for the crazy LZW implementation used
- *  in Visio.
- * According to VSDump, "it's a slightly perverted version of LZW
- *  compression, with inverted meaning of flag byte and 0xFEE as an
- *  'initial shift'". It uses 12 bit codes
- * (http://www.gnome.ru/projects/vsdump_en.html)
+ * This class provides common functionality for the
+ *  various LZW implementations in the different file
+ *  formats.
+ * It's currently used by HDGF and HMEF.
  *
  * Two good resources on LZW are:
  *  http://en.wikipedia.org/wiki/LZW
  *  http://marknelson.us/1989/10/01/lzw-data-compression/
  */
-public class HDGFLZW {
-
+public abstract class LZWDecompresser {
    /**
-    * Given an integer, turn it into a java byte, handling
-    *  the wrapping.
-    * This is a convenience method
+    * Does the mask bit mean it's compressed or uncompressed?
     */
-   public static byte fromInt(int b) {
-      if(b < 128) return (byte)b;
-      return (byte)(b - 256);
+   private boolean maskMeansCompressed;
+   
+   protected LZWDecompresser(boolean maskMeansCompressed) {
+      this.maskMeansCompressed = maskMeansCompressed;
    }
+   
    /**
-    * Given a java byte, turn it into an integer between 0
-    *  and 255 (i.e. handle the unwrapping).
-    * This is a convenience method
+    * Populates the dictionary. May not need
+    *  to do anything if all zeros is fine.
     */
-   public static int fromByte(byte b) {
-      if(b >= 0) {
-         return b;
-      }
-      return b + 256;
-   }
-
+   protected abstract void populateDictionary(byte[] dict);
+   
    /**
-    * Compress the given input stream, returning the array of bytes
-    *  of the compressed input
+    * Adjusts the position offset if needed when looking
+    *  something up in the dictionary.
     */
-   public byte[] compress(InputStream src) throws IOException {
-      ByteArrayOutputStream res = new ByteArrayOutputStream();
-      compress(src,res);
-      return res.toByteArray();
-   }
-
+   protected abstract int adjustDictionaryOffset(int offset); 
+   
    /**
     * Decompresses the given input stream, returning the array of bytes
     *  of the decompressed input.
     */
-   public byte[] decode(InputStream src) throws IOException {
+   public byte[] decompress(InputStream src) throws IOException {
       ByteArrayOutputStream res = new ByteArrayOutputStream();
-      decode(src,res);
+      decompress(src,res);
       return res.toByteArray();
    }
    
@@ -89,13 +76,14 @@ public class HDGFLZW {
     * 5) Loop until we've done all 8 bits, then read in the next
     *     flag byte
     */
-   public void decode(InputStream src, OutputStream res) throws IOException {
+   public void decompress(InputStream src, OutputStream res) throws IOException {
       // We use 12 bit codes:
       // * 0-255 are real bytes
       // * 256-4095 are the substring codes
       // Java handily initialises our buffer / dictionary
       //  to all zeros
       byte[] buffer = new byte[4096];
+      populateDictionary(buffer);
 
       // How far through the output we've got
       // (This is normally used &4095, so it nicely wraps)
@@ -126,7 +114,8 @@ public class HDGFLZW {
          for(mask = 1; mask < 256 ; mask <<= 1) {
             // Is this a new code (un-compressed), or
             //  the use of existing codes (compressed)?
-            if( (flag & mask) > 0 ) {
+            boolean isMaskSet = (flag & mask) > 0;
+            if( isMaskSet && !maskMeansCompressed ) {
                // Retrieve the un-compressed code
                if( (dataI = src.read()) != -1) {
                   // Save the byte into the dictionary
@@ -149,13 +138,8 @@ public class HDGFLZW {
                len = (dataIPt2 & 15) + 3;
                pntr = (dataIPt2 & 240)*16 + dataIPt1;
 
-               // If the pointer happens to be passed the end
-               //  of our buffer, then wrap around
-               if(pntr > 4078) {
-                  pntr = pntr - 4078;
-               } else {
-                  pntr = pntr + 18;
-               }
+               // Adjust the pointer as needed
+               pntr = adjustDictionaryOffset(pntr);
 
                // Loop over the codes, outputting what they correspond to
                for(int i=0; i<len; i++) {
@@ -172,10 +156,23 @@ public class HDGFLZW {
    }
 
    /**
-    * Performs the Visio compatible streaming LZW compression.
+    * Given an integer, turn it into a java byte, handling
+    *  the wrapping.
+    * This is a convenience method
     */
-   public void compress(InputStream src, OutputStream res) throws IOException {
-      HDGFLZWCompressor c = new HDGFLZWCompressor();
-      c.compress(src, res);
+   public static byte fromInt(int b) {
+      if(b < 128) return (byte)b;
+      return (byte)(b - 256);
+   }
+   /**
+    * Given a java byte, turn it into an integer between 0
+    *  and 255 (i.e. handle the unwrapping).
+    * This is a convenience method
+    */
+   public static int fromByte(byte b) {
+      if(b >= 0) {
+         return b;
+      }
+      return b + 256;
    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java?rev=1051377&r1=1051376&r2=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java Tue Dec 21 05:18:34 2010
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 
+import org.apache.poi.util.LZWDecompresser;
+
 /**
  * A decoder for the crazy LZW implementation used
  *  in Visio.
@@ -33,27 +35,10 @@ import java.io.OutputStream;
  *  http://en.wikipedia.org/wiki/LZW
  *  http://marknelson.us/1989/10/01/lzw-data-compression/
  */
-public class HDGFLZW {
-
-   /**
-    * Given an integer, turn it into a java byte, handling
-    *  the wrapping.
-    * This is a convenience method
-    */
-   public static byte fromInt(int b) {
-      if(b < 128) return (byte)b;
-      return (byte)(b - 256);
-   }
-   /**
-    * Given a java byte, turn it into an integer between 0
-    *  and 255 (i.e. handle the unwrapping).
-    * This is a convenience method
-    */
-   public static int fromByte(byte b) {
-      if(b >= 0) {
-         return b;
-      }
-      return b + 256;
+public class HDGFLZW extends LZWDecompresser {
+   public HDGFLZW() {
+      // We're the wrong way round!
+      super(false);
    }
 
    /**
@@ -67,108 +52,23 @@ public class HDGFLZW {
    }
 
    /**
-    * Decompresses the given input stream, returning the array of bytes
-    *  of the decompressed input.
+    * We have a slight shift by 18 bytes
     */
-   public byte[] decode(InputStream src) throws IOException {
-      ByteArrayOutputStream res = new ByteArrayOutputStream();
-      decode(src,res);
-      return res.toByteArray();
+   @Override
+   protected int adjustDictionaryOffset(int pntr) {
+      if(pntr > 4078) {
+         pntr = pntr - 4078;
+      } else {
+         pntr = pntr + 18;
+      }
+      return pntr;
    }
-   
+
    /**
-    * Perform a streaming decompression of the input.
-    * Works by:
-    * 1) Reading a flag byte, the 8 bits of which tell you if the
-    *     following 8 codes are compressed our un-compressed
-    * 2) Consider the 8 bits in turn
-    * 3) If the bit is set, the next code is un-compressed, so
-    *     add it to the dictionary and output it
-    * 4) If the bit isn't set, then read in the length and start
-    *     position in the dictionary, and output the bytes there
-    * 5) Loop until we've done all 8 bits, then read in the next
-    *     flag byte
+    * We want an empty dictionary, so do nothing
     */
-   public void decode(InputStream src, OutputStream res) throws IOException {
-      // We use 12 bit codes:
-      // * 0-255 are real bytes
-      // * 256-4095 are the substring codes
-      // Java handily initialises our buffer / dictionary
-      //  to all zeros
-      byte[] buffer = new byte[4096];
-
-      // How far through the output we've got
-      // (This is normally used &4095, so it nicely wraps)
-      int pos = 0;
-      // The flag byte is treated as its 8 individual
-      //  bits, which tell us if the following 8 codes
-      //  are compressed or un-compressed
-      int flag;
-      // The mask, between 1 and 255, which is used when
-      //  processing each bit of the flag byte in turn
-      int mask;
-
-      // These are bytes as looked up in the dictionary
-      // It needs to be signed, as it'll get passed on to
-      //  the output stream
-      byte[] dataB = new byte[19];
-      // This is an unsigned byte read from the stream
-      // It needs to be unsigned, so that bit stuff works
-      int dataI;
-      // The compressed code sequence is held over 2 bytes
-      int dataIPt1, dataIPt2;
-      // How long a code sequence is, and where in the
-      //  dictionary to start at
-      int len, pntr;
-
-      while( (flag = src.read()) != -1 ) {
-         // Compare each bit in our flag byte in turn:
-         for(mask = 1; mask < 256 ; mask <<= 1) {
-            // Is this a new code (un-compressed), or
-            //  the use of existing codes (compressed)?
-            if( (flag & mask) > 0 ) {
-               // Retrieve the un-compressed code
-               if( (dataI = src.read()) != -1) {
-                  // Save the byte into the dictionary
-                  buffer[(pos&4095)] = fromInt(dataI);
-                  pos++;
-                  // And output the byte
-                  res.write( new byte[] {fromInt(dataI)} );
-               }
-            } else {
-               // We have a compressed sequence
-               // Grab the next 16 bits of data
-               dataIPt1 = src.read();
-               dataIPt2 = src.read();
-               if(dataIPt1 == -1 || dataIPt2 == -1) break;
-
-               // Build up how long the code sequence is, and
-               //  what position of the code to start at
-               // (The position is the first 12 bits, the
-               //  length is the last 4 bits)
-               len = (dataIPt2 & 15) + 3;
-               pntr = (dataIPt2 & 240)*16 + dataIPt1;
-
-               // If the pointer happens to be passed the end
-               //  of our buffer, then wrap around
-               if(pntr > 4078) {
-                  pntr = pntr - 4078;
-               } else {
-                  pntr = pntr + 18;
-               }
-
-               // Loop over the codes, outputting what they correspond to
-               for(int i=0; i<len; i++) {
-                  dataB[i] = buffer[(pntr + i) & 4095];
-                  buffer[ (pos + i) & 4095 ] = dataB[i];
-               }
-               res.write(dataB, 0, len);
-
-               // Record how far along the stream we have moved
-               pos = pos + len;
-            }
-         }
-      }
+   @Override
+   protected void populateDictionary(byte[] dict) {
    }
 
    /**

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java?rev=1051377&r1=1051376&r2=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java Tue Dec 21 05:18:34 2010
@@ -78,7 +78,7 @@ public final class CompressedStreamStore
 
 		// Decompress
 		HDGFLZW lzw = new HDGFLZW();
-		byte[] decompressed = lzw.decode(bais);
+		byte[] decompressed = lzw.decompress(bais);
 
 		// Split into header and contents
 		byte[][] ret = new byte[2][];

Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java?rev=1051377&r1=1051376&r2=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java Tue Dec 21 05:18:34 2010
@@ -139,9 +139,9 @@ public final class TestHDGFLZW extends T
 		assertEquals(339, testTrailerComp.length);
 		assertEquals(632, testTrailerDecomp.length);
 
-		// Decode it using our engine
+		// Decompress it using our engine
 		HDGFLZW lzw = new HDGFLZW();
-		byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));
+		byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
 
 		// Check it's of the right size
 		assertEquals(632, dec.length);
@@ -159,9 +159,9 @@ public final class TestHDGFLZW extends T
 		assertEquals(339, testTrailerComp.length);
 		assertEquals(632, testTrailerDecomp.length);
 
-		// Decode it using our engine
+		// Decompress it using our engine
 		HDGFLZW lzw = new HDGFLZW();
-		byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));
+		byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
 
 		// Now check it's the right data
 		assertEquals(632, dec.length);
@@ -188,7 +188,7 @@ public final class TestHDGFLZW extends T
 		byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
 		
 		// Now decompress it again
-		byte[] decomp = lzw.decode(new ByteArrayInputStream(comp));
+		byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
 
 		// First up, check the round tripping
 		assertEquals(12, decomp.length);
@@ -223,7 +223,7 @@ public final class TestHDGFLZW extends T
       assertEquals(27, comp.length);
       
       // Now decompress it again
-      byte[] decomp = lzw.decode(new ByteArrayInputStream(comp));
+      byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
 
       // We can only check the round-tripping, as for now
       //  visio cheats on re-using a block
@@ -246,7 +246,7 @@ public final class TestHDGFLZW extends T
       byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
       
       // Now decompress it again
-      byte[] decomp = lzw.decode(new ByteArrayInputStream(comp));
+      byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
 
 //      for(int i=0; i<comp.length; i++) {
 //         System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]);



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org