You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2010/12/21 06:18:35 UTC
svn commit: r1051377 - in /poi/trunk/src: java/org/apache/poi/util/
scratchpad/src/org/apache/poi/hdgf/
scratchpad/src/org/apache/poi/hdgf/streams/
scratchpad/testcases/org/apache/poi/hdgf/
Author: nick
Date: Tue Dec 21 05:18:34 2010
New Revision: 1051377
URL: http://svn.apache.org/viewvc?rev=1051377&view=rev
Log:
Refactor the common LZW decompression code out into utils
Added:
poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java
- copied, changed from r1051354, poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
Modified:
poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java
poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
Copied: poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java (from r1051354, poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java)
URL: http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java?p2=poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java&p1=poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java&r1=1051354&r2=1051377&rev=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java (original)
+++ poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java Tue Dec 21 05:18:34 2010
@@ -14,7 +14,7 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
-package org.apache.poi.hdgf;
+package org.apache.poi.util;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -22,57 +22,44 @@ import java.io.InputStream;
import java.io.OutputStream;
/**
- * A decoder for the crazy LZW implementation used
- * in Visio.
- * According to VSDump, "it's a slightly perverted version of LZW
- * compression, with inverted meaning of flag byte and 0xFEE as an
- * 'initial shift'". It uses 12 bit codes
- * (http://www.gnome.ru/projects/vsdump_en.html)
+ * This class provides common functionality for the
+ * various LZW implementations in the different file
+ * formats.
+ * It's currently used by HDGF and HMEF.
*
* Two good resources on LZW are:
* http://en.wikipedia.org/wiki/LZW
* http://marknelson.us/1989/10/01/lzw-data-compression/
*/
-public class HDGFLZW {
-
+public abstract class LZWDecompresser {
/**
- * Given an integer, turn it into a java byte, handling
- * the wrapping.
- * This is a convenience method
+ * Whether a set mask bit in the flag byte means the code is compressed (true) or un-compressed (false).
*/
- public static byte fromInt(int b) {
- if(b < 128) return (byte)b;
- return (byte)(b - 256);
+ private boolean maskMeansCompressed;
+
+ protected LZWDecompresser(boolean maskMeansCompressed) {
+ this.maskMeansCompressed = maskMeansCompressed;
}
+
/**
- * Given a java byte, turn it into an integer between 0
- * and 255 (i.e. handle the unwrapping).
- * This is a convenience method
+ * Populates the dictionary. May not need
+ * to do anything if all zeros is fine.
*/
- public static int fromByte(byte b) {
- if(b >= 0) {
- return b;
- }
- return b + 256;
- }
-
+ protected abstract void populateDictionary(byte[] dict);
+
/**
- * Compress the given input stream, returning the array of bytes
- * of the compressed input
+ * Adjusts the position offset if needed when looking
+ * something up in the dictionary.
*/
- public byte[] compress(InputStream src) throws IOException {
- ByteArrayOutputStream res = new ByteArrayOutputStream();
- compress(src,res);
- return res.toByteArray();
- }
-
+ protected abstract int adjustDictionaryOffset(int offset);
+
/**
* Decompresses the given input stream, returning the array of bytes
* of the decompressed input.
*/
- public byte[] decode(InputStream src) throws IOException {
+ public byte[] decompress(InputStream src) throws IOException {
ByteArrayOutputStream res = new ByteArrayOutputStream();
- decode(src,res);
+ decompress(src,res);
return res.toByteArray();
}
@@ -89,13 +76,14 @@ public class HDGFLZW {
* 5) Loop until we've done all 8 bits, then read in the next
* flag byte
*/
- public void decode(InputStream src, OutputStream res) throws IOException {
+ public void decompress(InputStream src, OutputStream res) throws IOException {
// We use 12 bit codes:
// * 0-255 are real bytes
// * 256-4095 are the substring codes
// Java handily initialises our buffer / dictionary
// to all zeros
byte[] buffer = new byte[4096];
+ populateDictionary(buffer);
// How far through the output we've got
// (This is normally used &4095, so it nicely wraps)
@@ -126,7 +114,8 @@ public class HDGFLZW {
for(mask = 1; mask < 256 ; mask <<= 1) {
// Is this a new code (un-compressed), or
// the use of existing codes (compressed)?
- if( (flag & mask) > 0 ) {
+ boolean isMaskSet = (flag & mask) > 0;
+ if( isMaskSet && !maskMeansCompressed ) {
// Retrieve the un-compressed code
if( (dataI = src.read()) != -1) {
// Save the byte into the dictionary
@@ -149,13 +138,8 @@ public class HDGFLZW {
len = (dataIPt2 & 15) + 3;
pntr = (dataIPt2 & 240)*16 + dataIPt1;
- // If the pointer happens to be passed the end
- // of our buffer, then wrap around
- if(pntr > 4078) {
- pntr = pntr - 4078;
- } else {
- pntr = pntr + 18;
- }
+ // Adjust the pointer as needed
+ pntr = adjustDictionaryOffset(pntr);
// Loop over the codes, outputting what they correspond to
for(int i=0; i<len; i++) {
@@ -172,10 +156,23 @@ public class HDGFLZW {
}
/**
- * Performs the Visio compatible streaming LZW compression.
+ * Given an integer, turn it into a java byte, handling
+ * the wrapping.
+ * This is a convenience method
*/
- public void compress(InputStream src, OutputStream res) throws IOException {
- HDGFLZWCompressor c = new HDGFLZWCompressor();
- c.compress(src, res);
+ public static byte fromInt(int b) {
+ if(b < 128) return (byte)b;
+ return (byte)(b - 256);
+ }
+ /**
+ * Given a java byte, turn it into an integer between 0
+ * and 255 (i.e. handle the unwrapping).
+ * This is a convenience method
+ */
+ public static int fromByte(byte b) {
+ if(b >= 0) {
+ return b;
+ }
+ return b + 256;
}
}
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java?rev=1051377&r1=1051376&r2=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java Tue Dec 21 05:18:34 2010
@@ -21,6 +21,8 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import org.apache.poi.util.LZWDecompresser;
+
/**
* A decoder for the crazy LZW implementation used
* in Visio.
@@ -33,27 +35,10 @@ import java.io.OutputStream;
* http://en.wikipedia.org/wiki/LZW
* http://marknelson.us/1989/10/01/lzw-data-compression/
*/
-public class HDGFLZW {
-
- /**
- * Given an integer, turn it into a java byte, handling
- * the wrapping.
- * This is a convenience method
- */
- public static byte fromInt(int b) {
- if(b < 128) return (byte)b;
- return (byte)(b - 256);
- }
- /**
- * Given a java byte, turn it into an integer between 0
- * and 255 (i.e. handle the unwrapping).
- * This is a convenience method
- */
- public static int fromByte(byte b) {
- if(b >= 0) {
- return b;
- }
- return b + 256;
+public class HDGFLZW extends LZWDecompresser {
+ public HDGFLZW() {
+ // Visio inverts the meaning of the flag byte: a set mask bit means un-compressed
+ super(false);
}
/**
@@ -67,108 +52,23 @@ public class HDGFLZW {
}
/**
- * Decompresses the given input stream, returning the array of bytes
- * of the decompressed input.
+ * We have a slight shift by 18 bytes
*/
- public byte[] decode(InputStream src) throws IOException {
- ByteArrayOutputStream res = new ByteArrayOutputStream();
- decode(src,res);
- return res.toByteArray();
+ @Override
+ protected int adjustDictionaryOffset(int pntr) {
+ if(pntr > 4078) {
+ pntr = pntr - 4078;
+ } else {
+ pntr = pntr + 18;
+ }
+ return pntr;
}
-
+
/**
- * Perform a streaming decompression of the input.
- * Works by:
- * 1) Reading a flag byte, the 8 bits of which tell you if the
- * following 8 codes are compressed our un-compressed
- * 2) Consider the 8 bits in turn
- * 3) If the bit is set, the next code is un-compressed, so
- * add it to the dictionary and output it
- * 4) If the bit isn't set, then read in the length and start
- * position in the dictionary, and output the bytes there
- * 5) Loop until we've done all 8 bits, then read in the next
- * flag byte
+ * We want an empty dictionary, so do nothing
*/
- public void decode(InputStream src, OutputStream res) throws IOException {
- // We use 12 bit codes:
- // * 0-255 are real bytes
- // * 256-4095 are the substring codes
- // Java handily initialises our buffer / dictionary
- // to all zeros
- byte[] buffer = new byte[4096];
-
- // How far through the output we've got
- // (This is normally used &4095, so it nicely wraps)
- int pos = 0;
- // The flag byte is treated as its 8 individual
- // bits, which tell us if the following 8 codes
- // are compressed or un-compressed
- int flag;
- // The mask, between 1 and 255, which is used when
- // processing each bit of the flag byte in turn
- int mask;
-
- // These are bytes as looked up in the dictionary
- // It needs to be signed, as it'll get passed on to
- // the output stream
- byte[] dataB = new byte[19];
- // This is an unsigned byte read from the stream
- // It needs to be unsigned, so that bit stuff works
- int dataI;
- // The compressed code sequence is held over 2 bytes
- int dataIPt1, dataIPt2;
- // How long a code sequence is, and where in the
- // dictionary to start at
- int len, pntr;
-
- while( (flag = src.read()) != -1 ) {
- // Compare each bit in our flag byte in turn:
- for(mask = 1; mask < 256 ; mask <<= 1) {
- // Is this a new code (un-compressed), or
- // the use of existing codes (compressed)?
- if( (flag & mask) > 0 ) {
- // Retrieve the un-compressed code
- if( (dataI = src.read()) != -1) {
- // Save the byte into the dictionary
- buffer[(pos&4095)] = fromInt(dataI);
- pos++;
- // And output the byte
- res.write( new byte[] {fromInt(dataI)} );
- }
- } else {
- // We have a compressed sequence
- // Grab the next 16 bits of data
- dataIPt1 = src.read();
- dataIPt2 = src.read();
- if(dataIPt1 == -1 || dataIPt2 == -1) break;
-
- // Build up how long the code sequence is, and
- // what position of the code to start at
- // (The position is the first 12 bits, the
- // length is the last 4 bits)
- len = (dataIPt2 & 15) + 3;
- pntr = (dataIPt2 & 240)*16 + dataIPt1;
-
- // If the pointer happens to be passed the end
- // of our buffer, then wrap around
- if(pntr > 4078) {
- pntr = pntr - 4078;
- } else {
- pntr = pntr + 18;
- }
-
- // Loop over the codes, outputting what they correspond to
- for(int i=0; i<len; i++) {
- dataB[i] = buffer[(pntr + i) & 4095];
- buffer[ (pos + i) & 4095 ] = dataB[i];
- }
- res.write(dataB, 0, len);
-
- // Record how far along the stream we have moved
- pos = pos + len;
- }
- }
- }
+ @Override
+ protected void populateDictionary(byte[] dict) {
}
/**
Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java?rev=1051377&r1=1051376&r2=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/streams/CompressedStreamStore.java Tue Dec 21 05:18:34 2010
@@ -78,7 +78,7 @@ public final class CompressedStreamStore
// Decompress
HDGFLZW lzw = new HDGFLZW();
- byte[] decompressed = lzw.decode(bais);
+ byte[] decompressed = lzw.decompress(bais);
// Split into header and contents
byte[][] ret = new byte[2][];
Modified: poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java?rev=1051377&r1=1051376&r2=1051377&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java (original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java Tue Dec 21 05:18:34 2010
@@ -139,9 +139,9 @@ public final class TestHDGFLZW extends T
assertEquals(339, testTrailerComp.length);
assertEquals(632, testTrailerDecomp.length);
- // Decode it using our engine
+ // Decompress it using our engine
HDGFLZW lzw = new HDGFLZW();
- byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));
+ byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
// Check it's of the right size
assertEquals(632, dec.length);
@@ -159,9 +159,9 @@ public final class TestHDGFLZW extends T
assertEquals(339, testTrailerComp.length);
assertEquals(632, testTrailerDecomp.length);
- // Decode it using our engine
+ // Decompress it using our engine
HDGFLZW lzw = new HDGFLZW();
- byte[] dec = lzw.decode(new ByteArrayInputStream(testTrailerComp));
+ byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
// Now check it's the right data
assertEquals(632, dec.length);
@@ -188,7 +188,7 @@ public final class TestHDGFLZW extends T
byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
// Now decompress it again
- byte[] decomp = lzw.decode(new ByteArrayInputStream(comp));
+ byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
// First up, check the round tripping
assertEquals(12, decomp.length);
@@ -223,7 +223,7 @@ public final class TestHDGFLZW extends T
assertEquals(27, comp.length);
// Now decompress it again
- byte[] decomp = lzw.decode(new ByteArrayInputStream(comp));
+ byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
// We can only check the round-tripping, as for now
// visio cheats on re-using a block
@@ -246,7 +246,7 @@ public final class TestHDGFLZW extends T
byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
// Now decompress it again
- byte[] decomp = lzw.decode(new ByteArrayInputStream(comp));
+ byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
// for(int i=0; i<comp.length; i++) {
// System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org