You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2016/05/22 13:02:17 UTC
[2/2] commons-compress git commit: COMPRESS-352 add support for IWA
files
COMPRESS-352 add support for IWA files
Apple has created a Snappy dialect used in iWork archives.
The test case is copied from Tika.
Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/18daf66b
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/18daf66b
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/18daf66b
Branch: refs/heads/master
Commit: 18daf66b2ccf4c7df9618c9a7067ab56bfa96593
Parents: cfd5387
Author: Stefan Bodewig <bo...@apache.org>
Authored: Sun May 22 15:00:14 2016 +0200
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Sun May 22 15:01:20 2016 +0200
----------------------------------------------------------------------
.../FramedSnappyCompressorInputStream.java | 32 ++++++++++--
.../compressors/snappy/FramedSnappyDialect.java | 52 +++++++++++++++++++
.../FramedSnappyCompressorInputStreamTest.java | 27 ++++++++++
src/test/resources/testNumbersNew.numbers | Bin 0 -> 179147 bytes
4 files changed, 107 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
index 2ef2dcc..96ac7f9 100644
--- a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStream.java
@@ -58,6 +58,8 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream {
/** The underlying stream to read compressed data from */
private final PushbackInputStream in;
+ /** The dialect to expect */
+ private final FramedSnappyDialect dialect;
private SnappyCompressorInputStream currentCompressedChunk;
@@ -71,14 +73,31 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream {
private final PureJavaCrc32C checksum = new PureJavaCrc32C();
/**
+ * Constructs a new input stream that decompresses
+ * snappy-framed-compressed data from the specified input stream
+ * using the {@link FramedSnappyDialect#STANDARD} dialect.
+ * @param in the InputStream from which to read the compressed data
+ * @throws IOException if reading fails
+ */
+ public FramedSnappyCompressorInputStream(final InputStream in) throws IOException {
+ this(in, FramedSnappyDialect.STANDARD);
+ }
+
+ /**
* Constructs a new input stream that decompresses snappy-framed-compressed data
* from the specified input stream.
* @param in the InputStream from which to read the compressed data
+ * @param dialect the dialect used by the compressed stream
* @throws IOException if reading fails
*/
- public FramedSnappyCompressorInputStream(final InputStream in) throws IOException {
+ public FramedSnappyCompressorInputStream(final InputStream in,
+ final FramedSnappyDialect dialect)
+ throws IOException {
this.in = new PushbackInputStream(in, 1);
- readStreamIdentifier();
+ this.dialect = dialect;
+ if (dialect.hasStreamIdentifier()) {
+ readStreamIdentifier();
+ }
}
/** {@inheritDoc} */
@@ -182,8 +201,13 @@ public class FramedSnappyCompressorInputStream extends CompressorInputStream {
uncompressedBytesRemaining = readSize() - 4 /* CRC */;
expectedChecksum = unmask(readCrc());
} else if (type == COMPRESSED_CHUNK_TYPE) {
- final long size = readSize() - 4 /* CRC */;
- expectedChecksum = unmask(readCrc());
+ boolean expectChecksum = dialect.usesChecksumWithCompressedChunks();
+ final long size = readSize() - (expectChecksum ? 4 : 0);
+ if (expectChecksum) {
+ expectedChecksum = unmask(readCrc());
+ } else {
+ expectedChecksum = -1;
+ }
currentCompressedChunk =
new SnappyCompressorInputStream(new BoundedInputStream(in, size));
// constructor reads uncompressed size
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java
new file mode 100644
index 0000000..1f0d2b8
--- /dev/null
+++ b/src/main/java/org/apache/commons/compress/compressors/snappy/FramedSnappyDialect.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.snappy;
+
+/**
+ * Dialects of the framing format that {@link FramedSnappyCompressorInputStream} can deal with.
+ * @since 1.12
+ */
+public enum FramedSnappyDialect {
+ /**
+ * The standard as defined by the <a
+ * href="https://github.com/google/snappy/blob/master/framing_format.txt">Snappy
+ * framing format description</a>
+ */
+ STANDARD(true, true),
+ /**
+ * The format used by Apple's iWork Archives (.iwa files).
+ */
+ IWORK_ARCHIVE(false, false);
+ /**
+ * Per-dialect flags: whether the stream carries a stream identifier and
+ * whether compressed chunks carry a checksum.
+ */
+ private final boolean streamIdentifier, checksumWithCompressedChunks;
+ /**
+ * Creates a dialect from its two distinguishing properties.
+ * @param hasStreamIdentifier value reported by {@link #hasStreamIdentifier()}
+ * @param usesChecksumWithCompressedChunks value reported by
+ * {@link #usesChecksumWithCompressedChunks()}
+ */
+ private FramedSnappyDialect(boolean hasStreamIdentifier,
+ boolean usesChecksumWithCompressedChunks) {
+ this.streamIdentifier = hasStreamIdentifier;
+ this.checksumWithCompressedChunks = usesChecksumWithCompressedChunks;
+ }
+ /**
+ * Tells {@link FramedSnappyCompressorInputStream} whether to read a stream
+ * identifier when it is constructed.
+ * @return true for {@link #STANDARD}, false for {@link #IWORK_ARCHIVE}
+ */
+ boolean hasStreamIdentifier() {
+ return streamIdentifier;
+ }
+ /**
+ * Tells {@link FramedSnappyCompressorInputStream} whether a compressed chunk
+ * contains a CRC; when it does not, no CRC is read and the CRC's four bytes are
+ * not subtracted from the chunk size.
+ * @return true for {@link #STANDARD}, false for {@link #IWORK_ARCHIVE}
+ */
+ boolean usesChecksumWithCompressedChunks() {
+ return checksumWithCompressedChunks;
+ }
+}
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
index 3126c4d..b12ddeb 100644
--- a/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/snappy/FramedSnappyCompressorInputStreamTest.java
@@ -28,6 +28,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.commons.compress.AbstractTestCase;
+import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.junit.Test;
@@ -170,6 +171,32 @@ public final class FramedSnappyCompressorInputStreamTest
testChecksumUnmasking(0xffffc757l);
}
+ @Test
+ public void readIWAFile() throws Exception { // smoke test: passes when decoding finishes without an exception
+ final ZipFile zip = new ZipFile(getFile("testNumbersNew.numbers")); // the fixture is opened as a ZIP archive
+ try {
+ InputStream is = zip.getInputStream(zip.getEntry("Index/Document.iwa")); // the IWA data is one entry of that archive
+ try {
+ final FramedSnappyCompressorInputStream in =
+ new FramedSnappyCompressorInputStream(is, FramedSnappyDialect.IWORK_ARCHIVE); // explicit dialect instead of the STANDARD default
+ FileOutputStream out = null;
+ try {
+ out = new FileOutputStream(new File(dir, "snappyIWATest.raw"));
+ IOUtils.copy(in, out); // decompressed bytes are only written out; their content is not asserted
+ } finally {
+ if (out != null) { // stays null if opening the output file failed
+ out.close();
+ }
+ in.close();
+ }
+ } finally {
+ is.close();
+ }
+ } finally {
+ zip.close();
+ }
+ }
+
private void testChecksumUnmasking(final long x) {
assertEquals(Long.toHexString(x),
Long.toHexString(FramedSnappyCompressorInputStream
http://git-wip-us.apache.org/repos/asf/commons-compress/blob/18daf66b/src/test/resources/testNumbersNew.numbers
----------------------------------------------------------------------
diff --git a/src/test/resources/testNumbersNew.numbers b/src/test/resources/testNumbersNew.numbers
new file mode 100644
index 0000000..3f9a013
Binary files /dev/null and b/src/test/resources/testNumbersNew.numbers differ