You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2017/10/22 05:07:03 UTC

commons-compress git commit: COMPRESS-425 add auto-detection of Zstandard inputs

Repository: commons-compress
Updated Branches:
  refs/heads/master 099cdc213 -> a0aec901f


COMPRESS-425 add auto-detection of Zstandard inputs


Project: http://git-wip-us.apache.org/repos/asf/commons-compress/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-compress/commit/a0aec901
Tree: http://git-wip-us.apache.org/repos/asf/commons-compress/tree/a0aec901
Diff: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/a0aec901

Branch: refs/heads/master
Commit: a0aec901f411b4bc26734745297b7651d7e6b9a2
Parents: 099cdc2
Author: Stefan Bodewig <bo...@apache.org>
Authored: Sun Oct 22 07:06:19 2017 +0200
Committer: Stefan Bodewig <bo...@apache.org>
Committed: Sun Oct 22 07:06:19 2017 +0200

----------------------------------------------------------------------
 src/changes/changes.xml                         |  3 ++
 .../compressors/CompressorStreamFactory.java    |  4 ++
 .../compressors/zstandard/ZstdUtils.java        | 52 ++++++++++++++++++++
 src/site/xdoc/examples.xml                      |  2 +-
 src/site/xdoc/limitations.xml                   |  2 -
 .../compressors/DetectCompressorTestCase.java   |  6 +++
 .../compressors/zstandard/ZstdUtilsTest.java    | 52 ++++++++++++++++++++
 7 files changed, 118 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 8854b6c..2a94f16 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -49,6 +49,9 @@ The <action> type attribute can be add,update,fix,remove.
         Add read-only support for Zstandard compression based on the
         Zstd-jni project.
       </action>
+      <action issue="COMPRESS-425" type="add" date="2017-10-22">
+        Added auto-detection for Zstandard compressed streams.
+      </action>
     </release>
     <release version="1.15" date="2017-10-17"
              description="Release 1.15

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
index ea32223..ab9d5fd 100644
--- a/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
+++ b/src/main/java/org/apache/commons/compress/compressors/CompressorStreamFactory.java
@@ -493,6 +493,10 @@ public class CompressorStreamFactory implements CompressorStreamProvider {
             return LZ4_FRAMED;
         }
 
+        if (ZstdUtils.matches(signature, signatureLength)) {
+            return ZSTANDARD;
+        }
+
         throw new CompressorException("No Compressor found for the stream signature.");
     }
     /**

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
index a12492e..8588519 100644
--- a/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
+++ b/src/main/java/org/apache/commons/compress/compressors/zstandard/ZstdUtils.java
@@ -29,6 +29,20 @@ public class ZstdUtils {
         DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE
     }
 
+    /**
+     * Zstandard Frame Magic Bytes.
+     */
+    private static final byte[] ZSTANDARD_FRAME_MAGIC = {
+        (byte) 0x28, (byte) 0xB5, (byte) 0x2F, (byte) 0xFD
+    };
+
+    /**
+     * Skippable Frame Magic Bytes - the three common bytes.
+     */
+    private static final byte[] SKIPPABLE_FRAME_MAGIC = {
+                     (byte) 0x2A, (byte) 0x4D, (byte) 0x18
+    };
+
     private static volatile CachedAvailability cachedZstdAvailability;
 
     static {
@@ -81,6 +95,44 @@ public class ZstdUtils {
         }
     }
 
+    /**
+     * Checks if the signature matches what is expected for a Zstandard file.
+     *
+     * @param   signature     the bytes to check
+     * @param   length        the number of bytes to check
+     * @return true if signature matches the Zstandard or skippable
+     * frame magic bytes, false otherwise
+     */
+    public static boolean matches(final byte[] signature, final int length) {
+        if (length < ZSTANDARD_FRAME_MAGIC.length) {
+            return false;
+        }
+
+        boolean isZstandard = true;
+        for (int i = 0; i < ZSTANDARD_FRAME_MAGIC.length; ++i) {
+            if (signature[i] != ZSTANDARD_FRAME_MAGIC[i]) {
+                isZstandard = false;
+                break;
+            }
+        }
+        if (isZstandard) {
+            return true;
+        }
+
+        if (0x50 == (signature[0] & 0xF0)) {
+            // skippable frame
+            for (int i = 0; i < SKIPPABLE_FRAME_MAGIC.length; ++i) {
+                if (signature[i + 1] != SKIPPABLE_FRAME_MAGIC[i]) {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        return false;
+    }
+
     // only exists to support unit tests
     static CachedAvailability getCachedZstdAvailability() {
         return cachedZstdAvailability;

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/site/xdoc/examples.xml
----------------------------------------------------------------------
diff --git a/src/site/xdoc/examples.xml b/src/site/xdoc/examples.xml
index 6bac23a..cced8a8 100644
--- a/src/site/xdoc/examples.xml
+++ b/src/site/xdoc/examples.xml
@@ -82,7 +82,7 @@ CompressorInputStream input = new CompressorStreamFactory()
     .createCompressorInputStream(originalInput);
 ]]></source>
 
-        <p>Note that there is no way to detect the lzma, Zstandard or Brotli formats so only
+        <p>Note that there is no way to detect the lzma or Brotli formats so only
         the two-arg version of
         <code>createCompressorInputStream</code> can be used.  Prior
         to Compress 1.9 the .Z format hasn't been auto-detected

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/site/xdoc/limitations.xml
----------------------------------------------------------------------
diff --git a/src/site/xdoc/limitations.xml b/src/site/xdoc/limitations.xml
index 18b5b66..d651204 100644
--- a/src/site/xdoc/limitations.xml
+++ b/src/site/xdoc/limitations.xml
@@ -200,8 +200,6 @@
          href="https://github.com/luben/zstd-jni">Zstandard JNI</a>
          library.</li>
          <li>read-only support</li>
-         <li><code>CompressorStreamFactory</code> is not able to auto-detect
-         streams using Zstandard compression.</li>
        </ul>
      </section>
    </body>

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java b/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java
index 6fde36d..7f3d316 100644
--- a/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java
+++ b/src/test/java/org/apache/commons/compress/compressors/DetectCompressorTestCase.java
@@ -38,6 +38,7 @@ import org.apache.commons.compress.compressors.deflate.DeflateCompressorInputStr
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
 import org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream;
 import org.apache.commons.compress.compressors.xz.XZCompressorInputStream;
+import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
 import org.junit.Test;
 
 @SuppressWarnings("deprecation") // deliberately tests setDecompressConcatenated
@@ -113,6 +114,10 @@ public final class DetectCompressorTestCase {
         assertNotNull(zlib);
         assertTrue(zlib instanceof DeflateCompressorInputStream);
 
+        final CompressorInputStream zstd = getStreamFor("bla.tar.zst");
+        assertNotNull(zstd);
+        assertTrue(zstd instanceof ZstdCompressorInputStream);
+
         try {
             factory.createCompressorInputStream(new ByteArrayInputStream(new byte[0]));
             fail("No exception thrown for an empty input stream");
@@ -133,6 +138,7 @@ public final class DetectCompressorTestCase {
         assertEquals(CompressorStreamFactory.LZMA, detect("bla.tar.lzma"));
         assertEquals(CompressorStreamFactory.SNAPPY_FRAMED, detect("bla.tar.sz"));
         assertEquals(CompressorStreamFactory.Z, detect("bla.tar.Z"));
+        assertEquals(CompressorStreamFactory.ZSTANDARD, detect("bla.tar.zst"));
 
         //make sure we don't oom on detect
         assertEquals(CompressorStreamFactory.Z, detect("COMPRESS-386"));

http://git-wip-us.apache.org/repos/asf/commons-compress/blob/a0aec901/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java
new file mode 100644
index 0000000..5bba0ad
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/compressors/zstandard/ZstdUtilsTest.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.compress.compressors.zstandard;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.junit.Test;
+
+public class ZstdUtilsTest {
+
+    @Test
+    public void testMatchesZstandardFrame() {
+        final byte[] data = {
+            (byte) 0x28, (byte) 0xB5, (byte) 0x2F, (byte) 0xFD,
+        };
+        assertFalse(ZstdUtils.matches(data, 3));
+        assertTrue(ZstdUtils.matches(data, 4));
+        assertTrue(ZstdUtils.matches(data, 5));
+        data[3] = '0';
+        assertFalse(ZstdUtils.matches(data, 4));
+    }
+
+    @Test
+    public void testMatchesSkippableFrame() {
+        final byte[] data = {
+            0, (byte) 0x2A, (byte) 0x4D, (byte) 0x18,
+        };
+        assertFalse(ZstdUtils.matches(data, 4));
+        for (byte b = (byte) 0x50; b < 0x60; b++) {
+            data[0] = b;
+            assertTrue(ZstdUtils.matches(data, 4));
+        }
+        assertFalse(ZstdUtils.matches(data, 3));
+        assertTrue(ZstdUtils.matches(data, 5));
+    }
+}