You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2019/08/23 15:13:15 UTC

[commons-compress] 01/05: unit tests for encoding logic

This is an automated email from the ASF dual-hosted git repository.

bodewig pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git

commit 4ad5d80a6272e007f64a6ac66829ca189a8093b9
Author: Stefan Bodewig <bo...@apache.org>
AuthorDate: Fri Aug 23 16:12:05 2019 +0200

    unit tests for encoding logic
---
 .../compress/archivers/zip/NioZipEncoding.java     |  3 +
 .../compress/archivers/zip/NioZipEncodingTest.java | 97 ++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
index 0a7581a..4ce9c20 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/NioZipEncoding.java
@@ -112,6 +112,9 @@ class NioZipEncoding implements ZipEncoding, CharsetAccessor {
             } else if (res.isOverflow()) {
                 int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
                 out = ZipEncodingHelper.growBufferBy(out, increment);
+
+            } else if (res.isUnderflow() || res.isError()) {
+                break;
             }
         }
         // tell the encoder we are done
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/NioZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/NioZipEncodingTest.java
new file mode 100644
index 0000000..a04730c
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/NioZipEncodingTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.commons.compress.archivers.zip;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class NioZipEncodingTest { // exercises NioZipEncoding.encode(String) with several charsets
+
+    private static final String UMLAUTS = "\u00e4\u00f6\u00fc"; // U+00E4, U+00F6, U+00FC
+
+    @Test
+    public void umlautToUTF16BE() { // expects the same bytes as String.getBytes(UTF_16BE)
+        NioZipEncoding e = new NioZipEncoding(StandardCharsets.UTF_16BE, false); // NOTE(review): meaning of the boolean flag is not visible here - confirm
+        ByteBuffer bb = e.encode(UMLAUTS);
+        final int off = bb.arrayOffset();
+        byte[] result = Arrays.copyOfRange(bb.array(), off, off + bb.limit() - bb.position()); // NOTE(review): range starts at off, not off + bb.position(); exact only if position is 0 (same in every test below)
+        Assert.assertArrayEquals(UMLAUTS.getBytes(StandardCharsets.UTF_16BE), result);
+    }
+
+    @Test
+    public void umlautToUTF8() { // expects the same bytes as String.getBytes(UTF_8)
+        NioZipEncoding e = new NioZipEncoding(StandardCharsets.UTF_8, true);
+        ByteBuffer bb = e.encode("\u00e4\u00f6\u00fc"); // same text as UMLAUTS, written as a literal
+        final int off = bb.arrayOffset();
+        byte[] result = Arrays.copyOfRange(bb.array(), off, off + bb.limit() - bb.position());
+        Assert.assertArrayEquals(UMLAUTS.getBytes(StandardCharsets.UTF_8), result);
+    }
+
+    @Test
+    public void umlautToISO88591() { // expects the same bytes as String.getBytes(ISO_8859_1)
+        NioZipEncoding e = new NioZipEncoding(StandardCharsets.ISO_8859_1, true);
+        ByteBuffer bb = e.encode("\u00e4\u00f6\u00fc"); // same text as UMLAUTS, written as a literal
+        final int off = bb.arrayOffset();
+        byte[] result = Arrays.copyOfRange(bb.array(), off, off + bb.limit() - bb.position());
+        Assert.assertArrayEquals(UMLAUTS.getBytes(StandardCharsets.ISO_8859_1), result);
+    }
+
+    @Test
+    public void unmappableUmlauts() { // the umlauts are outside US-ASCII, so an escaped form is expected
+        NioZipEncoding e = new NioZipEncoding(StandardCharsets.US_ASCII, false);
+        ByteBuffer bb = e.encode("\u00e4\u00f6\u00fc"); // same text as UMLAUTS, written as a literal
+        final int off = bb.arrayOffset();
+        byte[] result = Arrays.copyOfRange(bb.array(), off, off + bb.limit() - bb.position());
+        Assert.assertEquals("%U00E4%U00F6%U00FC", new String(result, StandardCharsets.US_ASCII)); // expects %U plus four hex digits per char
+    }
+
+    private static final String RAINBOW_EMOJI = "\ud83c\udf08"; // surrogate pair for U+1F308
+
+    @Test
+    public void unmappableRainbowEmoji() { // the emoji is outside US-ASCII, so an escaped form is expected
+        NioZipEncoding e = new NioZipEncoding(StandardCharsets.US_ASCII, false);
+        ByteBuffer bb = e.encode(RAINBOW_EMOJI);
+        final int off = bb.arrayOffset();
+        byte[] result = Arrays.copyOfRange(bb.array(), off, off + bb.limit() - bb.position());
+        Assert.assertEquals("%UD83C%UDF08", new String(result, StandardCharsets.US_ASCII)); // expects each surrogate escaped on its own
+    }
+
+    @Test
+    public void rainbowEmojiToSurrogatePairUTF16() { // expects the same bytes as String.getBytes(UTF_16BE)
+        NioZipEncoding e = new NioZipEncoding(StandardCharsets.UTF_16BE, false);
+        ByteBuffer bb = e.encode(RAINBOW_EMOJI);
+        final int off = bb.arrayOffset();
+        byte[] result = Arrays.copyOfRange(bb.array(), off, off + bb.limit() - bb.position());
+        Assert.assertArrayEquals(RAINBOW_EMOJI.getBytes(StandardCharsets.UTF_16BE), result);
+    }
+
+    @Test
+    public void partialSurrogatePair() { // input is a high surrogate with no low surrogate after it
+        NioZipEncoding e = new NioZipEncoding(StandardCharsets.US_ASCII, false);
+        ByteBuffer bb = e.encode("\ud83c"); // first half of RAINBOW_EMOJI only
+        final int off = bb.arrayOffset();
+        byte[] result = Arrays.copyOfRange(bb.array(), off, off + bb.limit() - bb.position());
+        Assert.assertEquals(0, result.length); // expects no bytes for the dangling surrogate
+    }
+}