You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2024/01/13 21:38:57 UTC

(commons-compress) branch master updated: Add and use ZipEncodingHelper.getZipEncoding(Charset)

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git


The following commit(s) were added to refs/heads/master by this push:
     new 37047a928 Add and use ZipEncodingHelper.getZipEncoding(Charset)
37047a928 is described below

commit 37047a92822ad75f9a5e33d35a7c8c4417b0903b
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sat Jan 13 16:38:52 2024 -0500

    Add and use ZipEncodingHelper.getZipEncoding(Charset)
    
    ZipFile now uses a Charset instance variable instead of a String
---
 .../commons/compress/archivers/tar/TarUtils.java   |  2 +-
 .../compress/archivers/zip/ZipEncodingHelper.java  | 42 ++++++++---
 .../commons/compress/archivers/zip/ZipFile.java    | 81 ++++++++++++----------
 .../compress/archivers/zip/ZipEncodingTest.java    |  3 +-
 4 files changed, 81 insertions(+), 47 deletions(-)

diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
index 285ed65bb..8ce32b42c 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
@@ -48,7 +48,7 @@ public class TarUtils {
 
     private static final int BYTE_MASK = 255;
 
-    static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(null);
+    static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
 
     /**
      * Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
index 75c56ba7e..c73476ed8 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
@@ -37,8 +37,25 @@ public abstract class ZipEncodingHelper {
     static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(CharsetNames.UTF_8);
 
     /**
-     * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, theNIOencoder
-     * will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
+     * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
+     * encoder will be configured to replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
+     * <p>
+     * If the given charset is {@code null}, the platform default will be used instead.
+     * </p>
+     *
+     * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding.
+     * @return A ZIP encoding for the given charset.
+     * @since 1.26.0
+     */
+    public static ZipEncoding getZipEncoding(final Charset charset) {
+        final Charset actual = Charsets.toCharset(charset);
+        final boolean useReplacement = isUTF8(actual);
+        return new NioZipEncoding(actual, useReplacement);
+    }
+
+    /**
+     * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
+     * encoder will be configured to replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
      * <p>
      * If the requested character set cannot be found, the platform default will be used instead.
      * </p>
@@ -59,23 +76,30 @@ public abstract class ZipEncodingHelper {
     static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) {
         buffer.limit(buffer.position());
         buffer.rewind();
-
         final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
-
         on.put(buffer);
         return on;
     }
 
+    /**
+     * Tests whether a given charset is UTF-8. If the given charset is null, then checks the platform's default charset.
+     *
+     * @param charset If the given charset is null, then check the platform's default encoding.
+     */
+    static boolean isUTF8(final Charset charset) {
+        return isUTF8Alias(Charsets.toCharset(charset).name());
+    }
+
+    private static boolean isUTF8Alias(final String actual) {
+        return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual));
+    }
+
     /**
      * Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
      *
      * @param charsetName If the given name is null, then check the platform's default encoding.
      */
     static boolean isUTF8(final String charsetName) {
-        final String actual = charsetName != null ? charsetName : Charset.defaultCharset().name();
-        if (UTF_8.name().equalsIgnoreCase(actual)) {
-            return true;
-        }
-        return UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual));
+        return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name());
     }
 }
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
index 2764b5753..d2791b108 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
@@ -27,6 +27,7 @@ import java.io.SequenceInputStream;
 import java.nio.ByteBuffer;
 import java.nio.channels.FileChannel;
 import java.nio.channels.SeekableByteChannel;
+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.OpenOption;
@@ -53,6 +54,7 @@ import org.apache.commons.compress.utils.CharsetNames;
 import org.apache.commons.compress.utils.IOUtils;
 import org.apache.commons.compress.utils.InputStreamStatistics;
 import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
+import org.apache.commons.io.Charsets;
 import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
 import org.apache.commons.io.build.AbstractStreamBuilder;
 import org.apache.commons.io.input.CountingInputStream;
@@ -121,13 +123,15 @@ public class ZipFile implements Closeable {
      */
     public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> {
 
+        static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
+
         private SeekableByteChannel seekableByteChannel;
         private boolean useUnicodeExtraFields = true;
         private boolean ignoreLocalFileHeader;
 
         public Builder() {
-            setCharset(StandardCharsets.UTF_8);
-            setCharsetDefault(StandardCharsets.UTF_8);
+            setCharset(DEFAULT_CHARSET);
+            setCharsetDefault(DEFAULT_CHARSET);
         }
 
         @SuppressWarnings("resource") // caller closes
@@ -151,7 +155,7 @@ public class ZipFile implements Closeable {
                 actualDescription = path.toString();
             }
             final boolean closeOnError = seekableByteChannel != null;
-            return new ZipFile(actualChannel, actualDescription, getCharset().name(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
+            return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
         }
 
         /**
@@ -444,6 +448,16 @@ public class ZipFile implements Closeable {
     private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
             .thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
 
+    /**
+     * Creates a new Builder.
+     *
+     * @return a new Builder.
+     * @since 1.26.0
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
     /**
      * Closes a ZIP file quietly; throwing no IOException, does nothing on null input.
      *
@@ -481,7 +495,7 @@ public class ZipFile implements Closeable {
      * Defaults to UTF-8.
      * </p>
      */
-    private final String encoding;
+    private final Charset encoding;
 
     /**
      * The ZIP encoding to use for file names and the file comment.
@@ -531,16 +545,6 @@ public class ZipFile implements Closeable {
 
     private long firstLocalFileHeaderOffset;
 
-    /**
-     * Creates a new Builder.
-     *
-     * @return a new Builder.
-     * @since 1.26.0
-     */
-    public static Builder builder() {
-        return new Builder();
-    }
-
     /**
      * Opens the given file for reading, assuming "UTF8" for file names.
      *
@@ -705,6 +709,31 @@ public class ZipFile implements Closeable {
         this(channel, "a SeekableByteChannel", encoding, true);
     }
 
+    private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields,
+            final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
+        this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
+        this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET);
+        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
+        this.useUnicodeExtraFields = useUnicodeExtraFields;
+        this.archive = channel;
+        boolean success = false;
+        try {
+            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
+            if (!ignoreLocalFileHeader) {
+                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
+            }
+            fillNameMap();
+            success = true;
+        } catch (final IOException e) {
+            throw new IOException("Error reading Zip content from " + channelDescription, e);
+        } finally {
+            this.closed = !success;
+            if (!success && closeOnError) {
+                org.apache.commons.io.IOUtils.closeQuietly(archive);
+            }
+        }
+    }
+
     /**
      * Opens the given channel for reading, assuming the specified encoding for file names.
      * <p>
@@ -755,27 +784,7 @@ public class ZipFile implements Closeable {
 
     private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
             final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
-        this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
-        this.encoding = encoding;
-        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
-        this.useUnicodeExtraFields = useUnicodeExtraFields;
-        this.archive = channel;
-        boolean success = false;
-        try {
-            final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
-            if (!ignoreLocalFileHeader) {
-                resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
-            }
-            fillNameMap();
-            success = true;
-        } catch (final IOException e) {
-            throw new IOException("Error reading Zip content from " + channelDescription, e);
-        } finally {
-            this.closed = !success;
-            if (!success && closeOnError) {
-                org.apache.commons.io.IOUtils.closeQuietly(archive);
-            }
-        }
+        this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
     }
 
     /**
@@ -914,7 +923,7 @@ public class ZipFile implements Closeable {
      * @return null if using the platform's default character encoding.
      */
     public String getEncoding() {
-        return encoding;
+        return encoding.name();
     }
 
     /**
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
index a4697ce1f..6a9c1e4df 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
@@ -111,7 +111,8 @@ public class ZipEncodingTest {
     public void testIsUTF8() {
         assertTrue(ZipEncodingHelper.isUTF8(CharsetNames.UTF_8));
         assertTrue(ZipEncodingHelper.isUTF8("UTF8"));
-        Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8(null));
+        Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8((Charset) null));
+        Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8((String) null));
     }
 
     @Test