You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2024/01/13 21:38:57 UTC
(commons-compress) branch master updated: Add and use ZipEncodingHelper.getZipEncoding(Charset)
This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push:
new 37047a928 Add and use ZipEncodingHelper.getZipEncoding(Charset)
37047a928 is described below
commit 37047a92822ad75f9a5e33d35a7c8c4417b0903b
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sat Jan 13 16:38:52 2024 -0500
Add and use ZipEncodingHelper.getZipEncoding(Charset)
ZipFile now uses a Charset instance variable instead of a String
---
.../commons/compress/archivers/tar/TarUtils.java | 2 +-
.../compress/archivers/zip/ZipEncodingHelper.java | 42 ++++++++---
.../commons/compress/archivers/zip/ZipFile.java | 81 ++++++++++++----------
.../compress/archivers/zip/ZipEncodingTest.java | 3 +-
4 files changed, 81 insertions(+), 47 deletions(-)
diff --git a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
index 285ed65bb..8ce32b42c 100644
--- a/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
+++ b/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
@@ -48,7 +48,7 @@ public class TarUtils {
private static final int BYTE_MASK = 255;
- static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(null);
+ static final ZipEncoding DEFAULT_ENCODING = ZipEncodingHelper.getZipEncoding(Charset.defaultCharset());
/**
* Encapsulates the algorithms used up to Commons Compress 1.3 as ZipEncoding.
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
index 75c56ba7e..c73476ed8 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipEncodingHelper.java
@@ -37,8 +37,25 @@ public abstract class ZipEncodingHelper {
static final ZipEncoding ZIP_ENCODING_UTF_8 = getZipEncoding(CharsetNames.UTF_8);
/**
- * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, theNIOencoder
- * will be configured replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
+ * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
+ * encoder will be configured to replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
+ * <p>
+ * If the given charset is {@code null}, the platform default will be used instead.
+ * </p>
+ *
+ * @param charset The charset of the ZIP encoding. Specify {@code null} for the platform's default encoding.
+ * @return A ZIP encoding for the given charset.
+ * @since 1.26.0
+ */
+ public static ZipEncoding getZipEncoding(final Charset charset) {
+ final Charset actual = Charsets.toCharset(charset);
+ final boolean useReplacement = isUTF8(actual);
+ return new NioZipEncoding(actual, useReplacement);
+ }
+
+ /**
+ * Instantiates a ZIP encoding. An NIO based character set encoder/decoder will be returned. As a special case, if the character set is UTF-8, the NIO
+ * encoder will be configured to replace malformed and unmappable characters with '?'. This matches existing behavior from the older fallback encoder.
* <p>
* If the requested character set cannot be found, the platform default will be used instead.
* </p>
@@ -59,23 +76,30 @@ public abstract class ZipEncodingHelper {
static ByteBuffer growBufferBy(final ByteBuffer buffer, final int increment) {
buffer.limit(buffer.position());
buffer.rewind();
-
final ByteBuffer on = ByteBuffer.allocate(buffer.capacity() + increment);
-
on.put(buffer);
return on;
}
+ /**
+ * Tests whether a given charset is UTF-8. If the given charset is null, then check the platform's default encoding.
+ *
+ * @param charset The charset to test. If the given charset is null, then check the platform's default encoding.
+ */
+ static boolean isUTF8(final Charset charset) {
+ return isUTF8Alias(Charsets.toCharset(charset).name());
+ }
+
+ private static boolean isUTF8Alias(final String actual) {
+ return UTF_8.name().equalsIgnoreCase(actual) || UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual));
+ }
+
/**
* Tests whether a given encoding is UTF-8. If the given name is null, then check the platform's default encoding.
*
* @param charsetName If the given name is null, then check the platform's default encoding.
*/
static boolean isUTF8(final String charsetName) {
- final String actual = charsetName != null ? charsetName : Charset.defaultCharset().name();
- if (UTF_8.name().equalsIgnoreCase(actual)) {
- return true;
- }
- return UTF_8.aliases().stream().anyMatch(alias -> alias.equalsIgnoreCase(actual));
+ return isUTF8Alias(charsetName != null ? charsetName : Charset.defaultCharset().name());
}
}
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
index 2764b5753..d2791b108 100644
--- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
+++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java
@@ -27,6 +27,7 @@ import java.io.SequenceInputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.SeekableByteChannel;
+import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
@@ -53,6 +54,7 @@ import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
import org.apache.commons.compress.utils.SeekableInMemoryByteChannel;
+import org.apache.commons.io.Charsets;
import org.apache.commons.io.build.AbstractOrigin.ByteArrayOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.input.CountingInputStream;
@@ -121,13 +123,15 @@ public class ZipFile implements Closeable {
*/
public static class Builder extends AbstractStreamBuilder<ZipFile, Builder> {
+ static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;
+
private SeekableByteChannel seekableByteChannel;
private boolean useUnicodeExtraFields = true;
private boolean ignoreLocalFileHeader;
public Builder() {
- setCharset(StandardCharsets.UTF_8);
- setCharsetDefault(StandardCharsets.UTF_8);
+ setCharset(DEFAULT_CHARSET);
+ setCharsetDefault(DEFAULT_CHARSET);
}
@SuppressWarnings("resource") // caller closes
@@ -151,7 +155,7 @@ public class ZipFile implements Closeable {
actualDescription = path.toString();
}
final boolean closeOnError = seekableByteChannel != null;
- return new ZipFile(actualChannel, actualDescription, getCharset().name(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
+ return new ZipFile(actualChannel, actualDescription, getCharset(), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
}
/**
@@ -444,6 +448,16 @@ public class ZipFile implements Closeable {
private static final Comparator<ZipArchiveEntry> offsetComparator = Comparator.comparingLong(ZipArchiveEntry::getDiskNumberStart)
.thenComparingLong(ZipArchiveEntry::getLocalHeaderOffset);
+ /**
+ * Creates a new Builder.
+ *
+ * @return a new Builder.
+ * @since 1.26.0
+ */
+ public static Builder builder() {
+ return new Builder();
+ }
+
/**
* Closes a ZIP file quietly; throwing no IOException, does nothing on null input.
*
@@ -481,7 +495,7 @@ public class ZipFile implements Closeable {
* Defaults to UTF-8.
* </p>
*/
- private final String encoding;
+ private final Charset encoding;
/**
* The ZIP encoding to use for file names and the file comment.
@@ -531,16 +545,6 @@ public class ZipFile implements Closeable {
private long firstLocalFileHeaderOffset;
- /**
- * Creates a new Builder.
- *
- * @return a new Builder.
- * @since 1.26.0
- */
- public static Builder builder() {
- return new Builder();
- }
-
/**
* Opens the given file for reading, assuming "UTF8" for file names.
*
@@ -705,6 +709,31 @@ public class ZipFile implements Closeable {
this(channel, "a SeekableByteChannel", encoding, true);
}
+ private ZipFile(final SeekableByteChannel channel, final String channelDescription, final Charset encoding, final boolean useUnicodeExtraFields,
+ final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
+ this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
+ this.encoding = Charsets.toCharset(encoding, Builder.DEFAULT_CHARSET);
+ this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
+ this.useUnicodeExtraFields = useUnicodeExtraFields;
+ this.archive = channel;
+ boolean success = false;
+ try {
+ final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
+ if (!ignoreLocalFileHeader) {
+ resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
+ }
+ fillNameMap();
+ success = true;
+ } catch (final IOException e) {
+ throw new IOException("Error reading Zip content from " + channelDescription, e);
+ } finally {
+ this.closed = !success;
+ if (!success && closeOnError) {
+ org.apache.commons.io.IOUtils.closeQuietly(archive);
+ }
+ }
+ }
+
/**
* Opens the given channel for reading, assuming the specified encoding for file names.
* <p>
@@ -755,27 +784,7 @@ public class ZipFile implements Closeable {
private ZipFile(final SeekableByteChannel channel, final String channelDescription, final String encoding, final boolean useUnicodeExtraFields,
final boolean closeOnError, final boolean ignoreLocalFileHeader) throws IOException {
- this.isSplitZipArchive = channel instanceof ZipSplitReadOnlySeekableByteChannel;
- this.encoding = encoding;
- this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
- this.useUnicodeExtraFields = useUnicodeExtraFields;
- this.archive = channel;
- boolean success = false;
- try {
- final Map<ZipArchiveEntry, NameAndComment> entriesWithoutUTF8Flag = populateFromCentralDirectory();
- if (!ignoreLocalFileHeader) {
- resolveLocalFileHeaderData(entriesWithoutUTF8Flag);
- }
- fillNameMap();
- success = true;
- } catch (final IOException e) {
- throw new IOException("Error reading Zip content from " + channelDescription, e);
- } finally {
- this.closed = !success;
- if (!success && closeOnError) {
- org.apache.commons.io.IOUtils.closeQuietly(archive);
- }
- }
+ this(channel, channelDescription, Charsets.toCharset(encoding), useUnicodeExtraFields, closeOnError, ignoreLocalFileHeader);
}
/**
@@ -914,7 +923,7 @@ public class ZipFile implements Closeable {
* @return null if using the platform's default character encoding.
*/
public String getEncoding() {
- return encoding;
+ return encoding.name();
}
/**
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
index a4697ce1f..6a9c1e4df 100644
--- a/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZipEncodingTest.java
@@ -111,7 +111,8 @@ public class ZipEncodingTest {
public void testIsUTF8() {
assertTrue(ZipEncodingHelper.isUTF8(CharsetNames.UTF_8));
assertTrue(ZipEncodingHelper.isUTF8("UTF8"));
- Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8(null));
+ Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8((Charset) null));
+ Assertions.assertEquals(Charset.defaultCharset().name().equals(CharsetNames.UTF_8), ZipEncodingHelper.isUTF8((String) null));
}
@Test