You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2023/01/21 14:23:40 UTC
[commons-compress] branch master updated: [COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment.
This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push:
new 770ea82a [COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment.
770ea82a is described below
commit 770ea82a132282fb0edb186fe6db646a1a0b7a35
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sat Jan 21 09:23:35 2023 -0500
[COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses
ISO_8859_1 to write the file name and comment.
If the strings contain non-ISO_8859_1 characters, unknown characters
are displayed after decompression.
Use percent encoding for non ISO_8859_1 characters.
---
src/changes/changes.xml | 5 ++++
.../gzip/GzipCompressorOutputStream.java | 29 +++++++++++++++++++---
.../gzip/GzipCompressorOutputStreamTest.java | 14 +++++------
3 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 0ccf35aa..2f0341a0 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -52,6 +52,11 @@ The <action> type attribute can be add,update,fix,remove.
<action type="fix" dev="ggregory" due-to="Arturo Bernal">Remove duplicate conditions. Use switch instead. #298.</action>
<action type="fix" dev="ggregory" due-to="Robin Schimpf">Replace JUnit 3 and 4 with JUnit 5 #344, #346.</action>
<action type="fix" dev="ggregory" due-to="Glavo">Make 'ZipFile.offsetComparator' static #353.</action>
+ <action type="fix" issue="COMPRESS-638" dev="ggregory" due-to="Radar wen, Gary Gregory, Michael Osipov">
+ The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment.
+ If the strings contain non-ISO_8859_1 characters, unknown characters are displayed after decompression.
+ Use percent encoding for non ISO_8859_1 characters.
+ </action>
<!-- ADD -->
<action type="add" issue="COMPRESS-614" dev="ggregory" due-to="Andre Brait, Gary Gregory">Use FileTime for time fields in SevenZipArchiveEntry #256.</action>
<action type="add" issue="COMPRESS-621" dev="ggregory" due-to="Glavo">Fix calculation the offset of the first zip central directory entry #334.</action>
diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
index e9d259dd..7b1975ae 100644
--- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
@@ -20,8 +20,11 @@ package org.apache.commons.compress.compressors.gzip;
import java.io.IOException;
import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;
import java.util.zip.Deflater;
import java.util.zip.GZIPInputStream;
@@ -132,6 +135,27 @@ public class GzipCompressorOutputStream extends CompressorOutputStream {
out.flush();
}
+ /**
+ * Gets the bytes encoded in the {@link GzipUtils#GZIP_ENCODING} Charset.
+ * <p>
+ * If the string cannot be encoded directly with {@link GzipUtils#GZIP_ENCODING}, then use URI-style percent encoding.
+ * </p>
+ *
+ * @param string The string to encode.
+ * @return The string's bytes in {@link GzipUtils#GZIP_ENCODING} when it can be encoded directly, otherwise the US-ASCII bytes of its percent-encoded form.
+ * @throws IOException If the percent-encoded form cannot be built; wraps the {@link URISyntaxException} and uses the string as the message.
+ */
+ private byte[] getBytes(final String string) throws IOException {
+ if (GzipUtils.GZIP_ENCODING.newEncoder().canEncode(string)) {
+ return string.getBytes(GzipUtils.GZIP_ENCODING);
+ }
+ try {
+ return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII);
+ } catch (final URISyntaxException e) {
+ throw new IOException(string, e);
+ }
+ }
+
/**
* {@inheritDoc}
*
@@ -151,7 +175,6 @@ public class GzipCompressorOutputStream extends CompressorOutputStream {
public void write(final byte[] buffer, final int offset, final int length) throws IOException {
if (deflater.finished()) {
throw new IOException("Cannot write more data, the end of the compressed data stream has been reached");
-
}
if (length > 0) {
deflater.setInput(buffer, offset, length);
@@ -195,12 +218,12 @@ public class GzipCompressorOutputStream extends CompressorOutputStream {
out.write(buffer.array());
if (filename != null) {
- out.write(filename.getBytes(GzipUtils.GZIP_ENCODING));
+ out.write(getBytes(filename));
out.write(0);
}
if (comment != null) {
- out.write(comment.getBytes(GzipUtils.GZIP_ENCODING));
+ out.write(getBytes(comment));
out.write(0);
}
}
diff --git a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
index 70bd8833..aa6922d8 100644
--- a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
@@ -27,7 +27,6 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
/**
@@ -35,7 +34,7 @@ import org.junit.jupiter.api.Test;
*/
public class GzipCompressorOutputStreamTest {
- private void testFileName(final String sourceFile) throws IOException {
+ private void testFileName(final String expected, final String sourceFile) throws IOException {
final Path tempSourceFile = Files.createTempFile(sourceFile, sourceFile);
Files.write(tempSourceFile, "<text>Hello World!</text>".getBytes(StandardCharsets.ISO_8859_1));
final Path targetFile = Files.createTempFile("test", ".gz");
@@ -45,25 +44,26 @@ public class GzipCompressorOutputStreamTest {
Files.copy(tempSourceFile, gos);
}
try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
- assertEquals(sourceFile, gis.getMetaData().getFilename());
+ assertEquals(expected, gis.getMetaData().getFilename());
}
}
@Test
public void testFileNameAscii() throws IOException {
- testFileName("ASCII.xml");
+ testFileName("ASCII.xml", "ASCII.xml");
}
/**
* Tests COMPRESS-638.
*
+ * GZip RFC requires ISO 8859-1 (LATIN-1).
+ *
* @throws IOException When the test fails.
*/
@Test
- @Disabled("COMPRESS-638")
- public void testFileNameChinese() throws IOException {
+ public void testFileNameChinesePercentEncoded() throws IOException {
// "Test Chinese name"
- testFileName("\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0.xml");
+ testFileName("%E6%B5%8B%E8%AF%95%E4%B8%AD%E6%96%87%E5%90%8D%E7%A7%B0.xml", "\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0.xml");
}
}