You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2023/01/21 14:23:40 UTC

[commons-compress] branch master updated: [COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment.

This is an automated email from the ASF dual-hosted git repository.

ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git


The following commit(s) were added to refs/heads/master by this push:
     new 770ea82a [COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment.
770ea82a is described below

commit 770ea82a132282fb0edb186fe6db646a1a0b7a35
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Sat Jan 21 09:23:35 2023 -0500

    [COMPRESS-638] The GzipCompressorOutputStream#writeHeader() uses
    ISO_8859_1 to write the file name and comment.
    
    If the strings contain non-ISO_8859_1 characters, unknown characters
    are displayed after decompression.
    Use percent encoding for non ISO_8859_1 characters.
---
 src/changes/changes.xml                            |  5 ++++
 .../gzip/GzipCompressorOutputStream.java           | 29 +++++++++++++++++++---
 .../gzip/GzipCompressorOutputStreamTest.java       | 14 +++++------
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 0ccf35aa..2f0341a0 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -52,6 +52,11 @@ The <action> type attribute can be add,update,fix,remove.
       <action type="fix" dev="ggregory" due-to="Arturo Bernal">Remove duplicate conditions. Use switch instead. #298.</action>
       <action type="fix" dev="ggregory" due-to="Robin Schimpf">Replace JUnit 3 and 4 with JUnit 5 #344, #346.</action>
       <action type="fix" dev="ggregory" due-to="Glavo">Make 'ZipFile.offsetComparator' static #353.</action>
+      <action type="fix" issue="COMPRESS-638" dev="ggregory" due-to="Radar wen, Gary Gregory, Michael Osipov">
+        The GzipCompressorOutputStream#writeHeader() uses ISO_8859_1 to write the file name and comment. 
+        If the strings contain non-ISO_8859_1 characters, unknown characters are displayed after decompression.
+        Use percent encoding for non ISO_8859_1 characters.
+      </action>
       <!-- ADD -->
       <action type="add" issue="COMPRESS-614" dev="ggregory" due-to="Andre Brait, Gary Gregory">Use FileTime for time fields in SevenZipArchiveEntry #256.</action>
       <action type="add" issue="COMPRESS-621" dev="ggregory" due-to="Glavo">Fix calculation the offset of the first zip central directory entry #334.</action>
diff --git a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
index e9d259dd..7b1975ae 100644
--- a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
+++ b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
@@ -20,8 +20,11 @@ package org.apache.commons.compress.compressors.gzip;
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.nio.ByteBuffer;
 import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
 import java.util.zip.CRC32;
 import java.util.zip.Deflater;
 import java.util.zip.GZIPInputStream;
@@ -132,6 +135,27 @@ public class GzipCompressorOutputStream extends CompressorOutputStream {
         out.flush();
     }
 
+    /**
+     * Gets the bytes encoded in the {@link GzipUtils#GZIP_ENCODING} Charset.
+     * <p>
+     * If the string cannot be encoded directly with {@link GzipUtils#GZIP_ENCODING}, then use URI-style percent encoding.
+     * </p>
+     *
+     * @param string The string to encode.
+     * @return The bytes of the string in {@link GzipUtils#GZIP_ENCODING} if it can be encoded directly, otherwise the US-ASCII bytes of its percent-encoded URI form.
+     * @throws IOException if the string cannot be turned into a URI for percent encoding; the cause is the {@link URISyntaxException}.
+     */
+    private byte[] getBytes(final String string) throws IOException {
+        if (GzipUtils.GZIP_ENCODING.newEncoder().canEncode(string)) {
+            return string.getBytes(GzipUtils.GZIP_ENCODING);
+        }
+        try {
+            return new URI(null, null, string, null).toASCIIString().getBytes(StandardCharsets.US_ASCII);
+        } catch (final URISyntaxException e) {
+            throw new IOException(string, e);
+        }
+    }
+
     /**
      * {@inheritDoc}
      *
@@ -151,7 +175,6 @@ public class GzipCompressorOutputStream extends CompressorOutputStream {
     public void write(final byte[] buffer, final int offset, final int length) throws IOException {
         if (deflater.finished()) {
             throw new IOException("Cannot write more data, the end of the compressed data stream has been reached");
-
         }
         if (length > 0) {
             deflater.setInput(buffer, offset, length);
@@ -195,12 +218,12 @@ public class GzipCompressorOutputStream extends CompressorOutputStream {
         out.write(buffer.array());
 
         if (filename != null) {
-            out.write(filename.getBytes(GzipUtils.GZIP_ENCODING));
+            out.write(getBytes(filename));
             out.write(0);
         }
 
         if (comment != null) {
-            out.write(comment.getBytes(GzipUtils.GZIP_ENCODING));
+            out.write(getBytes(comment));
             out.write(0);
         }
     }
diff --git a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
index 70bd8833..aa6922d8 100644
--- a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
+++ b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
@@ -27,7 +27,6 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 
 /**
@@ -35,7 +34,7 @@ import org.junit.jupiter.api.Test;
  */
 public class GzipCompressorOutputStreamTest {
 
-    private void testFileName(final String sourceFile) throws IOException {
+    private void testFileName(final String expected, final String sourceFile) throws IOException {
         final Path tempSourceFile = Files.createTempFile(sourceFile, sourceFile);
         Files.write(tempSourceFile, "<text>Hello World!</text>".getBytes(StandardCharsets.ISO_8859_1));
         final Path targetFile = Files.createTempFile("test", ".gz");
@@ -45,25 +44,26 @@ public class GzipCompressorOutputStreamTest {
             Files.copy(tempSourceFile, gos);
         }
         try (GzipCompressorInputStream gis = new GzipCompressorInputStream(Files.newInputStream(targetFile))) {
-            assertEquals(sourceFile, gis.getMetaData().getFilename());
+            assertEquals(expected, gis.getMetaData().getFilename());
         }
     }
 
     @Test
     public void testFileNameAscii() throws IOException {
-        testFileName("ASCII.xml");
+        testFileName("ASCII.xml", "ASCII.xml");
     }
 
     /**
      * Tests COMPRESS-638.
      *
+     * GZip RFC requires ISO 8859-1 (LATIN-1).
+     *
      * @throws IOException When the test fails.
      */
     @Test
-    @Disabled("COMPRESS-638")
-    public void testFileNameChinese() throws IOException {
+    public void testFileNameChinesePercentEncoded() throws IOException {
         // "Test Chinese name"
-        testFileName("\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0.xml");
+        testFileName("%E6%B5%8B%E8%AF%95%E4%B8%AD%E6%96%87%E5%90%8D%E7%A7%B0.xml", "\u6D4B\u8BD5\u4E2D\u6587\u540D\u79F0.xml");
     }
 
 }