You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2013/08/10 18:22:49 UTC

svn commit: r1512789 - in /commons/proper/compress/trunk/src: changes/ main/java/org/apache/commons/compress/archivers/ main/java/org/apache/commons/compress/archivers/dump/ main/java/org/apache/commons/compress/archivers/sevenz/ main/java/org/apache/c...

Author: bodewig
Date: Sat Aug 10 16:22:49 2013
New Revision: 1512789

URL: http://svn.apache.org/r1512789
Log:
Add encoding support to DumpArchiveInputStream - related to COMPRESS-180

Modified:
    commons/proper/compress/trunk/src/changes/changes.xml
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
    commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java

Modified: commons/proper/compress/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/changes/changes.xml (original)
+++ commons/proper/compress/trunk/src/changes/changes.xml Sat Aug 10 16:22:49 2013
@@ -95,6 +95,10 @@ The <action> type attribute can be add,u
         TarArchiveOutputStream now properly handles link names that
         are too long to fit into a traditional TAR header.
       </action>
+      <action type="add" date="2013-08-10">
+        DumpArchiveInputStream now supports an encoding parameter that
+        can be used to specify the default encoding of file names.
+      </action>
     </release>
     <release version="1.5" date="2013-03-14"
              description="Release 1.5">

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java Sat Aug 10 16:22:49 2013
@@ -116,7 +116,7 @@ public class ArchiveStreamFactory {
     private String entryEncoding = null;
 
     /**
-     * Returns the encoding to use for arj, zip and tar files,
+     * Returns the encoding to use for arj, zip, dump and tar files,
      * or null for the default.
      *
      * @return entry encoding, or null
@@ -127,8 +127,8 @@ public class ArchiveStreamFactory {
     }
 
     /**
-     * Sets the encoding to use for arj, zip and tar files.
-     * Use null for the default.
+     * Sets the encoding to use for arj, zip, dump and tar files.  Use
+     * null for the default.
      *
      * @since 1.5
      */
@@ -188,7 +188,11 @@ public class ArchiveStreamFactory {
             return new CpioArchiveInputStream(in);
         }
         if (DUMP.equalsIgnoreCase(archiverName)) {
-            return new DumpArchiveInputStream(in);
+            if (entryEncoding != null) {
+                return new DumpArchiveInputStream(in, entryEncoding);
+            } else {
+                return new DumpArchiveInputStream(in);
+            }
         }
 
         throw new ArchiveException("Archiver: " + archiverName + " not found.");

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java Sat Aug 10 16:22:49 2013
@@ -20,6 +20,8 @@ package org.apache.commons.compress.arch
 
 import org.apache.commons.compress.archivers.ArchiveException;
 import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
+import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -39,6 +41,11 @@ import java.util.Stack;
  * the archive, and the read each entry as a normal input stream
  * using read().
  *
+ * There doesn't seem to exist a hint on the encoding of string values
+ * in any piece of documentation.  Given the main purpose of dump/restore
+ * is backing up a system it seems very likely the format uses the
+ * current default encoding of the system.
+ *
  * @NotThreadSafe
  */
 public class DumpArchiveInputStream extends ArchiveInputStream {
@@ -65,14 +72,34 @@ public class DumpArchiveInputStream exte
     private Queue<DumpArchiveEntry> queue;
 
     /**
-     * Constructor.
+     * The encoding to use for filenames and labels.
+     */
+    private final ZipEncoding encoding;
+
+    /**
+     * Constructor using the platform's default encoding for file
+     * names.
      *
      * @param is
      * @throws ArchiveException
      */
     public DumpArchiveInputStream(InputStream is) throws ArchiveException {
+        this(is, null);
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param is
+     * @param encoding the encoding to use for file names, use null
+     * for the platform's default encoding
+     * @since 1.6
+     */
+    public DumpArchiveInputStream(InputStream is, String encoding)
+        throws ArchiveException {
         this.raw = new TapeInputStream(is);
         this.hasHitEOF = false;
+        this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
 
         try {
             // read header, verify it's a dump archive.
@@ -83,7 +110,7 @@ public class DumpArchiveInputStream exte
             }
 
             // get summary information
-            summary = new DumpArchiveSummary(headerBytes);
+            summary = new DumpArchiveSummary(headerBytes, this.encoding);
 
             // reset buffer with actual block size.
             raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
@@ -324,7 +351,7 @@ public class DumpArchiveInputStream exte
 
                 byte type = blockBuffer[i + 6];
 
-                String name = new String(blockBuffer, i + 8, blockBuffer[i + 7]); // TODO default charset?
+                String name = DumpArchiveUtil.decode(encoding, blockBuffer, i + 8, blockBuffer[i + 7]);
 
                 if (".".equals(name) || "..".equals(name)) {
                     // do nothing...

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java Sat Aug 10 16:22:49 2013
@@ -18,8 +18,10 @@
  */
 package org.apache.commons.compress.archivers.dump;
 
+import java.io.IOException;
 import java.util.Date;
 
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
 
 /**
  * This class represents identifying information about a Dump archive volume.
@@ -41,15 +43,15 @@ public class DumpArchiveSummary {
     private int firstrec;
     private int ntrec;
 
-    DumpArchiveSummary(byte[] buffer) {
+    DumpArchiveSummary(byte[] buffer, ZipEncoding encoding) throws IOException {
         dumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 4);
         previousDumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 8);
         volume = DumpArchiveUtil.convert32(buffer, 12);
-        label = new String(buffer, 676, DumpArchiveConstants.LBLSIZE).trim(); // TODO default charset?
+        label = DumpArchiveUtil.decode(encoding, buffer, 676, DumpArchiveConstants.LBLSIZE).trim();
         level = DumpArchiveUtil.convert32(buffer, 692);
-        filesys = new String(buffer, 696, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
-        devname = new String(buffer, 760, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
-        hostname = new String(buffer, 824, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
+        filesys = DumpArchiveUtil.decode(encoding, buffer, 696, DumpArchiveConstants.NAMELEN).trim();
+        devname = DumpArchiveUtil.decode(encoding, buffer, 760, DumpArchiveConstants.NAMELEN).trim();
+        hostname = DumpArchiveUtil.decode(encoding, buffer, 824, DumpArchiveConstants.NAMELEN).trim();
         flags = DumpArchiveUtil.convert32(buffer, 888);
         firstrec = DumpArchiveUtil.convert32(buffer, 892);
         ntrec = DumpArchiveUtil.convert32(buffer, 896);

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java Sat Aug 10 16:22:49 2013
@@ -18,6 +18,8 @@
  */
 package org.apache.commons.compress.archivers.dump;
 
+import java.io.IOException;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
 
 /**
  * Various utilities for dump archives.
@@ -130,4 +132,14 @@ class DumpArchiveUtil {
 
         return i;
     }
+
+    /**
+     * Decodes a byte array to a string.
+     */
+    static String decode(ZipEncoding encoding, byte[] b, int offset, int len)
+        throws IOException {
+        byte[] copy = new byte[len];
+        System.arraycopy(b, offset, copy, 0, len);
+        return encoding.decode(copy);
+    }
 }

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java Sat Aug 10 16:22:49 2013
@@ -30,6 +30,7 @@ import java.util.zip.CRC32;
 
 import org.apache.commons.compress.utils.BoundedInputStream;
 import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
+import org.apache.commons.compress.utils.CharsetNames;
 
 /**
  * Reads a 7z file, using RandomAccessFile under
@@ -670,7 +671,7 @@ public class SevenZFile {
                         int nextName = 0;
                         for (int i = 0; i < names.length; i += 2) {
                             if (names[i] == 0 && names[i+1] == 0) {
-                                files[nextFile++].setName(new String(names, nextName, i-nextName, "UTF-16LE"));
+                                files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE));
                                 nextName = i + 2;
                             }
                         }

Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java Sat Aug 10 16:22:49 2013
@@ -239,7 +239,15 @@ public class TarUtils {
     // Helper method to generate the exception message
     private static String exceptionMessage(byte[] buffer, final int offset,
             final int length, int current, final byte currentByte) {
-        String string = new String(buffer, offset, length); // TODO default charset?
+        // default charset is good enough for an exception message,
+        //
+        // the alternative was to modify parseOctal and
+        // parseOctalOrBinary to receive the ZipEncoding of the
+        // archive (deprecating the existing public methods, of
+        // course) and dealing with the fact that ZipEncoding#decode
+        // can throw an IOException which parseOctal* doesn't declare
+        String string = new String(buffer, offset, length);
+
         string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
         final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
         return s;