You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by bo...@apache.org on 2013/08/10 18:22:49 UTC
svn commit: r1512789 - in /commons/proper/compress/trunk/src: changes/
main/java/org/apache/commons/compress/archivers/
main/java/org/apache/commons/compress/archivers/dump/
main/java/org/apache/commons/compress/archivers/sevenz/
main/java/org/apache/c...
Author: bodewig
Date: Sat Aug 10 16:22:49 2013
New Revision: 1512789
URL: http://svn.apache.org/r1512789
Log:
Add encoding support to DumpArchiveInputStream - related to COMPRESS-180
Modified:
commons/proper/compress/trunk/src/changes/changes.xml
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
Modified: commons/proper/compress/trunk/src/changes/changes.xml
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/changes/changes.xml?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/changes/changes.xml (original)
+++ commons/proper/compress/trunk/src/changes/changes.xml Sat Aug 10 16:22:49 2013
@@ -95,6 +95,10 @@ The <action> type attribute can be add,u
TarArchiveOutputStream now properly handles link names that
are too long to fit into a traditional TAR header.
</action>
+ <action type="add" date="2013-08-10">
+ DumpArchiveInputStream now supports an encoding parameter that
+ can be used to specify the default encoding of file names.
+ </action>
</release>
<release version="1.5" date="2013-03-14"
description="Release 1.5">
Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/ArchiveStreamFactory.java Sat Aug 10 16:22:49 2013
@@ -116,7 +116,7 @@ public class ArchiveStreamFactory {
private String entryEncoding = null;
/**
- * Returns the encoding to use for arj, zip and tar files,
+ * Returns the encoding to use for arj, zip, dump and tar files,
* or null for the default.
*
* @return entry encoding, or null
@@ -127,8 +127,8 @@ public class ArchiveStreamFactory {
}
/**
- * Sets the encoding to use for arj, zip and tar files.
- * Use null for the default.
+ * Sets the encoding to use for arj, zip, dump and tar files. Use
+ * null for the default.
*
* @since 1.5
*/
@@ -188,7 +188,11 @@ public class ArchiveStreamFactory {
return new CpioArchiveInputStream(in);
}
if (DUMP.equalsIgnoreCase(archiverName)) {
- return new DumpArchiveInputStream(in);
+ if (entryEncoding != null) {
+ return new DumpArchiveInputStream(in, entryEncoding);
+ } else {
+ return new DumpArchiveInputStream(in);
+ }
}
throw new ArchiveException("Archiver: " + archiverName + " not found.");
Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveInputStream.java Sat Aug 10 16:22:49 2013
@@ -20,6 +20,8 @@ package org.apache.commons.compress.arch
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
+import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import java.io.EOFException;
import java.io.IOException;
@@ -39,6 +41,11 @@ import java.util.Stack;
* the archive, and the read each entry as a normal input stream
* using read().
*
+ * There doesn't seem to exist a hint on the encoding of string values
+ * in any piece of documentation. Given the main purpose of dump/restore
+ * is backing up a system it seems very likely the format uses the
+ * current default encoding of the system.
+ *
* @NotThreadSafe
*/
public class DumpArchiveInputStream extends ArchiveInputStream {
@@ -65,14 +72,34 @@ public class DumpArchiveInputStream exte
private Queue<DumpArchiveEntry> queue;
/**
- * Constructor.
+ * The encoding to use for filenames and labels.
+ */
+ private final ZipEncoding encoding;
+
+ /**
+ * Constructor using the platform's default encoding for file
+ * names.
*
* @param is
* @throws ArchiveException
*/
public DumpArchiveInputStream(InputStream is) throws ArchiveException {
+ this(is, null);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param is
+ * @param encoding the encoding to use for file names, use null
+ * for the platform's default encoding
+ * @since 1.6
+ */
+ public DumpArchiveInputStream(InputStream is, String encoding)
+ throws ArchiveException {
this.raw = new TapeInputStream(is);
this.hasHitEOF = false;
+ this.encoding = ZipEncodingHelper.getZipEncoding(encoding);
try {
// read header, verify it's a dump archive.
@@ -83,7 +110,7 @@ public class DumpArchiveInputStream exte
}
// get summary information
- summary = new DumpArchiveSummary(headerBytes);
+ summary = new DumpArchiveSummary(headerBytes, this.encoding);
// reset buffer with actual block size.
raw.resetBlockSize(summary.getNTRec(), summary.isCompressed());
@@ -324,7 +351,7 @@ public class DumpArchiveInputStream exte
byte type = blockBuffer[i + 6];
- String name = new String(blockBuffer, i + 8, blockBuffer[i + 7]); // TODO default charset?
+ String name = DumpArchiveUtil.decode(encoding, blockBuffer, i + 8, blockBuffer[i + 7]);
if (".".equals(name) || "..".equals(name)) {
// do nothing...
Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveSummary.java Sat Aug 10 16:22:49 2013
@@ -18,8 +18,10 @@
*/
package org.apache.commons.compress.archivers.dump;
+import java.io.IOException;
import java.util.Date;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
/**
* This class represents identifying information about a Dump archive volume.
@@ -41,15 +43,15 @@ public class DumpArchiveSummary {
private int firstrec;
private int ntrec;
- DumpArchiveSummary(byte[] buffer) {
+ DumpArchiveSummary(byte[] buffer, ZipEncoding encoding) throws IOException {
dumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 4);
previousDumpDate = 1000L * DumpArchiveUtil.convert32(buffer, 8);
volume = DumpArchiveUtil.convert32(buffer, 12);
- label = new String(buffer, 676, DumpArchiveConstants.LBLSIZE).trim(); // TODO default charset?
+ label = DumpArchiveUtil.decode(encoding, buffer, 676, DumpArchiveConstants.LBLSIZE).trim();
level = DumpArchiveUtil.convert32(buffer, 692);
- filesys = new String(buffer, 696, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
- devname = new String(buffer, 760, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
- hostname = new String(buffer, 824, DumpArchiveConstants.NAMELEN).trim(); // TODO default charset?
+ filesys = DumpArchiveUtil.decode(encoding, buffer, 696, DumpArchiveConstants.NAMELEN).trim();
+ devname = DumpArchiveUtil.decode(encoding, buffer, 760, DumpArchiveConstants.NAMELEN).trim();
+ hostname = DumpArchiveUtil.decode(encoding, buffer, 824, DumpArchiveConstants.NAMELEN).trim();
flags = DumpArchiveUtil.convert32(buffer, 888);
firstrec = DumpArchiveUtil.convert32(buffer, 892);
ntrec = DumpArchiveUtil.convert32(buffer, 896);
Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/dump/DumpArchiveUtil.java Sat Aug 10 16:22:49 2013
@@ -18,6 +18,8 @@
*/
package org.apache.commons.compress.archivers.dump;
+import java.io.IOException;
+import org.apache.commons.compress.archivers.zip.ZipEncoding;
/**
* Various utilities for dump archives.
@@ -130,4 +132,14 @@ class DumpArchiveUtil {
return i;
}
+
+ /**
+ * Decodes len bytes of b, starting at offset, to a string using the given encoding.
+ */
+ static String decode(ZipEncoding encoding, byte[] b, int offset, int len)
+ throws IOException {
+ byte[] copy = new byte[len];
+ System.arraycopy(b, offset, copy, 0, len);
+ return encoding.decode(copy);
+ }
}
Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/sevenz/SevenZFile.java Sat Aug 10 16:22:49 2013
@@ -30,6 +30,7 @@ import java.util.zip.CRC32;
import org.apache.commons.compress.utils.BoundedInputStream;
import org.apache.commons.compress.utils.CRC32VerifyingInputStream;
+import org.apache.commons.compress.utils.CharsetNames;
/**
* Reads a 7z file, using RandomAccessFile under
@@ -670,7 +671,7 @@ public class SevenZFile {
int nextName = 0;
for (int i = 0; i < names.length; i += 2) {
if (names[i] == 0 && names[i+1] == 0) {
- files[nextFile++].setName(new String(names, nextName, i-nextName, "UTF-16LE"));
+ files[nextFile++].setName(new String(names, nextName, i-nextName, CharsetNames.UTF_16LE));
nextName = i + 2;
}
}
Modified: commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java
URL: http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java?rev=1512789&r1=1512788&r2=1512789&view=diff
==============================================================================
--- commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java (original)
+++ commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarUtils.java Sat Aug 10 16:22:49 2013
@@ -239,7 +239,15 @@ public class TarUtils {
// Helper method to generate the exception message
private static String exceptionMessage(byte[] buffer, final int offset,
final int length, int current, final byte currentByte) {
- String string = new String(buffer, offset, length); // TODO default charset?
+ // default charset is good enough for an exception message,
+ //
+ // the alternative was to modify parseOctal and
+ // parseOctalOrBinary to receive the ZipEncoding of the
+ // archive (deprecating the existing public methods, of
+ // course) and dealing with the fact that ZipEncoding#decode
+ // can throw an IOException which parseOctal* doesn't declare
+ String string = new String(buffer, offset, length);
+
string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed
final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length;
return s;