You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by sz...@apache.org on 2010/03/12 19:56:39 UTC
svn commit: r922395 - in /hadoop/mapreduce/trunk: CHANGES.txt
src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java
src/tools/org/apache/hadoop/tools/HadoopArchives.java
Author: szetszwo
Date: Fri Mar 12 18:56:39 2010
New Revision: 922395
URL: http://svn.apache.org/viewvc?rev=922395&view=rev
Log:
MAPREDUCE-1579. archive: check and possibly replace the space character in source paths.
Added:
hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java
Modified:
hadoop/mapreduce/trunk/CHANGES.txt
hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java
Modified: hadoop/mapreduce/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/CHANGES.txt?rev=922395&r1=922394&r2=922395&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/CHANGES.txt (original)
+++ hadoop/mapreduce/trunk/CHANGES.txt Fri Mar 12 18:56:39 2010
@@ -1379,3 +1379,6 @@ Release 0.21.0 - Unreleased
MAPREDUCE-890. After HADOOP-4491, the user who started mapred system is
not able to run job. (Ravi Gummadi via vinodkv)
+
+    MAPREDUCE-1579. archive: check and possibly replace the space character
+    in source paths. (szetszwo)
Added: hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java?rev=922395&view=auto
==============================================================================
--- hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java (added)
+++ hadoop/mapreduce/trunk/src/test/mapred/org/apache/hadoop/tools/TestHadoopArchives.java Fri Mar 12 18:56:39 2010
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.tools;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsShell;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.mapred.MiniMRCluster;
+import org.apache.hadoop.util.ToolRunner;
+import org.apache.log4j.Level;
+
+/**
+ * test {@link HadoopArchives}
+ */
+public class TestHadoopArchives extends TestCase {
+ {
+ ((Log4JLogger)LogFactory.getLog(org.apache.hadoop.security.Groups.class)
+ ).getLogger().setLevel(Level.OFF);
+ ((Log4JLogger)org.apache.hadoop.ipc.Server.LOG
+ ).getLogger().setLevel(Level.OFF);
+ ((Log4JLogger)org.apache.hadoop.util.AsyncDiskService.LOG
+ ).getLogger().setLevel(Level.OFF);
+ }
+
+ private static final String inputDir = "input";
+
+ private Path inputPath;
+ private MiniDFSCluster dfscluster;
+ private MiniMRCluster mapred;
+ private FileSystem fs;
+ private Path archivePath;
+
+ static private Path createFile(Path dir, String filename, FileSystem fs
+ ) throws IOException {
+ final Path f = new Path(dir, filename);
+ final FSDataOutputStream out = fs.create(f);
+ out.write(filename.getBytes());
+ out.close();
+ return f;
+ }
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ dfscluster = new MiniDFSCluster(new Configuration(), 2, true, null);
+ fs = dfscluster.getFileSystem();
+ mapred = new MiniMRCluster(2, fs.getUri().toString(), 1);
+ inputPath = new Path(fs.getHomeDirectory(), inputDir);
+ archivePath = new Path(fs.getHomeDirectory(), "archive");
+ fs.mkdirs(inputPath);
+ createFile(inputPath, "a", fs);
+ createFile(inputPath, "b", fs);
+ createFile(inputPath, "c", fs);
+ }
+
+ protected void tearDown() throws Exception {
+ try {
+ if (mapred != null) {
+ mapred.shutdown();
+ }
+ if (dfscluster != null) {
+ dfscluster.shutdown();
+ }
+ } catch(Exception e) {
+ System.err.println(e);
+ }
+ super.tearDown();
+ }
+
+
+ public void testPathWithSpaces() throws Exception {
+ fs.delete(archivePath, true);
+
+ //create files/directories with spaces
+ createFile(inputPath, "c c", fs);
+ final Path sub1 = new Path(inputPath, "sub 1");
+ fs.mkdirs(sub1);
+ createFile(sub1, "file x y z", fs);
+ createFile(sub1, "file", fs);
+ createFile(sub1, "x", fs);
+ createFile(sub1, "y", fs);
+ createFile(sub1, "z", fs);
+ final Path sub2 = new Path(inputPath, "sub 2");
+ fs.mkdirs(sub2);
+ final Configuration conf = mapred.createJobConf();
+ final FsShell shell = new FsShell(conf);
+
+ final String inputPathStr = inputPath.toUri().getPath();
+ System.out.println("inputPathStr = " + inputPathStr);
+
+ final List<String> originalPaths = lsr(shell, inputPathStr);
+ final URI uri = fs.getUri();
+ final String prefix = "har://hdfs-" + uri.getHost() +":" + uri.getPort()
+ + archivePath.toUri().getPath() + Path.SEPARATOR;
+
+ {//space replacement is not enabled
+ final String[] args = {
+ "-archiveName",
+ "fail.har",
+ "-p",
+ inputPathStr,
+ "*",
+ archivePath.toString()
+ };
+ final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
+ assertEquals(-1, ToolRunner.run(har, args));
+ }
+
+ {//Enable space replacement
+ final String harName = "foo.har";
+ final String[] args = {
+ "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
+ "-archiveName",
+ harName,
+ "-p",
+ inputPathStr,
+ "*",
+ archivePath.toString()
+ };
+ final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
+ assertEquals(0, ToolRunner.run(har, args));
+
+ //compare results
+ final List<String> harPaths = lsr(shell, prefix + harName);
+ final List<String> replaced = replace(originalPaths, HadoopArchives.SPACE_REPLACEMENT_DEFAULT);
+ assertEquals(replaced, harPaths);
+ }
+
+ {//Replace space with space
+ final String[] args = {
+ "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
+ "-D" + HadoopArchives.SPACE_REPLACEMENT_LABEL + "=p q",
+ "-Dhar.space.replace.enable=true",
+ "-archiveName",
+ "fail.har",
+ "-p",
+ inputPathStr,
+ "*",
+ archivePath.toString()
+ };
+ final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
+ try {
+ ToolRunner.run(har, args);
+ fail();
+ } catch(IllegalArgumentException iae) {
+ System.out.println("GOOD");
+ iae.printStackTrace();
+ }
+ }
+
+ {//Replace space with Path.SEPARATOR
+ final String[] args = {
+ "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
+ "-D" + HadoopArchives.SPACE_REPLACEMENT_LABEL + "=" + Path.SEPARATOR,
+ "-Dhar.space.replace.enable=true",
+ "-archiveName",
+ "fail.har",
+ "-p",
+ inputPathStr,
+ "*",
+ archivePath.toString()
+ };
+ final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
+ try {
+ ToolRunner.run(har, args);
+ fail();
+ } catch(IllegalArgumentException iae) {
+ System.out.println("GOOD");
+ iae.printStackTrace();
+ }
+ }
+
+ {//Replace space with a valid replacement
+ final String harName = "bar.har";
+ final String replacement = "+-";
+ final String[] args = {
+ "-D" + HadoopArchives.SPACE_REPLACE_LABEL + "=true",
+ "-D" + HadoopArchives.SPACE_REPLACEMENT_LABEL + "=" + replacement,
+ "-Dhar.space.replace.enable=true",
+ "-archiveName",
+ harName,
+ "-p",
+ inputPathStr,
+ "*",
+ archivePath.toString()
+ };
+ final HadoopArchives har = new HadoopArchives(mapred.createJobConf());
+ assertEquals(0, ToolRunner.run(har, args));
+
+ //compare results
+ final List<String> harPaths = lsr(shell, prefix + harName);
+ final List<String> replaced = replace(originalPaths, replacement);
+ assertEquals(replaced, harPaths);
+ }
+ }
+
+ private static List<String> replace(List<String> paths, String replacement) {
+ final List<String> replaced = new ArrayList<String>();
+ for(int i = 0; i < paths.size(); i++) {
+ replaced.add(paths.get(i).replace(" ", replacement));
+ }
+ Collections.sort(replaced);
+ return replaced;
+ }
+
+ private static List<String> lsr(final FsShell shell, String dir
+ ) throws Exception {
+ System.out.println("lsr root=" + dir);
+ final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+ final PrintStream out = new PrintStream(bytes);
+ final PrintStream oldOut = System.out;
+ final PrintStream oldErr = System.err;
+ System.setOut(out);
+ System.setErr(out);
+ final String results;
+ try {
+ assertEquals(0, shell.run(new String[]{"-lsr", dir}));
+ results = bytes.toString();
+ } finally {
+ IOUtils.closeStream(out);
+ System.setOut(oldOut);
+ System.setErr(oldErr);
+ }
+ System.out.println("lsr results:\n" + results);
+
+ final String dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR));
+ final List<String> paths = new ArrayList<String>();
+ for(StringTokenizer t = new StringTokenizer(results, "\n");
+ t.hasMoreTokens(); ) {
+ final String s = t.nextToken();
+ final int i = s.indexOf(dirname);
+ if (i >= 0) {
+ paths.add(s.substring(i + dirname.length()));
+ }
+ }
+ Collections.sort(paths);
+ System.out.println("lsr paths = " + paths.toString().replace(", ", ",\n "));
+ return paths;
+ }
+}
Modified: hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java?rev=922395&r1=922394&r2=922395&view=diff
==============================================================================
--- hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java (original)
+++ hadoop/mapreduce/trunk/src/tools/org/apache/hadoop/tools/HadoopArchives.java Fri Mar 12 18:56:39 2010
@@ -18,9 +18,10 @@
package org.apache.hadoop.tools;
+import java.io.DataInput;
+import java.io.DataOutput;
import java.io.FileNotFoundException;
import java.io.IOException;
-
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
@@ -45,6 +46,7 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
@@ -87,6 +89,24 @@ public class HadoopArchives implements T
static final String TOTAL_SIZE_LABEL = NAME + ".total.size";
static final String DST_HAR_LABEL = NAME + ".archive.name";
static final String SRC_PARENT_LABEL = NAME + ".parent.path";
+
+ static final String SPACE_REPLACE_LABEL = NAME + ".space.replace.enable";
+ static final boolean SPACE_REPLACE_DEFAULT = false;
+ static final String SPACE_REPLACEMENT_LABEL = NAME + ".space.replacement";
+ static final String SPACE_REPLACEMENT_DEFAULT = "_";
+ static final String SPACE_REPLACEMENT_DESCRIPTION
+ = HadoopArchives.class.getSimpleName() + " (version=" + VERSION
+ + ") does not support source paths with the space character."
+ + "\n\nThere is a space replacement option, which can be enabled by"
+ + "\n -D" + SPACE_REPLACE_LABEL + "=true"
+ + "\nThe space replacement string can be specified by"
+ + "\n -D" + SPACE_REPLACEMENT_LABEL + "=<REPLACEMENT_STRING>"
+ + "\nThe default <REPLACEMENT_STRING> is \""
+ + SPACE_REPLACEMENT_DEFAULT
+ + "\".\n*** Note that the original paths will not be changed"
+ + " by the space replacement option."
+ + " The resulted har contains only the replaced paths.";
+
// size of each part file
// its fixed for now.
static final long partSize = 2 * 1024 * 1024 * 1024l;
@@ -97,6 +117,11 @@ public class HadoopArchives implements T
private JobConf conf;
+ private String spaceReplacement = null;
+
+ private boolean isSpaceReplaceEnabled() {
+ return spaceReplacement != null;
+ }
public void setConf(Configuration conf) {
if (conf instanceof JobConf) {
@@ -149,12 +174,56 @@ public class HadoopArchives implements T
}
}
+ /** HarEntry is used in the {@link HArchivesMapper} as the input value. */
+ private static class HarEntry implements Writable {
+ String path;
+ String[] children;
+
+ HarEntry() {}
+
+ HarEntry(String path, String[] children) {
+ this.path = path;
+ this.children = children;
+ }
+
+ boolean isDir() {
+ return children != null;
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ path = Text.readString(in);
+
+ if (in.readBoolean()) {
+ children = new String[in.readInt()];
+ for(int i = 0; i < children.length; i++) {
+ children[i] = Text.readString(in);
+ }
+ } else {
+ children = null;
+ }
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ Text.writeString(out, path);
+
+ final boolean dir = isDir();
+ out.writeBoolean(dir);
+ if (dir) {
+ out.writeInt(children.length);
+ for(String c : children) {
+ Text.writeString(out, c);
+ }
+ }
+ }
+ }
+
/**
* Input format of a hadoop archive job responsible for
* generating splits of the file list
*/
-
- static class HArchiveInputFormat implements InputFormat<LongWritable, Text> {
+ static class HArchiveInputFormat implements InputFormat<LongWritable, HarEntry> {
//generate input splits from the src file lists
public InputSplit[] getSplits(JobConf jconf, int numSplits)
@@ -174,7 +243,7 @@ public class HadoopArchives implements T
FileStatus fstatus = fs.getFileStatus(src);
ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
LongWritable key = new LongWritable();
- Text value = new Text();
+ final HarEntry value = new HarEntry();
SequenceFile.Reader reader = null;
// the remaining bytes in the file split
long remaining = fstatus.getLen();
@@ -211,9 +280,10 @@ public class HadoopArchives implements T
return splits.toArray(new FileSplit[splits.size()]);
}
- public RecordReader<LongWritable, Text> getRecordReader(InputSplit split,
+ @Override
+ public RecordReader<LongWritable, HarEntry> getRecordReader(InputSplit split,
JobConf job, Reporter reporter) throws IOException {
- return new SequenceFileRecordReader<LongWritable, Text>(job,
+ return new SequenceFileRecordReader<LongWritable, HarEntry>(job,
(FileSplit)split);
}
}
@@ -244,12 +314,13 @@ public class HadoopArchives implements T
* @param fullPath the full path
* @param root the prefix root to be truncated
* @return the relative path
+ * @throws IOException
*/
- private Path relPathToRoot(Path fullPath, Path root) {
+ private String relPathToRoot(Path fullPath, Path root) throws IOException {
// just take some effort to do it
// rather than just using substring
// so that we do not break sometime later
- Path justRoot = new Path(Path.SEPARATOR);
+ final String justRoot = Path.SEPARATOR;
if (fullPath.depth() == root.depth()) {
return justRoot;
}
@@ -260,7 +331,7 @@ public class HadoopArchives implements T
retPath = new Path(parent.getName(), retPath);
parent = parent.getParent();
}
- return new Path(justRoot, retPath);
+ return new Path(justRoot, retPath.toString()).toString();
}
return null;
}
@@ -331,19 +402,36 @@ public class HadoopArchives implements T
}
Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
for (Map.Entry<String, HashSet<String>> entry : keyVals) {
- Path relPath = relPathToRoot(new Path(entry.getKey()), parentPath);
+ final String relPath = relPathToRoot(new Path(entry.getKey()), parentPath);
if (relPath != null) {
- String toWrite = relPath + " dir ";
- HashSet<String> children = entry.getValue();
- StringBuffer sbuff = new StringBuffer();
- sbuff.append(toWrite);
- for (String child: children) {
- sbuff.append(child + " ");
+ final String[] children = new String[entry.getValue().size()];
+ int i = 0;
+ for(String child: entry.getValue()) {
+ children[i++] = child;
}
- toWrite = sbuff.toString();
- srcWriter.append(new LongWritable(0L), new Text(toWrite));
+ append(srcWriter, 0L, relPath, children);
+ }
+ }
+ }
+
+ //check whether the path contains the space character.
+ private void checkSpace(String p) throws IOException {
+ //check only if space replacement is disabled.
+ if (!isSpaceReplaceEnabled() && p.indexOf(' ') >= 0) {
+ throw new IOException("Source \"" + p + "\" contains the space character. "
+ + SPACE_REPLACEMENT_DESCRIPTION);
+ }
+ }
+
+ private void append(SequenceFile.Writer srcWriter, long len,
+ String path, String[] children) throws IOException {
+ checkSpace(path);
+ if (children != null) {
+ for(String child: children) {
+ checkSpace(child);
}
}
+ srcWriter.append(new LongWritable(len), new HarEntry(path, children));
}
/**
@@ -431,7 +519,7 @@ public class HadoopArchives implements T
Path srcFiles = new Path(jobDirectory, "_har_src_files");
conf.set(SRC_LIST_LABEL, srcFiles.toString());
SequenceFile.Writer srcWriter = SequenceFile.createWriter(jobfs, conf,
- srcFiles, LongWritable.class, Text.class,
+ srcFiles, LongWritable.class, HarEntry.class,
SequenceFile.CompressionType.NONE);
// get the list of files
// create single list of files and dirs
@@ -451,24 +539,21 @@ public class HadoopArchives implements T
recursivels(fs, fdir, allFiles);
for (FileStatusDir statDir: allFiles) {
FileStatus stat = statDir.getFileStatus();
- String toWrite = "";
long len = stat.isDir()? 0:stat.getLen();
+ final String path = relPathToRoot(stat.getPath(), parentPath);
+ final String[] children;
if (stat.isDir()) {
- toWrite = "" + relPathToRoot(stat.getPath(), parentPath) + " dir ";
//get the children
FileStatus[] list = statDir.getChildren();
- StringBuffer sbuff = new StringBuffer();
- sbuff.append(toWrite);
- for (FileStatus stats: list) {
- sbuff.append(stats.getPath().getName() + " ");
+ children = new String[list.length];
+ for (int i = 0; i < list.length; i++) {
+ children[i] = list[i].getPath().getName();
}
- toWrite = sbuff.toString();
}
else {
- toWrite += relPathToRoot(stat.getPath(), parentPath) + " file ";
+ children = null;
}
- srcWriter.append(new LongWritable(len), new
- Text(toWrite));
+ append(srcWriter, len, path, children);
srcWriter.sync();
numFiles++;
totalSize += len;
@@ -505,8 +590,10 @@ public class HadoopArchives implements T
}
static class HArchivesMapper
- implements Mapper<LongWritable, Text, IntWritable, Text> {
+ implements Mapper<LongWritable, HarEntry, IntWritable, Text> {
private JobConf conf = null;
+ private String spaceReplacement;
+
int partId = -1 ;
Path tmpOutputDir = null;
Path tmpOutput = null;
@@ -523,6 +610,9 @@ public class HadoopArchives implements T
// tmp files.
public void configure(JobConf conf) {
this.conf = conf;
+ this.spaceReplacement = conf.get(SPACE_REPLACEMENT_LABEL,
+ SPACE_REPLACEMENT_DEFAULT);
+
// this is tightly tied to map reduce
// since it does not expose an api
// to get the partition
@@ -565,28 +655,6 @@ public class HadoopArchives implements T
fsin.close();
}
}
-
- static class MapStat {
- private String pathname;
- private boolean isDir;
- private List<String> children;
- public MapStat(String line) {
- String[] splits = line.split(" ");
- pathname = splits[0];
- if ("dir".equals(splits[1])) {
- isDir = true;
- }
- else {
- isDir = false;
- }
- if (isDir) {
- children = new ArrayList<String>();
- for (int i = 2; i < splits.length; i++) {
- children.add(splits[i]);
- }
- }
- }
- }
/**
* get rid of / in the beginning of path
@@ -601,27 +669,29 @@ public class HadoopArchives implements T
return new Path(parent, new Path(p.toString().substring(1)));
}
+ private String replaceSpaces(String s) {
+ return s.replace(" ", spaceReplacement);
+ }
+
// read files from the split input
// and write it onto the part files.
// also output hash(name) and string
// for reducer to create index
// and masterindex files.
- public void map(LongWritable key, Text value,
+ public void map(LongWritable key, HarEntry value,
OutputCollector<IntWritable, Text> out,
Reporter reporter) throws IOException {
- String line = value.toString();
- MapStat mstat = new MapStat(line);
- Path relPath = new Path(mstat.pathname);
+ Path relPath = new Path(value.path);
int hash = HarFileSystem.getHarHash(relPath);
- String towrite = null;
+ String towrite = replaceSpaces(relPath.toString());
Path srcPath = realPath(relPath, rootPath);
long startPos = partStream.getPos();
- if (mstat.isDir) {
- towrite = relPath.toString() + " " + "dir none " + 0 + " " + 0 + " ";
+ if (value.isDir()) {
+ towrite += " dir none " + 0 + " " + 0 + " ";
StringBuffer sbuff = new StringBuffer();
sbuff.append(towrite);
- for (String child: mstat.children) {
- sbuff.append(child + " ");
+ for (String child: value.children) {
+ sbuff.append(replaceSpaces(child) + " ");
}
towrite = sbuff.toString();
//reading directories is also progress
@@ -634,7 +704,7 @@ public class HadoopArchives implements T
reporter.setStatus("Copying file " + srcStatus.getPath() +
" to archive.");
copyData(srcStatus.getPath(), input, partStream, reporter);
- towrite = relPath.toString() + " file " + partname + " " + startPos
+ towrite += " file " + partname + " " + startPos
+ " " + srcStatus.getLen() + " ";
}
out.collect(new IntWritable(hash), new Text(towrite));
@@ -801,6 +871,23 @@ public class HadoopArchives implements T
}
}
}
+
+ //process space replacement configuration
+ if (conf.getBoolean(SPACE_REPLACE_LABEL, SPACE_REPLACE_DEFAULT)) {
+ spaceReplacement = conf.get(SPACE_REPLACEMENT_LABEL,
+ SPACE_REPLACEMENT_DEFAULT);
+ if (spaceReplacement.indexOf(' ') >= 0) {
+ throw new IllegalArgumentException("spaceReplacement = \""
+ + spaceReplacement + "\" cannot contain the space character.");
+ }
+ if (spaceReplacement.indexOf(Path.SEPARATOR) >= 0) {
+ throw new IllegalArgumentException("spaceReplacement = \""
+ + spaceReplacement + "\" cannot contain the path separator \""
+ + Path.SEPARATOR + "\".");
+ }
+ LOG.info(SPACE_REPLACEMENT_LABEL + " = " + spaceReplacement);
+ }
+
archive(parentPath, globPaths, archiveName, destPath);
} catch(IOException ie) {
System.err.println(ie.getLocalizedMessage());