You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/06/26 15:35:11 UTC
svn commit: r1496929 -
/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
Author: smarthi
Date: Wed Jun 26 13:35:11 2013
New Revision: 1496929
URL: http://svn.apache.org/r1496929
Log:
MAHOUT-833: Make conversion to sequence files map-reduce (changes based on feedback from code review)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1496929&r1=1496928&r2=1496929&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java Wed Jun 26 13:35:11 2013
@@ -24,8 +24,11 @@ import java.net.URI;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
+import java.util.List;
+import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
@@ -270,9 +273,9 @@ public final class HadoopUtil {
/**
* Return the first cached file in the list, else null if there are no cached files.
- * @param conf
- * @return
- * @throws IOException
+ * @param conf - MapReduce Configuration
+ * @return Path of Cached file
+ * @throws IOException - IO Exception
*/
public static Path getSingleCachedFile(Configuration conf) throws IOException {
return getCachedFiles(conf)[0];
@@ -280,9 +283,9 @@ public final class HadoopUtil {
/**
* Retrieves paths to cached files.
- * @param conf
- * @return
- * @throws IOException
+ * @param conf - MapReduce Configuration
+ * @return Path[] of Cached Files
+ * @throws IOException - IO Exception
* @throws IllegalStateException if no cache files are found
*/
public static Path[] getCachedFiles(Configuration conf) throws IOException {
@@ -316,13 +319,13 @@ public final class HadoopUtil {
return cacheFiles;
}
- public static void setSerializations(Configuration conf) {
- conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
+ public static void setSerializations(Configuration configuration) {
+ configuration.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
+ "org.apache.hadoop.io.serializer.WritableSerialization");
}
- public static void writeInt(int value, Path path, Configuration conf) throws IOException {
- FileSystem fs = FileSystem.get(path.toUri(), conf);
+ public static void writeInt(int value, Path path, Configuration configuration) throws IOException {
+ FileSystem fs = FileSystem.get(path.toUri(), configuration);
FSDataOutputStream out = fs.create(path);
try {
out.writeInt(value);
@@ -331,8 +334,8 @@ public final class HadoopUtil {
}
}
- public static int readInt(Path path, Configuration conf) throws IOException {
- FileSystem fs = FileSystem.get(path.toUri(), conf);
+ public static int readInt(Path path, Configuration configuration) throws IOException {
+ FileSystem fs = FileSystem.get(path.toUri(), configuration);
FSDataInputStream in = fs.open(path);
try {
return in.readInt();
@@ -343,45 +346,42 @@ public final class HadoopUtil {
/**
* Builds a comma-separated list of input splits
+ * @param fs - File System
+ * @param fileStatus - File Status
+ * @return list of directories as a comma-separated String
+ * @throws IOException - IO Exception
*/
public static String buildDirList(FileSystem fs, FileStatus fileStatus) throws IOException {
- StringBuilder dirList = new StringBuilder();
boolean bContainsFiles = false;
-
+ List<String> directoriesList = Lists.newArrayList();
for (FileStatus childFileStatus : fs.listStatus(fileStatus.getPath())) {
if (childFileStatus.isDir()) {
String subDirectoryList = buildDirList(fs, childFileStatus);
- if (subDirectoryList.length() > 0 && dirList.length() > 0) {
- dirList.append(",");
- }
- dirList.append(subDirectoryList);
+ directoriesList.add(subDirectoryList);
} else {
bContainsFiles = true;
}
}
if (bContainsFiles) {
- if (dirList.length() > 0) {
- dirList.append(",");
- }
- dirList.append(fileStatus.getPath().toUri().getPath());
+ directoriesList.add(fileStatus.getPath().toUri().getPath());
}
- return dirList.toString();
+ return Joiner.on(',').skipNulls().join(directoriesList.iterator());
}
/**
*
- * @param conf - configuration
+ * @param configuration - configuration
* @param filePath - Input File Path
* @return relative file Path
- * @throws IOException
+ * @throws IOException - IO Exception
*/
- public static String calcRelativeFilePath(Configuration conf, Path filePath) throws IOException {
- FileSystem fs = filePath.getFileSystem(conf);
+ public static String calcRelativeFilePath(Configuration configuration, Path filePath) throws IOException {
+ FileSystem fs = filePath.getFileSystem(configuration);
FileStatus fst = fs.getFileStatus(filePath);
String currentPath = fst.getPath().toString().replaceFirst("file:", "");
- String basePath = conf.get("baseinputpath");
+ String basePath = configuration.get("baseinputpath");
if (!basePath.endsWith("/")) {
basePath += "/";
}