You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2012/05/07 16:16:38 UTC
svn commit: r1335032 -
/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
Author: ssc
Date: Mon May 7 14:16:37 2012
New Revision: 1335032
URL: http://svn.apache.org/viewvc?rev=1335032&view=rev
Log:
MAHOUT-834 rowsimilarityjob doesn't clean its temp dir, and fails when seeing it again
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1335032&r1=1335031&r2=1335032&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Mon May 7 14:16:37 2012
@@ -29,6 +29,8 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.mahout.cf.taste.common.TopK;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.mapreduce.VectorSumReducer;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
@@ -84,6 +86,7 @@ public class RowSimilarityJob extends Ab
+ DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?", String.valueOf(false));
addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
+ addOption(DefaultOptionCreator.overwriteOption().create());
Map<String,List<String>> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
@@ -99,6 +102,14 @@ public class RowSimilarityJob extends Ab
similarityClassname = similarityClassnameArg;
}
+ // Clear the output and temp paths if the overwrite option has been set
+ if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
+ // Clear the temp path
+ HadoopUtil.delete(getConf(), getTempPath());
+ // Clear the output path
+ HadoopUtil.delete(getConf(), getOutputPath());
+ }
+
int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
double threshold = hasOption("threshold") ?