You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@mahout.apache.org by ss...@apache.org on 2012/05/07 16:16:38 UTC

svn commit: r1335032 - /mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java

Author: ssc
Date: Mon May  7 14:16:37 2012
New Revision: 1335032

URL: http://svn.apache.org/viewvc?rev=1335032&view=rev
Log:
MAHOUT-834 RowSimilarityJob doesn't clean its temp dir, and fails when seeing it again

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1335032&r1=1335031&r2=1335032&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Mon May  7 14:16:37 2012
@@ -29,6 +29,8 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.mahout.cf.taste.common.TopK;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.ClassUtils;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.mapreduce.VectorSumReducer;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
@@ -84,6 +86,7 @@ public class RowSimilarityJob extends Ab
         + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
     addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to themselves?", String.valueOf(false));
     addOption("threshold", "tr", "discard row pairs with a similarity value below this", false);
+    addOption(DefaultOptionCreator.overwriteOption().create());
 
     Map<String,List<String>> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
@@ -99,6 +102,14 @@ public class RowSimilarityJob extends Ab
       similarityClassname = similarityClassnameArg;
     }
 
+    // Clear the output and temp paths if the overwrite option has been set
+    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
+      // Clear the temp path
+      HadoopUtil.delete(getConf(), getTempPath());
+      // Clear the output path
+      HadoopUtil.delete(getConf(), getOutputPath());
+    }
+
     int maxSimilaritiesPerRow = Integer.parseInt(getOption("maxSimilaritiesPerRow"));
     boolean excludeSelfSimilarity = Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
     double threshold = hasOption("threshold") ?