You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pr...@apache.org on 2012/04/10 21:00:42 UTC

svn commit: r1311928 - in /mahout/trunk/core/src/main/java/org/apache/mahout/clustering: fuzzykmeans/FuzzyKMeansDriver.java kmeans/KMeansDriver.java

Author: pranjan
Date: Tue Apr 10 19:00:41 2012
New Revision: 1311928

URL: http://svn.apache.org/viewvc?rev=1311928&view=rev
Log:
MAHOUT-999: Create the directories to which the clustering policies are written, for both KMeans and FuzzyKMeans.
All JUnit tests pass. The Clusters-Reuters.sh kmeans and fuzzyk runs both complete successfully.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1311928&r1=1311927&r2=1311928&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java Tue Apr 10 19:00:41 2012
@@ -21,22 +21,17 @@ import static org.apache.mahout.clusteri
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.classify.ClusterClassificationDriver;
 import org.apache.mahout.clustering.classify.ClusterClassifier;
 import org.apache.mahout.clustering.iterator.ClusterIterator;
-import org.apache.mahout.clustering.iterator.ClusterWritable;
 import org.apache.mahout.clustering.iterator.FuzzyKMeansClusteringPolicy;
-import org.apache.mahout.clustering.iterator.KMeansClusteringPolicy;
 import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.ClassUtils;
@@ -44,14 +39,9 @@ import org.apache.mahout.common.HadoopUt
 import org.apache.mahout.common.commandline.DefaultOptionCreator;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.google.common.collect.Lists;
-import com.google.common.io.Closeables;
-
 public class FuzzyKMeansDriver extends AbstractJob {
 
   public static final String M_OPTION = "m";
@@ -279,11 +269,22 @@ public class FuzzyKMeansDriver extends A
     List<Cluster> clusters = new ArrayList<Cluster>();
     FuzzyKMeansUtil.configureWithClusterInfo(clustersIn, clusters);
     
+    if(conf==null){
+      conf = new Configuration();
+    }
+    
     if (clusters.isEmpty()) {
       throw new IllegalStateException("Clusters is empty!");
     }
     
     Path priorClustersPath = new Path(clustersIn, "clusters-0");
+    
+    FileSystem fileSystem = clustersIn.getFileSystem(conf);
+    
+    if(fileSystem.isFile(clustersIn)){
+      priorClustersPath = new Path(clustersIn.getParent(), "prior");
+      fileSystem.mkdirs(priorClustersPath);
+    }
     FuzzyKMeansClusteringPolicy policy = new FuzzyKMeansClusteringPolicy(m, convergenceDelta);
     
     ClusterClassifier prior = new ClusterClassifier(clusters, policy);

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1311928&r1=1311927&r2=1311928&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java Tue Apr 10 19:00:41 2012
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.clustering.Cluster;
@@ -246,8 +247,15 @@ public class KMeansDriver extends Abstra
     if (clusters.isEmpty()) {
       throw new IllegalStateException("Clusters is empty!");
     }
-    
+
     Path priorClustersPath = new Path(clustersIn, "clusters-0");
+    
+    FileSystem fileSystem = clustersIn.getFileSystem(conf);
+    if(fileSystem.isFile(clustersIn)){
+      priorClustersPath = new Path(clustersIn.getParent(), "prior");
+      fileSystem.mkdirs(priorClustersPath);
+    }
+    
     KMeansClusteringPolicy policy = new KMeansClusteringPolicy(convergenceDelta);
     
     ClusterClassifier prior = new ClusterClassifier(clusters, policy);