You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2013/06/28 03:33:55 UTC

svn commit: r1497630 - in /mahout/trunk/core/src/main/java/org/apache/mahout: clustering/kmeans/EigenSeedGenerator.java math/hadoop/decomposer/EigenVerificationJob.java

Author: smarthi
Date: Fri Jun 28 01:33:54 2013
New Revision: 1497630

URL: http://svn.apache.org/r1497630
Log:
MAHOUT-1214: Improve the accuracy of the Spectral KMeans Method - more minor cleanups.

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/EigenSeedGenerator.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/EigenSeedGenerator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/EigenSeedGenerator.java?rev=1497630&r1=1497629&r2=1497630&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/EigenSeedGenerator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/EigenSeedGenerator.java Fri Jun 28 01:33:54 2013
@@ -82,27 +82,26 @@ public final class EigenSeedGenerator {
       Map<Integer,ClusterWritable> chosenClusters = Maps.newHashMapWithExpectedSize(k);
 
       for (FileStatus fileStatus : inputFiles) {
-        if (fileStatus.isDir()) {
-          continue;
-        }
-        for (Pair<Writable,VectorWritable> record : new SequenceFileIterable<Writable,VectorWritable>(
-            fileStatus.getPath(), true, conf)) {
-          Writable key = record.getFirst();
-          VectorWritable value = record.getSecond();
-
-          for (Vector.Element e : value.get().nonZeroes()) {
-            int index = e.index();
-            double v = Math.abs(e.get());
-
-            if (!maxEigens.containsKey(index) || v > maxEigens.get(index)) {
-              maxEigens.put(index, v);
-              Text newText = new Text(key.toString());
-              chosenTexts.put(index, newText);
-              Kluster newCluster = new Kluster(value.get(), index, measure);
-              newCluster.observe(value.get(), 1);
-              ClusterWritable clusterWritable = new ClusterWritable();
-              clusterWritable.setValue(newCluster);
-              chosenClusters.put(index, clusterWritable);
+        if (!fileStatus.isDir()) {
+          for (Pair<Writable,VectorWritable> record : new SequenceFileIterable<Writable,VectorWritable>(
+              fileStatus.getPath(), true, conf)) {
+            Writable key = record.getFirst();
+            VectorWritable value = record.getSecond();
+
+            for (Vector.Element e : value.get().nonZeroes()) {
+              int index = e.index();
+              double v = Math.abs(e.get());
+
+              if (!maxEigens.containsKey(index) || v > maxEigens.get(index)) {
+                maxEigens.put(index, v);
+                Text newText = new Text(key.toString());
+                chosenTexts.put(index, newText);
+                Kluster newCluster = new Kluster(value.get(), index, measure);
+                newCluster.observe(value.get(), 1);
+                ClusterWritable clusterWritable = new ClusterWritable();
+                clusterWritable.setValue(newCluster);
+                chosenClusters.put(index, clusterWritable);
+              }
             }
           }
         }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1497630&r1=1497629&r2=1497630&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java Fri Jun 28 01:33:54 2013
@@ -230,15 +230,9 @@ public class EigenVerificationJob extend
       @Override
       public int compare(Map.Entry<MatrixSlice,EigenStatus> e1, Map.Entry<MatrixSlice,EigenStatus> e2) {
         // sort eigens on eigenvalues in descending order
-        double eg1 = e1.getValue().getEigenValue();
-        double eg2 = e2.getValue().getEigenValue();
-        if (eg1 < eg2) {
-          return 1;
-        }
-        if (eg1 > eg2) {
-          return -1;
-        }
-        return 0;
+        Double eg1 = e1.getValue().getEigenValue();
+        Double eg2 = e2.getValue().getEigenValue();
+        return eg1.compareTo(eg2);
       }
     });