You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2014/01/09 22:34:43 UTC

svn commit: r1556958 - in /mahout/trunk: CHANGELOG integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java

Author: smarthi
Date: Thu Jan  9 21:34:43 2014
New Revision: 1556958

URL: http://svn.apache.org/r1556958
Log:
MAHOUT-1393: Removed duplicated code from getTopTerms and getTopFeatures in AbstractClusterWriter

Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1556958&r1=1556957&r2=1556958&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Thu Jan  9 21:34:43 2014
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.9 - unreleased
 
+  MAHOUT-1393: Remove duplicated code from getTopTerms and getTopFeatures in AbstractClusterWriter (Diego Carrion via smarthi)
+
   MAHOUT-1392: Streaming KMeans should write centroid output to a 'part-r-xxxx' file when executed in sequential mode (smarthi)
 
   MAHOUT-1390: SVD hangs for certain inputs (tdunning)

Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1556958&r1=1556957&r2=1556958&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Thu Jan  9 21:34:43 2014
@@ -62,7 +62,7 @@ public abstract class AbstractClusterWri
     this.clusterIdToPoints = clusterIdToPoints;
     this.measure = measure;
   }
-  
+
   protected Writer getWriter() {
     return writer;
   }
@@ -73,35 +73,9 @@ public abstract class AbstractClusterWri
 
   public static String getTopFeatures(Vector vector, String[] dictionary, int numTerms) {
 
-    List<TermIndexWeight> vectorTerms = Lists.newArrayList();
-
-    for (Vector.Element elt : vector.nonZeroes()) {
-      vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
-    }
-
-    // Sort results in reverse order (ie weight in descending order)
-    Collections.sort(vectorTerms, new Comparator<TermIndexWeight>() {
-      @Override
-      public int compare(TermIndexWeight one, TermIndexWeight two) {
-        return Double.compare(two.weight, one.weight);
-      }
-    });
-
-    Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
-
-    for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
-      int index = vectorTerms.get(i).index;
-      String dictTerm = dictionary[index];
-      if (dictTerm == null) {
-        log.error("Dictionary entry missing for {}", index);
-        continue;
-      }
-      topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight));
-    }
-
     StringBuilder sb = new StringBuilder(100);
 
-    for (Pair<String, Double> item : topTerms) {
+    for (Pair<String, Double> item : getTopPairs(vector, dictionary, numTerms)) {
       String term = item.getFirst();
       sb.append("\n\t\t");
       sb.append(StringUtils.rightPad(term, 40));
@@ -113,35 +87,9 @@ public abstract class AbstractClusterWri
 
   public static String getTopTerms(Vector vector, String[] dictionary, int numTerms) {
 
-    List<TermIndexWeight> vectorTerms = Lists.newArrayList();
-
-    for (Vector.Element elt : vector.nonZeroes()) {
-      vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
-    }
-
-    // Sort results in reverse order (ie weight in descending order)
-    Collections.sort(vectorTerms, new Comparator<TermIndexWeight>() {
-      @Override
-      public int compare(TermIndexWeight one, TermIndexWeight two) {
-        return Double.compare(two.weight, one.weight);
-      }
-    });
-
-    Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
-
-    for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
-      int index = vectorTerms.get(i).index;
-      String dictTerm = dictionary[index];
-      if (dictTerm == null) {
-        log.error("Dictionary entry missing for {}", index);
-        continue;
-      }
-      topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight));
-    }
-
     StringBuilder sb = new StringBuilder(100);
 
-    for (Pair<String, Double> item : topTerms) {
+    for (Pair<String, Double> item : getTopPairs(vector, dictionary, numTerms)) {
       String term = item.getFirst();
       sb.append(term).append('_');
     }
@@ -170,6 +118,36 @@ public abstract class AbstractClusterWri
     return result;
   }
 
+  private static Collection<Pair<String, Double>> getTopPairs(Vector vector, String[] dictionary, int numTerms) {
+    List<TermIndexWeight> vectorTerms = Lists.newArrayList();
+
+    for (Vector.Element elt : vector.nonZeroes()) {
+      vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
+    }
+
+    // Sort results in reverse order (ie weight in descending order)
+    Collections.sort(vectorTerms, new Comparator<TermIndexWeight>() {
+      @Override
+      public int compare(TermIndexWeight one, TermIndexWeight two) {
+        return Double.compare(two.weight, one.weight);
+      }
+    });
+
+    Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
+
+    for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
+      int index = vectorTerms.get(i).index;
+      String dictTerm = dictionary[index];
+      if (dictTerm == null) {
+        log.error("Dictionary entry missing for {}", index);
+        continue;
+      }
+      topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight));
+    }
+
+    return topTerms;
+  }
+
   private static class TermIndexWeight {
     private final int index;
     private final double weight;