You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2014/01/09 22:34:43 UTC
svn commit: r1556958 - in /mahout/trunk: CHANGELOG
integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
Author: smarthi
Date: Thu Jan 9 21:34:43 2014
New Revision: 1556958
URL: http://svn.apache.org/r1556958
Log:
MAHOUT-1393: Removed duplicated code from getTopTerms and getTopFeatures in AbstractClusterWriter
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1556958&r1=1556957&r2=1556958&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Thu Jan 9 21:34:43 2014
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.9 - unreleased
+ MAHOUT-1393: Remove duplicated code from getTopTerms and getTopFeatures in AbstractClusterWriter (Diego Carrion via smarthi)
+
MAHOUT-1392: Streaming KMeans should write centroid output to a 'part-r-xxxx' file when executed in sequential mode (smarthi)
MAHOUT-1390: SVD hangs for certain inputs (tdunning)
Modified: mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java?rev=1556958&r1=1556957&r2=1556958&view=diff
==============================================================================
--- mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java (original)
+++ mahout/trunk/integration/src/main/java/org/apache/mahout/utils/clustering/AbstractClusterWriter.java Thu Jan 9 21:34:43 2014
@@ -62,7 +62,7 @@ public abstract class AbstractClusterWri
this.clusterIdToPoints = clusterIdToPoints;
this.measure = measure;
}
-
+
protected Writer getWriter() {
return writer;
}
@@ -73,35 +73,9 @@ public abstract class AbstractClusterWri
public static String getTopFeatures(Vector vector, String[] dictionary, int numTerms) {
- List<TermIndexWeight> vectorTerms = Lists.newArrayList();
-
- for (Vector.Element elt : vector.nonZeroes()) {
- vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
- }
-
- // Sort results in reverse order (ie weight in descending order)
- Collections.sort(vectorTerms, new Comparator<TermIndexWeight>() {
- @Override
- public int compare(TermIndexWeight one, TermIndexWeight two) {
- return Double.compare(two.weight, one.weight);
- }
- });
-
- Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
-
- for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
- int index = vectorTerms.get(i).index;
- String dictTerm = dictionary[index];
- if (dictTerm == null) {
- log.error("Dictionary entry missing for {}", index);
- continue;
- }
- topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight));
- }
-
StringBuilder sb = new StringBuilder(100);
- for (Pair<String, Double> item : topTerms) {
+ for (Pair<String, Double> item : getTopPairs(vector, dictionary, numTerms)) {
String term = item.getFirst();
sb.append("\n\t\t");
sb.append(StringUtils.rightPad(term, 40));
@@ -113,35 +87,9 @@ public abstract class AbstractClusterWri
public static String getTopTerms(Vector vector, String[] dictionary, int numTerms) {
- List<TermIndexWeight> vectorTerms = Lists.newArrayList();
-
- for (Vector.Element elt : vector.nonZeroes()) {
- vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
- }
-
- // Sort results in reverse order (ie weight in descending order)
- Collections.sort(vectorTerms, new Comparator<TermIndexWeight>() {
- @Override
- public int compare(TermIndexWeight one, TermIndexWeight two) {
- return Double.compare(two.weight, one.weight);
- }
- });
-
- Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
-
- for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
- int index = vectorTerms.get(i).index;
- String dictTerm = dictionary[index];
- if (dictTerm == null) {
- log.error("Dictionary entry missing for {}", index);
- continue;
- }
- topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight));
- }
-
StringBuilder sb = new StringBuilder(100);
- for (Pair<String, Double> item : topTerms) {
+ for (Pair<String, Double> item : getTopPairs(vector, dictionary, numTerms)) {
String term = item.getFirst();
sb.append(term).append('_');
}
@@ -170,6 +118,36 @@ public abstract class AbstractClusterWri
return result;
}
+ private static Collection<Pair<String, Double>> getTopPairs(Vector vector, String[] dictionary, int numTerms) {
+ List<TermIndexWeight> vectorTerms = Lists.newArrayList();
+
+ for (Vector.Element elt : vector.nonZeroes()) {
+ vectorTerms.add(new TermIndexWeight(elt.index(), elt.get()));
+ }
+
+ // Sort results in reverse order (ie weight in descending order)
+ Collections.sort(vectorTerms, new Comparator<TermIndexWeight>() {
+ @Override
+ public int compare(TermIndexWeight one, TermIndexWeight two) {
+ return Double.compare(two.weight, one.weight);
+ }
+ });
+
+ Collection<Pair<String, Double>> topTerms = Lists.newLinkedList();
+
+ for (int i = 0; i < vectorTerms.size() && i < numTerms; i++) {
+ int index = vectorTerms.get(i).index;
+ String dictTerm = dictionary[index];
+ if (dictTerm == null) {
+ log.error("Dictionary entry missing for {}", index);
+ continue;
+ }
+ topTerms.add(new Pair<String, Double>(dictTerm, vectorTerms.get(i).weight));
+ }
+
+ return topTerms;
+ }
+
private static class TermIndexWeight {
private final int index;
private final double weight;