You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/02/19 00:12:30 UTC
svn commit: r911641 - /lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
Author: jmannix
Date: Thu Feb 18 23:12:30 2010
New Revision: 911641
URL: http://svn.apache.org/viewvc?rev=911641&view=rev
Log:
When converting to tfIdf vectors, the dimension of the document vectors needs to be one more than the *highest* termId, which is not necessarily the same as the number of terms encountered.
Modified:
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java?rev=911641&r1=911640&r2=911641&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java Thu Feb 18 23:12:30 2010
@@ -223,10 +223,11 @@
} else if (key.get() == -1) {
vectorCount = value.get();
}
- featureCount++;
+ featureCount = Math.max(key.get(), featureCount);
}
}
+ featureCount++;
freqWriter.close();
Long[] counts = {Long.valueOf(featureCount), Long.valueOf(vectorCount)};
return new Pair<Long[],List<Path>>(counts, chunkPaths);