You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/17 23:59:52 UTC
svn commit: r785829 - in /lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors: Driver.java lucene/LuceneIteratable.java
Author: gsingers
Date: Wed Jun 17 21:59:51 2009
New Revision: 785829
URL: http://svn.apache.org/viewvc?rev=785829&view=rev
Log:
MAHOUT-126: fix normalization argument issues
Modified:
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java?rev=785829&r1=785828&r2=785829&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java Wed Jun 17 21:59:51 2009
@@ -136,8 +136,8 @@
if (cmdLine.hasOption(idFieldOpt)){
idField = cmdLine.getValue(idFieldOpt).toString();
}
- if (norm == -1) {
- iteratable = new LuceneIteratable(reader, idField, field, mapper);
+ if (norm == LuceneIteratable.NO_NORMALIZING) {
+ iteratable = new LuceneIteratable(reader, idField, field, mapper, LuceneIteratable.NO_NORMALIZING);
} else {
iteratable = new LuceneIteratable(reader, idField, field, mapper, norm);
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java?rev=785829&r1=785828&r2=785829&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java Wed Jun 17 21:59:51 2009
@@ -41,27 +41,32 @@
private FieldSelector idFieldSelector;
private VectorMapper mapper;
- private double normPower = -1;
+ private double normPower = NO_NORMALIZING;
+
+ public static final double NO_NORMALIZING = -1.0;
public LuceneIteratable(IndexReader reader, String idField, String field, VectorMapper mapper) {
- this(reader, idField, field, mapper, 2.0);
+ this(reader, idField, field, mapper, NO_NORMALIZING);
}
/**
* Produce a LuceneIterable that can create the Vector plus normalize it.
- * @param reader
+ * @param reader The {@link org.apache.lucene.index.IndexReader} to read the documents from.
* @param idField - The Field containing the id. May be null
* @param field The field to use for the Vector
- * @param mapper
- * @param normPower
+ * @param mapper The {@link org.apache.mahout.utils.vectors.lucene.VectorMapper} for creating {@link org.apache.mahout.matrix.Vector}s from Lucene's TermVectors.
+ * @param normPower The normalization value. Must be greater than or equal to 0 or equal to {@link #NO_NORMALIZING}
*/
public LuceneIteratable(IndexReader reader, String idField, String field, VectorMapper mapper, double normPower) {
+ if (normPower != NO_NORMALIZING && normPower < 0){
+ throw new IllegalArgumentException("normPower must either be -1 or >= 0");
+ }
+ idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.emptySet());
this.indexReader = reader;
this.idField = idField;
this.field = field;
this.mapper = mapper;
this.normPower = normPower;
- idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.emptySet());
}
@@ -105,7 +110,7 @@
} else {
result.setName(String.valueOf(doc));
}
- if (normPower >= 0){
+ if (normPower != NO_NORMALIZING){
result = result.normalize(normPower);
}
} catch (IOException e) {