You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/17 23:59:52 UTC

svn commit: r785829 - in /lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors: Driver.java lucene/LuceneIteratable.java

Author: gsingers
Date: Wed Jun 17 21:59:51 2009
New Revision: 785829

URL: http://svn.apache.org/viewvc?rev=785829&view=rev
Log:
MAHOUT-126: fix normalization argument issues

Modified:
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java?rev=785829&r1=785828&r2=785829&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/Driver.java Wed Jun 17 21:59:51 2009
@@ -136,8 +136,8 @@
           if (cmdLine.hasOption(idFieldOpt)){
             idField = cmdLine.getValue(idFieldOpt).toString();
           }
-          if (norm == -1) {
-            iteratable = new LuceneIteratable(reader, idField, field, mapper);
+          if (norm == LuceneIteratable.NO_NORMALIZING) {
+            iteratable = new LuceneIteratable(reader, idField, field, mapper, LuceneIteratable.NO_NORMALIZING);
           } else {
             iteratable = new LuceneIteratable(reader, idField, field, mapper, norm);
           }

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java?rev=785829&r1=785828&r2=785829&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIteratable.java Wed Jun 17 21:59:51 2009
@@ -41,27 +41,32 @@
   private FieldSelector idFieldSelector;
 
   private VectorMapper mapper;
-  private double normPower = -1;
+  private double normPower = NO_NORMALIZING;
+
+  public static final double NO_NORMALIZING = -1.0;
 
   public LuceneIteratable(IndexReader reader, String idField, String field, VectorMapper mapper) {
-    this(reader, idField, field, mapper, 2.0);
+    this(reader, idField, field, mapper, NO_NORMALIZING);
   }
 
   /**
    * Produce a LuceneIterable that can create the Vector plus normalize it.
-   * @param reader
+   * @param reader The {@link org.apache.lucene.index.IndexReader} to read the documents from.
    * @param idField - The Field containing the id.  May be null
    * @param field The field to use for the Vector
-   * @param mapper
-   * @param normPower
+   * @param mapper The {@link org.apache.mahout.utils.vectors.lucene.VectorMapper} for creating {@link org.apache.mahout.matrix.Vector}s from Lucene's TermVectors.
+   * @param normPower The normalization value.  Must be greater than or equal to 0 or equal to {@link #NO_NORMALIZING}
    */
   public LuceneIteratable(IndexReader reader, String idField, String field, VectorMapper mapper, double normPower) {
+    if (normPower != NO_NORMALIZING && normPower < 0){
+      throw new IllegalArgumentException("normPower must either be -1 or >= 0");
+    }
+      idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.emptySet());
     this.indexReader = reader;
     this.idField = idField;
     this.field = field;
     this.mapper = mapper;
     this.normPower = normPower;
-    idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.emptySet());
   }
 
 
@@ -105,7 +110,7 @@
         } else {
           result.setName(String.valueOf(doc));
         }
-        if (normPower >= 0){
+        if (normPower != NO_NORMALIZING){
           result = result.normalize(normPower);
         }
       } catch (IOException e) {