You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2010/06/30 21:02:56 UTC

svn commit: r959410 - in /mahout/trunk/utils/src: main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java

Author: gsingers
Date: Wed Jun 30 19:02:56 2010
New Revision: 959410

URL: http://svn.apache.org/viewvc?rev=959410&view=rev
Log:
MAHOUT-374: Restore the labeling of documents (as NamedVector names) in LuceneIterable

Modified:
    mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
    mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=959410&r1=959409&r2=959410&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java Wed Jun 30 19:02:56 2010
@@ -18,10 +18,14 @@
 package org.apache.mahout.utils.vectors.lucene;
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.Iterator;
 
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermDocs;
+import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.Vector;
 
 /**
@@ -34,8 +38,8 @@ public class LuceneIterable implements I
 
   private final IndexReader indexReader;
   private final String field;
-  //private final String idField;
-  //private final FieldSelector idFieldSelector;
+  private final String idField;
+  private final FieldSelector idFieldSelector;
   
   private final VectorMapper mapper;
   private double normPower = NO_NORMALIZING;
@@ -67,9 +71,9 @@ public class LuceneIterable implements I
     if (normPower != NO_NORMALIZING && normPower < 0) {
       throw new IllegalArgumentException("normPower must either be -1 or >= 0");
     }
-    //idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.<String>emptySet());
+    idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.<String>emptySet());
     this.indexReader = reader;
-    //this.idField = idField;
+    this.idField = idField;
     this.field = field;
     this.mapper = mapper;
     this.normPower = normPower;
@@ -114,6 +118,13 @@ public class LuceneIterable implements I
         if (result == null) {
           return null;
         }
+        String name;
+        if (idField != null) {
+          name = indexReader.document(doc, idFieldSelector).get(idField);
+        } else {
+          name = String.valueOf(doc);
+        }
+        result = new NamedVector(result, name);
         if (normPower != NO_NORMALIZING) {
           result = result.normalize(normPower);
         }

Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=959410&r1=959409&r2=959410&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Wed Jun 30 19:02:56 2010
@@ -27,6 +27,7 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Version;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.NamedVector;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.utils.vectors.TFIDF;
@@ -72,8 +73,9 @@ public class LuceneIterableTest extends 
     //TODO: do something more meaningful here
     for (Vector vector : iterable) {
       Assert.assertNotNull(vector);
-      Assert.assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector);
+      Assert.assertTrue("vector is not an instanceof " + NamedVector.class, vector instanceof NamedVector);
       Assert.assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0);
+      assertTrue(((NamedVector)vector).getName().startsWith("doc_"));
     }
   }