You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2010/06/30 21:02:56 UTC
svn commit: r959410 - in /mahout/trunk/utils/src:
main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
Author: gsingers
Date: Wed Jun 30 19:02:56 2010
New Revision: 959410
URL: http://svn.apache.org/viewvc?rev=959410&view=rev
Log:
MAHOUT-374: restore the labeling of docs for Lucene iterable
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java?rev=959410&r1=959409&r2=959410&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/lucene/LuceneIterable.java Wed Jun 30 19:02:56 2010
@@ -18,10 +18,14 @@
package org.apache.mahout.utils.vectors.lucene;
import java.io.IOException;
+import java.util.Collections;
import java.util.Iterator;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;
+import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.Vector;
/**
@@ -34,8 +38,8 @@ public class LuceneIterable implements I
private final IndexReader indexReader;
private final String field;
- //private final String idField;
- //private final FieldSelector idFieldSelector;
+ private final String idField;
+ private final FieldSelector idFieldSelector;
private final VectorMapper mapper;
private double normPower = NO_NORMALIZING;
@@ -67,9 +71,9 @@ public class LuceneIterable implements I
if (normPower != NO_NORMALIZING && normPower < 0) {
throw new IllegalArgumentException("normPower must either be -1 or >= 0");
}
- //idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.<String>emptySet());
+ idFieldSelector = new SetBasedFieldSelector(Collections.singleton(idField), Collections.<String>emptySet());
this.indexReader = reader;
- //this.idField = idField;
+ this.idField = idField;
this.field = field;
this.mapper = mapper;
this.normPower = normPower;
@@ -114,6 +118,13 @@ public class LuceneIterable implements I
if (result == null) {
return null;
}
+ String name;
+ if (idField != null) {
+ name = indexReader.document(doc, idFieldSelector).get(idField);
+ } else {
+ name = String.valueOf(doc);
+ }
+ result = new NamedVector(result, name);
if (normPower != NO_NORMALIZING) {
result = result.normalize(normPower);
}
Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java?rev=959410&r1=959409&r2=959410&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/lucene/LuceneIterableTest.java Wed Jun 30 19:02:56 2010
@@ -27,6 +27,7 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.utils.vectors.TFIDF;
@@ -72,8 +73,9 @@ public class LuceneIterableTest extends
//TODO: do something more meaningful here
for (Vector vector : iterable) {
Assert.assertNotNull(vector);
- Assert.assertTrue("vector is not an instanceof " + RandomAccessSparseVector.class, vector instanceof RandomAccessSparseVector);
+ Assert.assertTrue("vector is not an instanceof " + NamedVector.class, vector instanceof NamedVector);
Assert.assertTrue("vector Size: " + vector.size() + " is not greater than: " + 0, vector.size() > 0);
+ assertTrue(((NamedVector)vector).getName().startsWith("doc_"));
}
}