You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/24 15:23:54 UTC
svn commit: r788014 - in /lucene/mahout/trunk/utils/src:
main/java/org/apache/mahout/utils/vectors/io/
test/java/org/apache/mahout/utils/vectors/
test/java/org/apache/mahout/utils/vectors/io/
Author: gsingers
Date: Wed Jun 24 13:23:54 2009
New Revision: 788014
URL: http://svn.apache.org/viewvc?rev=788014&view=rev
Log:
Put some more testing in place and some javadocs
Added:
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/
lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java
Modified:
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=788014&r1=788013&r2=788014&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java Wed Jun 24 13:23:54 2009
@@ -9,7 +9,7 @@
/**
- *
+ * Closes the writer when done
*
**/
public class SequenceFileVectorWriter implements VectorWriter {
@@ -21,18 +21,16 @@
@Override
public long write(VectorIterable iterable, long maxDocs) throws IOException {
- long i = 0;
long recNum = 0;
for (Vector point : iterable) {
- if (i >= maxDocs) {
+ if (recNum >= maxDocs) {
break;
}
//point.write(dataOut);
writer.append(new LongWritable(recNum++), point);
- i++;
}
- return i;
+ return recNum;
}
@Override
@@ -42,6 +40,12 @@
@Override
public void close() throws IOException {
+ if (writer != null) {
+ writer.close();
+ }
+ }
+ public SequenceFile.Writer getWriter() {
+ return writer;
}
}
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=788014&r1=788013&r2=788014&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java Wed Jun 24 13:23:54 2009
@@ -10,9 +10,29 @@
*
**/
public interface VectorWriter {
+ /**
+ * Write all values in the Iterable to the output
+ * @param iterable The {@link org.apache.mahout.utils.vectors.VectorIterable}
+ * @return the number of docs written
+ * @throws IOException if there was a problem writing
+ *
+ * @see #write(org.apache.mahout.utils.vectors.VectorIterable, long)
+ */
public long write(VectorIterable iterable) throws IOException;
+ /**
+ * Write the first <code>maxDocs</code> to the output.
+ * @param iterable The {@link org.apache.mahout.utils.vectors.VectorIterable}
+ * @param maxDocs the maximum number of docs to write
+ * @return The number of docs written
+ * @throws IOException if there was a problem writing
+ */
public long write(VectorIterable iterable, long maxDocs) throws IOException;
+ /**
+ * Close any internally held resources. If external Writers are passed in, the implementation should indicate
+ * whether it also closes them
+ * @throws IOException if there was an issue closing the item
+ */
public void close() throws IOException;
}
Added: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java?rev=788014&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java (added)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java Wed Jun 24 13:23:54 2009
@@ -0,0 +1,66 @@
+package org.apache.mahout.utils.vectors;
+
+import org.apache.mahout.matrix.Vector;
+import org.apache.mahout.matrix.DenseVector;
+import org.apache.mahout.matrix.UnaryFunction;
+import org.apache.mahout.matrix.SparseVector;
+
+import java.util.Iterator;
+import java.util.Random;
+
+
+/**
+ * Test helper: a {@link VectorIterable} whose iterators report <code>numItems</code> elements, each a new
+ * sparse or dense vector built with <code>numItems</code> as its constructor argument and assigned random doubles.
+ **/
+public class RandomVectorIterable implements VectorIterable{
+ // Number of vectors each iterator reports; also passed to the vector constructors in next(). Defaults to 100.
+ int numItems = 100;
+ public static enum VectorType {DENSE, SPARSE}; // selects DenseVector or SparseVector in next()
+ // Kind of vector created by the iterators; defaults to SPARSE.
+ VectorType type = VectorType.SPARSE;
+ /** Keeps the defaults: 100 items, SPARSE vectors. */
+ public RandomVectorIterable() {
+ }
+ /** Overrides the number of items; the vector type stays at its SPARSE default. */
+ public RandomVectorIterable(int numItems) {
+ this.numItems = numItems;
+ }
+ /** Sets both the number of items and the type of vector to create. */
+ public RandomVectorIterable(int numItems, VectorType type) {
+ this.numItems = numItems;
+ this.type = type;
+ }
+ /** Returns a new iterator on every call, each with its own counter (starting at 0) and its own Random. */
+ @Override
+ public Iterator<Vector> iterator() {
+ return new VectIterator();
+ }
+ // Non-static inner class: reads numItems and type from the enclosing RandomVectorIterable instance.
+ private class VectIterator implements Iterator<Vector>{
+ int count = 0; // vectors handed out so far
+ Random random = new Random(); // no explicit seed, so generated values are not reproducible
+ @Override
+ public boolean hasNext() {
+ return count < numItems;
+ }
+ // NOTE(review): next() does not check hasNext(); it keeps building vectors even once count >= numItems.
+ @Override
+ public Vector next() {
+ Vector result = type.equals(VectorType.SPARSE) ? new SparseVector(numItems) : new DenseVector(numItems);
+ result.assign(new UnaryFunction(){
+ @Override
+ public double apply(double arg1) {
+ return random.nextDouble(); // arg1 is ignored; the result is always a new random double
+ }
+ });
+ count++;
+ return result;
+ }
+ // Removal is not supported by this iterator.
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+}
Added: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java?rev=788014&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java (added)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java Wed Jun 24 13:23:54 2009
@@ -0,0 +1,45 @@
+package org.apache.mahout.utils.vectors.io;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.mahout.matrix.SparseVector;
+import org.apache.mahout.utils.vectors.RandomVectorIterable;
+
+import java.io.File;
+
+
+/**
+ * Round-trip test for {@link SequenceFileVectorWriter}: writes the vectors of a 50-item
+ * {@link RandomVectorIterable} to a temporary sequence file and checks that 50 records can be read back.
+ **/
+public class SequenceFileVectorWriterTest extends TestCase {
+ // Writes through the single-argument write(iterable), then counts the records with a SequenceFile.Reader.
+ public void testSFVW() throws Exception {
+ File tmpDir = new File(System.getProperty("java.io.tmpdir"));
+ File tmpLoc = new File(tmpDir, "sfvwt");
+ tmpLoc.mkdirs();
+ File tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc);
+ // createTempFile has already created an empty file here; the sequence file writer is opened on that same path.
+ Path path = new Path(tmpFile.getAbsolutePath());
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.get(conf);
+ SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, LongWritable.class, SparseVector.class);
+ SequenceFileVectorWriter writer = new SequenceFileVectorWriter(seqWriter);
+ RandomVectorIterable iter = new RandomVectorIterable(50); // 50 items, default SPARSE vector type
+ writer.write(iter);
+ writer.close(); // close the writer before the file is read back
+ // Read the file back, reusing a single key and a single value instance for every record.
+ SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf);
+ LongWritable key = new LongWritable();
+ SparseVector value = new SparseVector();
+ int count = 0;
+ while (seqReader.next(key, value)){
+ count++;
+ } // NOTE(review): seqReader is never closed and tmpFile is never deleted after the test
+ assertTrue(count + " does not equal: " + 50, count == 50);
+ }
+}