You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by gs...@apache.org on 2009/06/24 15:23:54 UTC

svn commit: r788014 - in /lucene/mahout/trunk/utils/src: main/java/org/apache/mahout/utils/vectors/io/ test/java/org/apache/mahout/utils/vectors/ test/java/org/apache/mahout/utils/vectors/io/

Author: gsingers
Date: Wed Jun 24 13:23:54 2009
New Revision: 788014

URL: http://svn.apache.org/viewvc?rev=788014&view=rev
Log:
Put some more testing in place and some javadocs

Added:
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java
Modified:
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
    lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java?rev=788014&r1=788013&r2=788014&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriter.java Wed Jun 24 13:23:54 2009
@@ -9,7 +9,7 @@
 
 
 /**
- *
+ * Closes the writer when done
  *
  **/
 public class SequenceFileVectorWriter implements VectorWriter {
@@ -21,18 +21,16 @@
 
   @Override
   public long write(VectorIterable iterable, long maxDocs) throws IOException {
-    long i = 0;
     long recNum = 0;
     for (Vector point : iterable) {
-      if (i >= maxDocs) {
+      if (recNum >= maxDocs) {
         break;
       }
       //point.write(dataOut);
       writer.append(new LongWritable(recNum++), point);
 
-      i++;
     }
-    return i;
+    return recNum;
   }
 
   @Override
@@ -42,6 +40,12 @@
 
   @Override
   public void close() throws IOException {
+    if (writer != null) {
+      writer.close();
+    }
+  }
 
+  public SequenceFile.Writer getWriter() {
+    return writer;
   }
 }

Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java?rev=788014&r1=788013&r2=788014&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/io/VectorWriter.java Wed Jun 24 13:23:54 2009
@@ -10,9 +10,29 @@
  *
  **/
 public interface VectorWriter {
+  /**
+   * Write all values in the Iterable to the output
+   * @param iterable The {@link org.apache.mahout.utils.vectors.VectorIterable}
+   * @return the number of docs written
+   * @throws IOException if there was a problem writing
+   *
+   * @see #write(org.apache.mahout.utils.vectors.VectorIterable, long)
+   */
   public long write(VectorIterable iterable) throws IOException;
 
+  /**
+   * Write the first <code>maxDocs</code> to the output.
+   * @param iterable The {@link org.apache.mahout.utils.vectors.VectorIterable}
+   * @param maxDocs the maximum number of docs to write
+   * @return The number of docs written
+   * @throws IOException if there was a problem writing
+   */
   public long write(VectorIterable iterable, long maxDocs) throws IOException;
 
+  /**
+   * Close any internally held resources.  If external Writers are passed in, the implementation should indicate
+   * whether it also closes them
+   * @throws IOException if there was an issue closing the item
+   */
   public void close() throws IOException;
 }

Added: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java?rev=788014&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java (added)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/RandomVectorIterable.java Wed Jun 24 13:23:54 2009
@@ -0,0 +1,66 @@
+package org.apache.mahout.utils.vectors;
+
+import org.apache.mahout.matrix.Vector;
+import org.apache.mahout.matrix.DenseVector;
+import org.apache.mahout.matrix.UnaryFunction;
+import org.apache.mahout.matrix.SparseVector;
+
+import java.util.Iterator;
+import java.util.Random;
+
+
+/**
+ * Test {@link VectorIterable} whose iterators each yield <code>numItems</code> (100 by default) vectors populated via
+ * assign() with values from {@link java.util.Random#nextDouble()}; sparse unless {@link VectorType#DENSE} is requested.
+ **/
+public class RandomVectorIterable implements VectorIterable{
+
+  int numItems = 100; // vectors produced per iterator; also passed to each vector's constructor
+  public static enum VectorType {DENSE, SPARSE};
+
+  VectorType type = VectorType.SPARSE; // used when no type is given to the constructor
+
+  public RandomVectorIterable() {
+  }
+
+  public RandomVectorIterable(int numItems) {
+    this.numItems = numItems;
+  }
+
+  public RandomVectorIterable(int numItems, VectorType type) {
+    this.numItems = numItems;
+    this.type = type;
+  }
+
+  @Override
+  public Iterator<Vector> iterator() {
+    return new VectIterator(); // each call starts a fresh iterator with its own count and Random
+  }
+
+  private class VectIterator implements Iterator<Vector>{
+    int count = 0; // vectors handed out so far by this iterator
+    Random random = new Random(); // unseeded, so generated values differ from run to run
+    @Override
+    public boolean hasNext() {
+      return count < numItems;
+    }
+
+    @Override
+    public Vector next() { // NOTE(review): no NoSuchElementException once hasNext() is false; keeps producing vectors
+      Vector result = type.equals(VectorType.SPARSE) ? new SparseVector(numItems) : new DenseVector(numItems);
+      result.assign(new UnaryFunction(){
+        @Override
+        public double apply(double arg1) {
+          return random.nextDouble(); // the incoming value arg1 is ignored
+        }
+      });
+      count++;
+      return result;
+    }
+
+    @Override
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+}

Added: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java?rev=788014&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java (added)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/utils/vectors/io/SequenceFileVectorWriterTest.java Wed Jun 24 13:23:54 2009
@@ -0,0 +1,45 @@
+package org.apache.mahout.utils.vectors.io;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.mahout.matrix.SparseVector;
+import org.apache.mahout.utils.vectors.RandomVectorIterable;
+
+import java.io.File;
+
+
+/**
+ * Writes 50 random vectors to a temporary SequenceFile through {@link SequenceFileVectorWriter} and checks that
+ * exactly 50 records can be read back from that file.
+ **/
+public class SequenceFileVectorWriterTest extends TestCase {
+
+  public void testSFVW() throws Exception {
+    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
+    File tmpLoc = new File(tmpDir, "sfvwt");
+    tmpLoc.mkdirs();
+    File tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc); // NOTE(review): never deleted after the test
+
+    Path path = new Path(tmpFile.getAbsolutePath());
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    SequenceFile.Writer seqWriter = new SequenceFile.Writer(fs, conf, path, LongWritable.class, SparseVector.class);
+    SequenceFileVectorWriter writer = new SequenceFileVectorWriter(seqWriter);
+    RandomVectorIterable iter = new RandomVectorIterable(50); // 50 vectors, SPARSE by default
+    writer.write(iter);
+    writer.close(); // close before reading back; presumably this also closes seqWriter - confirm against the constructor
+
+    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, path, conf); // NOTE(review): seqReader is never closed
+    LongWritable key = new LongWritable();
+    SparseVector value = new SparseVector();
+    int count = 0;
+    while (seqReader.next(key, value)){ // count every record present in the file
+      count++;
+    }
+    assertTrue(count + " does not equal: " + 50, count == 50);
+  }
+}