You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ka...@apache.org on 2008/04/23 19:35:21 UTC

svn commit: r650950 - in /lucene/mahout/trunk/core/src: main/java/org/apache/mahout/matrix/ main/java/org/apache/mahout/utils/ test/java/org/apache/mahout/matrix/

Author: kalle
Date: Wed Apr 23 10:35:16 2008
New Revision: 650950

URL: http://svn.apache.org/viewvc?rev=650950&view=rev
Log:
MAHOUT-50, Vector extends Writable.
Old VectorWritable is removed.

Removed:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVectorWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVectorWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorWritable.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java Wed Apr 23 10:35:16 2008
@@ -17,12 +17,13 @@
 package org.apache.mahout.matrix;
 
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
 
 /**
  * Implementations of generic capabilities like sum of elements and dot products
  * 
  */
-public abstract class AbstractVector implements Vector {
+public abstract class AbstractVector implements Vector, Writable {
 
   /*
    * (non-Javadoc)

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java Wed Apr 23 10:35:16 2008
@@ -19,10 +19,19 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.DataInput;
+
 /**
  * Implements vector as an array of doubles
  */
 public class DenseVector extends AbstractVector {
+
+  /**
+   * No-argument constructor, for serialization purposes only. It does not
+   * assign a backing array; {@link #readFields(DataInput)} installs one.
+   */
+  public DenseVector() {
+    super();
+  }
+
   private double[] values;
 
   /**
@@ -167,5 +176,21 @@
     public void remove() {
       throw new UnsupportedOperationException();
     }
+  }
+
+
+  /**
+   * Serializes this vector: the cardinality as an int, followed by the value
+   * of every element supplied by this vector's iterator, each as a double.
+   *
+   * @param dataOutput the sink to write to
+   * @throws IOException if writing to the sink fails
+   */
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeInt(cardinality());
+    java.util.Iterator<Vector.Element> elements = iterator();
+    while (elements.hasNext()) {
+      dataOutput.writeDouble(elements.next().get());
+    }
+  }
+
+  /**
+   * Replaces the contents of this vector with data read from the stream: an
+   * int element count followed by that many doubles. The backing array is
+   * swapped in only after every value has been read.
+   *
+   * @param dataInput the source to read from
+   * @throws IOException if reading from the source fails
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    int length = dataInput.readInt();
+    double[] decoded = new double[length];
+    for (int index = 0; index < length; index++) {
+      decoded[index] = dataInput.readDouble();
+    }
+    values = decoded;
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java Wed Apr 23 10:35:16 2008
@@ -18,23 +18,31 @@
 
 import java.util.HashMap;
 import java.util.Map;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.DataInput;
 
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 
 /**
  * Implements vector that only stores non-zero doubles
- * 
+ *
  */
 public class SparseVector extends AbstractVector {
 
-  private Map<Integer, Double> values = new HashMap<Integer, Double>();
+  /**
+   * No-argument constructor, for serialization purposes only. It does not
+   * create the value map; {@link #readFields(DataInput)} installs one.
+   */
+  public SparseVector() {
+    super();
+  }
+
+  private Map<Integer, Double> values;
+
 
   private int cardinality;
 
   /**
    * Decode a new instance from the formatted string
-   * 
+   *
    * @param formattedString
    *            a string produced by the asFormatString method
    * @return a DenseVector
@@ -59,6 +67,7 @@
 
   public SparseVector(int cardinality) {
     super();
+    values = new HashMap<Integer, Double>();
     this.cardinality = cardinality;
   }
 
@@ -160,6 +169,26 @@
     return new Iterator();
   }
 
+
+  /**
+   * Two sparse vectors are equal when they are of exactly the same class,
+   * have the same cardinality, and their value maps are equal (or both unset).
+   *
+   * @param o the object to compare with
+   * @return true if o is an equal SparseVector
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (o == this) {
+      return true;
+    }
+    if (o == null || o.getClass() != getClass()) {
+      return false;
+    }
+    SparseVector other = (SparseVector) o;
+    if (other.cardinality != cardinality) {
+      return false;
+    }
+    if (values == null) {
+      return other.values == null;
+    }
+    return values.equals(other.values);
+  }
+
+  public int hashCode() {
+    int result;
+    result = (values != null ? values.hashCode() : 0);
+    result = 31 * result + cardinality;
+    return result;
+  }
+
   private class Iterator implements java.util.Iterator<Vector.Element> {
     private java.util.Iterator<Map.Entry<Integer, Double>> it;
 
@@ -200,6 +229,28 @@
       result += getQuick(nextIndex) * x.getQuick(nextIndex);
     }
     return result;
+  }
+
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeInt(cardinality());
+    dataOutput.writeInt(size());
+    for (Vector.Element element : this) {
+      if (element.get() != 0d) {
+        dataOutput.writeInt(element.index());
+        dataOutput.writeDouble(element.get());
+      }
+    }
+  }
+
+  /**
+   * Replaces the contents of this vector with data read from the stream: the
+   * cardinality (int), an entry count (int), then that many
+   * (index int, value double) pairs. The fields are updated only after all
+   * entries have been read.
+   *
+   * @param dataInput the source to read from
+   * @throws IOException if reading from the source fails
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    int newCardinality = dataInput.readInt();
+    Map<Integer, Double> entries = new HashMap<Integer, Double>();
+    int entryCount = dataInput.readInt();
+    for (int remaining = entryCount; remaining > 0; remaining--) {
+      int index = dataInput.readInt();
+      double value = dataInput.readDouble();
+      entries.put(index, value);
+    }
+    cardinality = newCardinality;
+    values = entries;
   }
 
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java Wed Apr 23 10:35:16 2008
@@ -17,30 +17,31 @@
 package org.apache.mahout.matrix;
 
 import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
 
 /**
  * The basic interface including numerous convenience functions
- * 
+ *
  */
-public interface Vector extends Iterable<Vector.Element> {
+public interface Vector extends Iterable<Vector.Element>, Writable {
 
   /**
    * Return a formatted WritableComparable suitable for output
-   * 
+   *
    * @return formatted WritableComparable
    */
   WritableComparable asWritableComparable();
 
   /**
    * Return a formatted String suitable for output
-   * 
+   *
    * @return
    */
   String asFormatString();
 
   /**
    * Assign the value to all elements of the receiver
-   * 
+   *
    * @param value
    *            a double value
    * @return the modified receiver
@@ -49,7 +50,7 @@
 
   /**
    * Assign the values to the receiver
-   * 
+   *
    * @param values
    *            a double[] of values
    * @return the modified receiver
@@ -60,7 +61,7 @@
 
   /**
    * Assign the other matrix values to the receiver
-   * 
+   *
    * @param other
    *            a Vector
    * @return the modified receiver
@@ -71,7 +72,7 @@
 
   /**
    * Apply the function to each element of the receiver
-   * 
+   *
    * @param function
    *            a DoubleFunction to apply
    * @return the modified receiver
@@ -81,7 +82,7 @@
   /**
    * Apply the function to each element of the receiver and the corresponding
    * element of the other argument
-   * 
+   *
    * @param other
    *            a Vector containing the second arguments to the function
    * @param function
@@ -95,14 +96,14 @@
 
   /**
    * Return the cardinality of the recipient (the maximum number of values)
-   * 
+   *
    * @return an int
    */
   int cardinality();
 
   /**
    * Return a copy of the recipient
-   * 
+   *
    * @return a new Vector
    */
   Vector copy();
@@ -110,7 +111,7 @@
   /**
    * Return an object of Vector.Element representing an element of this Vector.
    * Useful when designing new iterator types.
-   * 
+   *
    * @param index
    *            Index of the Vector.Element required
    * @return The Vector.Element Object
@@ -138,7 +139,7 @@
   /**
    * Return a new matrix containing the values of the recipient divided by the
    * argument
-   * 
+   *
    * @param x
    *            a double value
    * @return a new Vector
@@ -147,7 +148,7 @@
 
   /**
    * Return the dot product of the recipient and the argument
-   * 
+   *
    * @param x
    *            a Vector
    * @return a new Vector
@@ -158,7 +159,7 @@
 
   /**
    * Return the value at the given index
-   * 
+   *
    * @param index
    *            an int index
    * @return the double at the index
@@ -169,7 +170,7 @@
 
   /**
    * Return the value at the given index, without checking bounds
-   * 
+   *
    * @param index
    *            an int index
    * @return the double at the index
@@ -178,7 +179,7 @@
 
   /**
    * Return if the other matrix and the receiver share any underlying data cells
-   * 
+   *
    * @param other
    *            a Vector
    * @return true if the other matrix has common data cells
@@ -187,7 +188,7 @@
 
   /**
    * Return an empty matrix of the same underlying class as the receiver
-   * 
+   *
    * @return a Vector
    */
   Vector like();
@@ -195,7 +196,7 @@
   /**
    * Return an empty matrix of the same underlying class as the receiver and of
    * the given cardinality
-   * 
+   *
    * @param cardinality
    *            an int specifying the desired cardinality
    * @return a Vector
@@ -205,7 +206,7 @@
   /**
    * Return a new matrix containing the element by element difference of the
    * recipient and the argument
-   * 
+   *
    * @param x
    *            a Vector
    * @return a new Vector
@@ -216,7 +217,7 @@
 
   /**
    * Return a new matrix containing the normalized values of the recipient
-   * 
+   *
    * @return a new Vector
    */
   Vector normalize();
@@ -224,7 +225,7 @@
   /**
    * Return a new matrix containing the sum of each value of the recipient and
    * the argument
-   * 
+   *
    * @param x
    *            a double
    * @return a new Vector
@@ -234,7 +235,7 @@
   /**
    * Return a new matrix containing the element by element sum of the recipient
    * and the argument
-   * 
+   *
    * @param x
    *            a Vector
    * @return a new Vector
@@ -245,7 +246,7 @@
 
   /**
    * Set the value at the given index
-   * 
+   *
    * @param index
    *            an int index into the receiver
    * @param value
@@ -257,7 +258,7 @@
 
   /**
    * Set the value at the given index, without checking bounds
-   * 
+   *
    * @param index
    *            an int index into the receiver
    * @param value
@@ -267,7 +268,7 @@
 
   /**
    * Return the number of values in the recipient
-   * 
+   *
    * @return an int
    */
   int size();
@@ -275,7 +276,7 @@
   /**
    * Return a new matrix containing the product of each value of the recipient
    * and the argument
-   * 
+   *
    * @param x
    *            a double argument
    * @return a new Vector
@@ -285,7 +286,7 @@
   /**
    * Return a new matrix containing the element-wise product of the recipient
    * and the argument
-   * 
+   *
    * @param x
    *            a Vector argument
    * @return a new Vector
@@ -296,14 +297,14 @@
 
   /**
    * Return the element of the recipient as a double[]
-   * 
+   *
    * @return a double[]
    */
   double[] toArray();
 
   /**
    * Return a new matrix containing the subset of the recipient
-   * 
+   *
    * @param offset
    *            an int offset into the receiver
    * @param length
@@ -320,14 +321,14 @@
 
   /**
    * Return the sum of all the elements of the receiver
-   * 
+   *
    * @return a double
    */
   double zSum();
 
   /**
    * Return the cross product of the receiver and the other vector
-   * 
+   *
    * @param other
    *            another Vector
    * @return a Matrix
@@ -336,7 +337,7 @@
 
   /*
    * Need stories for these but keeping them here for now.
-   * 
+   *
    */
   // void getNonZeros(IntArrayList jx, DoubleArrayList values);
   // void foreachNonZero(IntDoubleFunction f);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java Wed Apr 23 10:35:16 2008
@@ -19,12 +19,20 @@
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.WritableComparable;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
 import java.util.Iterator;
 
 /**
  * Implements subset view of a Vector
  */
 public class VectorView extends AbstractVector {
+
+  /**
+   * No-argument constructor, for serialization purposes only. It does not
+   * assign the viewed vector; {@link #readFields(DataInput)} installs one.
+   */
+  public VectorView() {
+    super();
+  }
+
   private Vector vector;
 
   // the offset into the Vector
@@ -185,5 +193,34 @@
     public void remove() {
       throw new UnsupportedOperationException();
     }
+  }
+
+
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeInt(offset);
+    dataOutput.writeInt(cardinality);
+    String vectorClassName = vector.getClass().getName();
+    dataOutput.writeInt(vectorClassName.length() * 2);
+    dataOutput.write(vectorClassName.getBytes());
+    vector.write(dataOutput);
+  }
+
+  /**
+   * Replaces the contents of this view with data read from the stream:
+   * offset (int), cardinality (int), the byte length of a class name (int),
+   * the class name bytes, and then the fields of a vector of that class.
+   * The fields of this view are updated only after everything has been read.
+   * <p>
+   * The class name is decoded as UTF-8 rather than with the platform default
+   * charset.
+   *
+   * @param dataInput the source to read from
+   * @throws IOException if reading from the source fails
+   * @throws RuntimeException if the named class cannot be loaded, is not a
+   *           Vector, or cannot be instantiated through a no-argument constructor
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    int offset = dataInput.readInt();
+    int cardinality = dataInput.readInt();
+    byte[] buf = new byte[dataInput.readInt()];
+    dataInput.readFully(buf);
+    String vectorClassName = new String(buf, "UTF-8");
+    Vector vector;
+    try {
+      // asSubclass rejects a class that is not a Vector before it is instantiated
+      vector = Class.forName(vectorClassName).asSubclass(Vector.class).newInstance();
+    } catch (Exception e) {
+      throw new RuntimeException("Cannot instantiate vector class " + vectorClassName, e);
+    }
+    vector.readFields(dataInput);
+
+    this.offset = offset;
+    this.cardinality = cardinality;
+    this.vector = vector;
   }
 }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java Wed Apr 23 10:35:16 2008
@@ -19,14 +19,11 @@
 
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.matrix.DenseVectorWritable;
-import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.matrix.VectorWritable;
-import org.apache.mahout.matrix.SparseVectorWritable;
+import org.apache.mahout.matrix.*;
 
-import java.io.*;
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
 
 /**
  * Abstract implementation of DistanceMeasure with support for weights.
@@ -50,15 +47,15 @@
       FileSystem fs = FileSystem.get(jobConf);
       String weightsPathName = WeightedDistanceMeasure.class.getName() + ".sparseVector";
       if (weightsPathName != null) {
-        VectorWritable writable = new SparseVectorWritable();
+        Vector weights = new SparseVector();
         Path weightsPath = new Path(weightsPathName);
         if (!fs.exists(weightsPath)) {
           throw new FileNotFoundException(weightsPath.toString());
         }
         DataInputStream in = fs.open(weightsPath);
-        writable.readFields(in);
+        weights.readFields(in);
         in.close();
-        weights = writable.get();
+        this.weights = weights;
       }
     } catch (Exception e) {
       throw new RuntimeException(e);

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java Wed Apr 23 10:35:16 2008
@@ -18,8 +18,6 @@
  * limitations under the License.
  */
 
-import junit.framework.TestCase;
-
 public class TestSparseVector extends TestCase {
 
   double[] values = {1.1, 2.2, 3.3};

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java Wed Apr 23 10:35:16 2008
@@ -27,9 +27,9 @@
 
   private static final int cardinality = 10;
 
-  public void test(VectorWritable writable) throws Exception {
+  public void test(Vector writable) throws Exception {
     for (int i = 0; i < cardinality; i++) {
-      writable.get().set(i, i);
+      writable.set(i, i);
     }
     DataOutputBuffer out = new DataOutputBuffer();
     writable.write(out);
@@ -39,28 +39,27 @@
     writable.readFields(in);
     in.close();
 
-    assertEquals(cardinality, writable.get().cardinality());
+    assertEquals(cardinality, writable.cardinality());
     for (int i = 0; i < cardinality; i++) {
-      assertEquals((double)i, writable.get().get(i));
+      assertEquals((double)i, writable.get(i));
     }
 
-    // also make sure it creates the vector correct even if it is not set.
-    writable.set(null);
+    //
 
     in = new DataInputStream(new ByteArrayInputStream(out.getData()));
     writable.readFields(in);
     in.close();
 
-    assertEquals(cardinality, writable.get().cardinality());
+    assertEquals(cardinality, writable.cardinality());
     for (int i = 0; i < cardinality; i++) {
-      assertEquals((double)i, writable.get().get(i));
+      assertEquals((double)i, writable.get(i));
     }
 
 
   }
 
   public void test() throws Exception {
-    test(new SparseVectorWritable(new SparseVector(cardinality)));
-    test(new DenseVectorWritable(new DenseVector(cardinality)));
+    test(new SparseVector(cardinality));
+    test(new DenseVector(cardinality));
   }
 }

Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java Wed Apr 23 10:35:16 2008
@@ -18,9 +18,6 @@
 
 import junit.framework.TestCase;
 
-import java.util.Map;
-import java.util.LinkedHashMap;
-
 public class VectorTest extends TestCase {