You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ka...@apache.org on 2008/04/23 19:35:21 UTC
svn commit: r650950 - in /lucene/mahout/trunk/core/src:
main/java/org/apache/mahout/matrix/ main/java/org/apache/mahout/utils/
test/java/org/apache/mahout/matrix/
Author: kalle
Date: Wed Apr 23 10:35:16 2008
New Revision: 650950
URL: http://svn.apache.org/viewvc?rev=650950&view=rev
Log:
MAHOUT-50, Vector extends Writable.
Old VectorWritable is removed.
Removed:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVectorWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVectorWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorWritable.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/AbstractVector.java Wed Apr 23 10:35:16 2008
@@ -17,12 +17,13 @@
package org.apache.mahout.matrix;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
/**
* Implementations of generic capabilities like sum of elements and dot products
*
*/
-public abstract class AbstractVector implements Vector {
+public abstract class AbstractVector implements Vector, Writable {
/*
* (non-Javadoc)
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/DenseVector.java Wed Apr 23 10:35:16 2008
@@ -19,10 +19,19 @@
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.DataInput;
+
/**
* Implements vector as an array of doubles
*/
public class DenseVector extends AbstractVector {
+
+ /**
+ * For serialization purposes only. This constructor assigns no fields; the
+ * backing array of doubles is set later, e.g. by readFields(DataInput).
+ */
+ public DenseVector() {
+ }
+
private double[] values;
/**
@@ -167,5 +176,21 @@
public void remove() {
throw new UnsupportedOperationException();
}
+ }
+
+
+  /**
+   * Serializes this vector: first the cardinality as an int, then the value
+   * of every element yielded by this vector's iterator as a double.
+   *
+   * @param dataOutput the sink to write to
+   * @throws IOException if writing to the sink fails
+   */
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeInt(cardinality());
+    // Fully qualified so it cannot be confused with a nested Iterator class.
+    java.util.Iterator<Vector.Element> elements = iterator();
+    while (elements.hasNext()) {
+      dataOutput.writeDouble(elements.next().get());
+    }
+  }
+
+  /**
+   * Restores this vector from a stream: reads an int length followed by that
+   * many doubles. The backing array is replaced only after every value has
+   * been read, so a failed read leaves the previous contents in place.
+   *
+   * @param dataInput the source to read from
+   * @throws IOException if reading from the source fails
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    final int length = dataInput.readInt();
+    final double[] loaded = new double[length];
+    int position = 0;
+    while (position < length) {
+      loaded[position] = dataInput.readDouble();
+      position++;
+    }
+    this.values = loaded;
   }
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/SparseVector.java Wed Apr 23 10:35:16 2008
@@ -18,23 +18,31 @@
import java.util.HashMap;
import java.util.Map;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.DataInput;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
/**
* Implements vector that only stores non-zero doubles
- *
+ *
*/
public class SparseVector extends AbstractVector {
- private Map<Integer, Double> values = new HashMap<Integer, Double>();
+ /**
+ * For serialization purposes only. This constructor assigns no fields, so the
+ * index-to-value map is left unassigned until something such as
+ * readFields(DataInput) sets it.
+ */
+ public SparseVector() {
+ }
+
+ private Map<Integer, Double> values;
+
private int cardinality;
/**
* Decode a new instance from the formatted string
- *
+ *
* @param formattedString
* a string produced by the asFormatString method
* @return a DenseVector
@@ -59,6 +67,7 @@
public SparseVector(int cardinality) {
super();
+ values = new HashMap<Integer, Double>();
this.cardinality = cardinality;
}
@@ -160,6 +169,26 @@
return new Iterator();
}
+
+  /**
+   * Two sparse vectors are equal when they are of exactly the same class, have
+   * the same cardinality and hold equal index-to-value maps. Two unassigned
+   * (null) maps also count as equal.
+   *
+   * @param o the object to compare with
+   * @return true if o is equal to this vector
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    SparseVector other = (SparseVector) o;
+    if (cardinality != other.cardinality) {
+      return false;
+    }
+    if (values == null) {
+      return other.values == null;
+    }
+    return values.equals(other.values);
+  }
+
+  /**
+   * Hash code derived from the same state that equals(Object) compares: the
+   * index-to-value map (0 when it is unassigned) and the cardinality.
+   *
+   * @return the hash code of this vector
+   */
+  @Override
+  public int hashCode() {
+    int valuesHash = 0;
+    if (values != null) {
+      valuesHash = values.hashCode();
+    }
+    return 31 * valuesHash + cardinality;
+  }
+
private class Iterator implements java.util.Iterator<Vector.Element> {
private java.util.Iterator<Map.Entry<Integer, Double>> it;
@@ -200,6 +229,28 @@
result += getQuick(nextIndex) * x.getQuick(nextIndex);
}
return result;
+ }
+
+  /**
+   * Serializes this vector as: cardinality (int), the number of entries that
+   * follow (int), then one (index int, value double) pair per non-zero
+   * element.
+   * <p>
+   * The entry count is computed from the elements that are actually emitted.
+   * Announcing size() while skipping zero-valued elements could promise more
+   * pairs than the record contains, and a reader that trusts the count would
+   * then run past the end of this record.
+   *
+   * @param dataOutput the sink to write to
+   * @throws IOException if writing to the sink fails
+   */
+  public void write(DataOutput dataOutput) throws IOException {
+    // First pass: count exactly the elements the second pass will write.
+    int nonZeroCount = 0;
+    for (Vector.Element element : this) {
+      if (element.get() != 0d) {
+        nonZeroCount++;
+      }
+    }
+
+    dataOutput.writeInt(cardinality());
+    dataOutput.writeInt(nonZeroCount);
+
+    // Second pass: same filter as above, so the header and payload agree.
+    for (Vector.Element element : this) {
+      final double value = element.get();
+      if (value != 0d) {
+        dataOutput.writeInt(element.index());
+        dataOutput.writeDouble(value);
+      }
+    }
+  }
+
+  /**
+   * Restores this vector from a stream: reads the cardinality (int), an entry
+   * count (int) and then that many (index int, value double) pairs. The
+   * fields are replaced only after all pairs have been read.
+   *
+   * @param dataInput the source to read from
+   * @throws IOException if reading from the source fails
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    final int newCardinality = dataInput.readInt();
+    final int entryCount = dataInput.readInt();
+    final Map<Integer, Double> loaded = new HashMap<Integer, Double>();
+    for (int remaining = entryCount; remaining > 0; remaining--) {
+      final int index = dataInput.readInt();
+      final double value = dataInput.readDouble();
+      loaded.put(index, value);
+    }
+    this.cardinality = newCardinality;
+    this.values = loaded;
   }
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/Vector.java Wed Apr 23 10:35:16 2008
@@ -17,30 +17,31 @@
package org.apache.mahout.matrix;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Writable;
/**
* The basic interface including numerous convenience functions
- *
+ *
*/
-public interface Vector extends Iterable<Vector.Element> {
+public interface Vector extends Iterable<Vector.Element>, Writable {
/**
* Return a formatted WritableComparable suitable for output
- *
+ *
* @return formatted WritableComparable
*/
WritableComparable asWritableComparable();
/**
* Return a formatted String suitable for output
- *
+ *
* @return
*/
String asFormatString();
/**
* Assign the value to all elements of the receiver
- *
+ *
* @param value
* a double value
* @return the modified receiver
@@ -49,7 +50,7 @@
/**
* Assign the values to the receiver
- *
+ *
* @param values
* a double[] of values
* @return the modified receiver
@@ -60,7 +61,7 @@
/**
* Assign the other matrix values to the receiver
- *
+ *
* @param other
* a Vector
* @return the modified receiver
@@ -71,7 +72,7 @@
/**
* Apply the function to each element of the receiver
- *
+ *
* @param function
* a DoubleFunction to apply
* @return the modified receiver
@@ -81,7 +82,7 @@
/**
* Apply the function to each element of the receiver and the corresponding
* element of the other argument
- *
+ *
* @param other
* a Vector containing the second arguments to the function
* @param function
@@ -95,14 +96,14 @@
/**
* Return the cardinality of the recipient (the maximum number of values)
- *
+ *
* @return an int
*/
int cardinality();
/**
* Return a copy of the recipient
- *
+ *
* @return a new Vector
*/
Vector copy();
@@ -110,7 +111,7 @@
/**
* Return an object of Vector.Element representing an element of this Vector.
* Useful when designing new iterator types.
- *
+ *
* @param index
* Index of the Vector.Element required
* @return The Vector.Element Object
@@ -138,7 +139,7 @@
/**
* Return a new matrix containing the values of the recipient divided by the
* argument
- *
+ *
* @param x
* a double value
* @return a new Vector
@@ -147,7 +148,7 @@
/**
* Return the dot product of the recipient and the argument
- *
+ *
* @param x
* a Vector
* @return a new Vector
@@ -158,7 +159,7 @@
/**
* Return the value at the given index
- *
+ *
* @param index
* an int index
* @return the double at the index
@@ -169,7 +170,7 @@
/**
* Return the value at the given index, without checking bounds
- *
+ *
* @param index
* an int index
* @return the double at the index
@@ -178,7 +179,7 @@
/**
* Return if the other matrix and the receiver share any underlying data cells
- *
+ *
* @param other
* a Vector
* @return true if the other matrix has common data cells
@@ -187,7 +188,7 @@
/**
* Return an empty matrix of the same underlying class as the receiver
- *
+ *
* @return a Vector
*/
Vector like();
@@ -195,7 +196,7 @@
/**
* Return an empty matrix of the same underlying class as the receiver and of
* the given cardinality
- *
+ *
* @param cardinality
* an int specifying the desired cardinality
* @return a Vector
@@ -205,7 +206,7 @@
/**
* Return a new matrix containing the element by element difference of the
* recipient and the argument
- *
+ *
* @param x
* a Vector
* @return a new Vector
@@ -216,7 +217,7 @@
/**
* Return a new matrix containing the normalized values of the recipient
- *
+ *
* @return a new Vector
*/
Vector normalize();
@@ -224,7 +225,7 @@
/**
* Return a new matrix containing the sum of each value of the recipient and
* the argument
- *
+ *
* @param x
* a double
* @return a new Vector
@@ -234,7 +235,7 @@
/**
* Return a new matrix containing the element by element sum of the recipient
* and the argument
- *
+ *
* @param x
* a Vector
* @return a new Vector
@@ -245,7 +246,7 @@
/**
* Set the value at the given index
- *
+ *
* @param index
* an int index into the receiver
* @param value
@@ -257,7 +258,7 @@
/**
* Set the value at the given index, without checking bounds
- *
+ *
* @param index
* an int index into the receiver
* @param value
@@ -267,7 +268,7 @@
/**
* Return the number of values in the recipient
- *
+ *
* @return an int
*/
int size();
@@ -275,7 +276,7 @@
/**
* Return a new matrix containing the product of each value of the recipient
* and the argument
- *
+ *
* @param x
* a double argument
* @return a new Vector
@@ -285,7 +286,7 @@
/**
* Return a new matrix containing the element-wise product of the recipient
* and the argument
- *
+ *
* @param x
* a Vector argument
* @return a new Vector
@@ -296,14 +297,14 @@
/**
* Return the element of the recipient as a double[]
- *
+ *
* @return a double[]
*/
double[] toArray();
/**
* Return a new matrix containing the subset of the recipient
- *
+ *
* @param offset
* an int offset into the receiver
* @param length
@@ -320,14 +321,14 @@
/**
* Return the sum of all the elements of the receiver
- *
+ *
* @return a double
*/
double zSum();
/**
* Return the cross product of the receiver and the other vector
- *
+ *
* @param other
* another Vector
* @return a Matrix
@@ -336,7 +337,7 @@
/*
* Need stories for these but keeping them here for now.
- *
+ *
*/
// void getNonZeros(IntArrayList jx, DoubleArrayList values);
// void foreachNonZero(IntDoubleFunction f);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/matrix/VectorView.java Wed Apr 23 10:35:16 2008
@@ -19,12 +19,20 @@
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
import java.util.Iterator;
/**
* Implements subset view of a Vector
*/
public class VectorView extends AbstractVector {
+
+ /**
+ * For serialization purposes only. This constructor assigns no fields; the
+ * wrapped vector, offset and cardinality are set by readFields(DataInput).
+ */
+ public VectorView() {
+ }
+
private Vector vector;
// the offset into the Vector
@@ -185,5 +193,34 @@
public void remove() {
throw new UnsupportedOperationException();
}
+ }
+
+
+  /**
+   * Serializes this view as: offset (int), cardinality (int), the byte length
+   * of the wrapped vector's class name (int), the class name encoded as UTF-8,
+   * and finally the wrapped vector's own serialized form.
+   * <p>
+   * The length prefix is the size of the encoded byte array that is actually
+   * written. A prefix of twice the character count does not match the bytes
+   * written for a class name, so a reader consuming exactly the announced
+   * number of bytes would eat into the wrapped vector's data.
+   *
+   * @param dataOutput the sink to write to
+   * @throws IOException if writing to the sink fails
+   */
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeInt(offset);
+    dataOutput.writeInt(cardinality);
+    // Encode once with an explicit charset; prefix and payload come from the same array.
+    byte[] classNameBytes = vector.getClass().getName().getBytes("UTF-8");
+    dataOutput.writeInt(classNameBytes.length);
+    dataOutput.write(classNameBytes);
+    vector.write(dataOutput);
+  }
+
+  /**
+   * Restores this view from a stream. Expects, in order: offset (int),
+   * cardinality (int), the byte length of a class name (int), that many bytes
+   * holding the class name (decoded as UTF-8), and then the serialized form
+   * of a vector of that class. The named class is instantiated through its
+   * no-argument constructor and asked to read itself from the stream. The
+   * fields of this view are replaced only after everything has been read.
+   * <p>
+   * NOTE(review): the class to instantiate is taken from the stream, so this
+   * must only be used on trusted input.
+   *
+   * @param dataInput the source to read from
+   * @throws IOException if reading fails or the class name length is negative
+   * @throws RuntimeException if the named class cannot be loaded, instantiated
+   *         or is not a Vector
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    int offset = dataInput.readInt();
+    int cardinality = dataInput.readInt();
+
+    int classNameLength = dataInput.readInt();
+    if (classNameLength < 0) {
+      // A corrupt record should surface as an I/O problem, not as an array-size error.
+      throw new IOException("Negative class name length: " + classNameLength);
+    }
+    byte[] classNameBytes = new byte[classNameLength];
+    dataInput.readFully(classNameBytes);
+    // Explicit charset so decoding does not depend on the platform default.
+    String vectorClassName = new String(classNameBytes, "UTF-8");
+
+    Vector vector;
+    try {
+      vector = (Vector) Class.forName(vectorClassName).newInstance();
+    } catch (Exception e) {
+      throw new RuntimeException(
+          "Unable to instantiate wrapped vector class " + vectorClassName, e);
+    }
+    vector.readFields(dataInput);
+
+    this.offset = offset;
+    this.cardinality = cardinality;
+    this.vector = vector;
   }
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/utils/WeightedDistanceMeasure.java Wed Apr 23 10:35:16 2008
@@ -19,14 +19,11 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.matrix.DenseVectorWritable;
-import org.apache.mahout.matrix.Vector;
-import org.apache.mahout.matrix.VectorWritable;
-import org.apache.mahout.matrix.SparseVectorWritable;
+import org.apache.mahout.matrix.*;
-import java.io.*;
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
/**
* Abstract implementation of DistanceMeasure with support for weights.
@@ -50,15 +47,15 @@
FileSystem fs = FileSystem.get(jobConf);
String weightsPathName = WeightedDistanceMeasure.class.getName() + ".sparseVector";
if (weightsPathName != null) {
- VectorWritable writable = new SparseVectorWritable();
+ Vector weights = new SparseVector();
Path weightsPath = new Path(weightsPathName);
if (!fs.exists(weightsPath)) {
throw new FileNotFoundException(weightsPath.toString());
}
DataInputStream in = fs.open(weightsPath);
- writable.readFields(in);
+ weights.readFields(in);
in.close();
- weights = writable.get();
+ this.weights = weights;
}
} catch (Exception e) {
throw new RuntimeException(e);
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestSparseVector.java Wed Apr 23 10:35:16 2008
@@ -18,8 +18,6 @@
* limitations under the License.
*/
-import junit.framework.TestCase;
-
public class TestSparseVector extends TestCase {
double[] values = {1.1, 2.2, 3.3};
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/TestVectorWritable.java Wed Apr 23 10:35:16 2008
@@ -27,9 +27,9 @@
private static final int cardinality = 10;
- public void test(VectorWritable writable) throws Exception {
+ public void test(Vector writable) throws Exception {
for (int i = 0; i < cardinality; i++) {
- writable.get().set(i, i);
+ writable.set(i, i);
}
DataOutputBuffer out = new DataOutputBuffer();
writable.write(out);
@@ -39,28 +39,27 @@
writable.readFields(in);
in.close();
- assertEquals(cardinality, writable.get().cardinality());
+ assertEquals(cardinality, writable.cardinality());
for (int i = 0; i < cardinality; i++) {
- assertEquals((double)i, writable.get().get(i));
+ assertEquals((double)i, writable.get(i));
}
- // also make sure it creates the vector correct even if it is not set.
- writable.set(null);
+ //
in = new DataInputStream(new ByteArrayInputStream(out.getData()));
writable.readFields(in);
in.close();
- assertEquals(cardinality, writable.get().cardinality());
+ assertEquals(cardinality, writable.cardinality());
for (int i = 0; i < cardinality; i++) {
- assertEquals((double)i, writable.get().get(i));
+ assertEquals((double)i, writable.get(i));
}
}
public void test() throws Exception {
- test(new SparseVectorWritable(new SparseVector(cardinality)));
- test(new DenseVectorWritable(new DenseVector(cardinality)));
+ test(new SparseVector(cardinality));
+ test(new DenseVector(cardinality));
}
}
Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java?rev=650950&r1=650949&r2=650950&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java (original)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/matrix/VectorTest.java Wed Apr 23 10:35:16 2008
@@ -18,9 +18,6 @@
import junit.framework.TestCase;
-import java.util.Map;
-import java.util.LinkedHashMap;
-
public class VectorTest extends TestCase {