You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by jm...@apache.org on 2010/01/13 09:13:11 UTC

svn commit: r898671 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/math/ core/src/test/java/org/apache/mahout/math/ utils/src/test/java/org/apache/mahout/math/

Author: jmannix
Date: Wed Jan 13 08:13:10 2010
New Revision: 898671

URL: http://svn.apache.org/viewvc?rev=898671&view=rev
Log:
Worse yet, missed all these directories to be checked in!

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseMatrixWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseVectorWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/SparseVectorWritable.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/math/
    lucene/mahout/trunk/core/src/test/java/org/apache/mahout/math/TestVectorWritable.java
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/math/
    lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/math/TestVectorWritable.java

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseMatrixWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseMatrixWritable.java?rev=898671&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseMatrixWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseMatrixWritable.java Wed Jan 13 08:13:10 2010
@@ -0,0 +1,39 @@
+package org.apache.mahout.math;
+
+import org.apache.hadoop.io.Writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.HashMap;
+
+
+public class DenseMatrixWritable extends DenseMatrix implements Writable {
+
+  public void readFields(DataInput in) throws IOException {
+    columnLabelBindings = new HashMap<String, Integer>();
+    rowLabelBindings = new HashMap<String, Integer>();
+    MatrixWritable.readLabels(in, columnLabelBindings, rowLabelBindings);
+    int rows = in.readInt();
+    int columns = in.readInt();
+    this.values = new double[rows][columns];
+    for (int row = 0; row < rows; row++) {
+      for (int column = 0; column < columns; column++) {
+        this.values[row][column] = in.readDouble();
+      }
+    }
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    MatrixWritable.writeLabelBindings(out, columnLabelBindings, rowLabelBindings);
+    out.writeInt(rowSize());
+    out.writeInt(columnSize());
+    for (double[] row : values) {
+      for (double value : row) {
+        out.writeDouble(value);
+      }
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseVectorWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseVectorWritable.java?rev=898671&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseVectorWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/DenseVectorWritable.java Wed Jan 13 08:13:10 2010
@@ -0,0 +1,72 @@
+package org.apache.mahout.math;
+
+import org.apache.hadoop.io.Writable;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Iterator;
+
+/**
+ * A {@link DenseVector} that implements Hadoop's {@link Writable} interface.
+ *
+ * <p>{@link #write(DataOutput)} starts the record with this object's class name, whereas
+ * {@link #readFields(DataInput)} starts at the vector name; the class name must therefore have
+ * been consumed by the caller beforehand, as {@code VectorWritable} does.
+ */
+public class DenseVectorWritable extends DenseVector implements Writable {
+
+  /** No-arg constructor; initialization is left entirely to the superclass. */
+  public DenseVectorWritable() {
+  }
+
+  /**
+   * Wraps an existing dense vector. The name is applied through {@code setName}; the
+   * {@code values} array and {@code lengthSquared} are assigned directly, so the array is shared
+   * with {@code v} rather than copied.
+   *
+   * @param v vector to wrap
+   */
+  public DenseVectorWritable(DenseVector v) {
+    setName(v.getName());
+    this.values = v.values;
+    this.lengthSquared = v.lengthSquared;
+  }
+
+  /**
+   * Emits, in order: class name, vector name (empty string when null), size, the
+   * {@code lengthSquared} field, and then one double per element from {@code iterateAll()}.
+   *
+   * @param dataOutput destination stream
+   * @throws IOException if writing fails
+   */
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeUTF(getClass().getName());
+    String name = getName();
+    dataOutput.writeUTF(name == null ? "" : name);
+    dataOutput.writeInt(size());
+    dataOutput.writeDouble(lengthSquared);
+    for (Iterator<Vector.Element> elements = iterateAll(); elements.hasNext();) {
+      dataOutput.writeDouble(elements.next().get());
+    }
+  }
+
+  /**
+   * Restores name, {@code lengthSquared} and values from {@code dataInput}; the leading class
+   * name is not read here.
+   *
+   * @param dataInput source stream positioned at the vector name
+   * @throws IOException if reading fails
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    setName(dataInput.readUTF());
+    int length = dataInput.readInt();
+    double[] cells = new double[length];
+    lengthSquared = dataInput.readDouble();
+    for (int index = 0; index < length; index++) {
+      cells[index] = dataInput.readDouble();
+    }
+    values = cells;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java?rev=898671&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/MatrixWritable.java Wed Jan 13 08:13:10 2010
@@ -0,0 +1,156 @@
+package org.apache.mahout.math;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.math.Matrix;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Hadoop {@link Writable} holder for a {@link Matrix}, together with static helpers that
+ * serialize matrix label bindings and typed matrix instances.
+ */
+public class MatrixWritable implements Writable {
+
+  private Matrix matrix;
+
+  public MatrixWritable() {
+  }
+
+  public MatrixWritable(Matrix m) {
+    set(m);
+  }
+
+  /** Returns the held matrix; {@code null} until one is set or read. */
+  public Matrix get() {
+    return matrix;
+  }
+
+  public void set(Matrix matrix) {
+    this.matrix = matrix;
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    writeMatrix(out, matrix);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    matrix = readMatrix(in);
+  }
+
+  /**
+   * Reads the column bindings followed by the row bindings, in the layout produced by
+   * {@link #writeLabelBindings}, and stores them in the supplied maps.
+   *
+   * <p>Entries are always put into the caller's maps. Earlier code replaced
+   * {@code rowLabelBindings} with a fresh local map, so row labels never reached the caller.
+   *
+   * @param in source stream
+   * @param columnLabelBindings receives the column labels; if {@code null} they are read and
+   *        dropped so the stream stays aligned
+   * @param rowLabelBindings receives the row labels; if {@code null} they are read and dropped
+   * @throws IOException if reading fails
+   */
+  public static void readLabels(DataInput in,
+                                Map<String, Integer> columnLabelBindings,
+                                Map<String, Integer> rowLabelBindings) throws IOException {
+    readBindings(in, columnLabelBindings);
+    readBindings(in, rowLabelBindings);
+  }
+
+  /** Reads one count-prefixed list of (label, index) pairs into {@code target}, if non-null. */
+  private static void readBindings(DataInput in, Map<String, Integer> target) throws IOException {
+    int count = in.readInt();
+    for (int i = 0; i < count; i++) {
+      String label = in.readUTF();
+      int index = in.readInt();
+      if (target != null) {
+        target.put(label, index);
+      }
+    }
+  }
+
+  /**
+   * Writes the column bindings followed by the row bindings. Each map is written as its size and
+   * then one (UTF label, int index) pair per entry; a {@code null} map is written as size 0.
+   *
+   * @param out destination stream
+   * @param columnLabelBindings column label to index map, may be {@code null}
+   * @param rowLabelBindings row label to index map, may be {@code null}
+   * @throws IOException if writing fails
+   */
+  public static void writeLabelBindings(DataOutput out,
+                                        Map<String, Integer> columnLabelBindings,
+                                        Map<String, Integer> rowLabelBindings) throws IOException {
+    writeBindings(out, columnLabelBindings);
+    writeBindings(out, rowLabelBindings);
+  }
+
+  /** Writes one count-prefixed list of (label, index) pairs; {@code null} counts as empty. */
+  private static void writeBindings(DataOutput out, Map<String, Integer> bindings)
+      throws IOException {
+    if (bindings == null) {
+      out.writeInt(0);
+      return;
+    }
+    out.writeInt(bindings.size());
+    for (Map.Entry<String, Integer> binding : bindings.entrySet()) {
+      out.writeUTF(binding.getKey());
+      out.writeInt(binding.getValue());
+    }
+  }
+
+  /**
+   * Reads a typed Matrix instance from the input stream.
+   *
+   * <p>The class named in the stream is instantiated through its no-arg constructor. If the
+   * instance is {@link Writable} its fields are then read from {@code in}; otherwise the empty
+   * instance is returned as is.
+   *
+   * @param in source stream, positioned at the matrix class name
+   * @return the newly created matrix
+   * @throws IOException if reading fails
+   * @throws IllegalStateException if the class cannot be found or instantiated
+   */
+  public static Matrix readMatrix(DataInput in) throws IOException {
+    String matrixClassName = in.readUTF();
+    Matrix matrix;
+    try {
+      matrix = Class.forName(matrixClassName).asSubclass(Matrix.class).newInstance();
+    } catch (ClassNotFoundException e) {
+      throw new IllegalStateException(e);
+    } catch (IllegalAccessException e) {
+      throw new IllegalStateException(e);
+    } catch (InstantiationException e) {
+      throw new IllegalStateException(e);
+    }
+    if (matrix instanceof Writable) {
+      ((Writable) matrix).readFields(in);
+    }
+    return matrix;
+  }
+
+  /**
+   * Writes a typed Matrix instance to the output stream: first its class name, then, when the
+   * matrix is {@link Writable}, its own serialized form.
+   *
+   * <p>NOTE: a matrix that is not {@link Writable} contributes only its class name, so its
+   * contents are not part of the stream.
+   *
+   * @param out destination stream
+   * @param matrix matrix to write; must not be {@code null}
+   * @throws IOException if writing fails
+   */
+  public static void writeMatrix(DataOutput out, Matrix matrix)
+      throws IOException {
+    out.writeUTF(matrix.getClass().getName());
+    if (matrix instanceof Writable) {
+      ((Writable) matrix).write(out);
+    }
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/SparseVectorWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/SparseVectorWritable.java?rev=898671&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/SparseVectorWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/SparseVectorWritable.java Wed Jan 13 08:13:10 2010
@@ -0,0 +1,84 @@
+package org.apache.mahout.math;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.math.map.OpenIntDoubleHashMap;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Iterator;
+
+
+/**
+ * A {@link SparseVector} that implements Hadoop's {@link Writable} interface.
+ *
+ * <p>{@link #write(DataOutput)} begins the record with this object's class name, while
+ * {@link #readFields(DataInput)} begins at the vector name, so the class name has to be consumed
+ * by the caller first, as {@code VectorWritable} does.
+ */
+public class SparseVectorWritable extends SparseVector implements Writable {
+
+  /** No-arg constructor; initialization is left entirely to the superclass. */
+  public SparseVectorWritable() {
+  }
+
+  /**
+   * Wraps an existing sparse vector: the name is applied through {@code setName}, and the
+   * {@code cardinality} and {@code values} fields are assigned directly (the {@code values}
+   * object is shared with {@code vector}, not copied).
+   *
+   * @param vector vector to wrap
+   */
+  public SparseVectorWritable(SparseVector vector) {
+    setName(vector.getName());
+    this.cardinality = vector.cardinality;
+    this.values = vector.values;
+  }
+
+  /**
+   * Emits, in order: class name, vector name (empty string when null), size, the number of
+   * non-default elements, and then an (int index, double value) pair for every element returned
+   * by {@code iterateNonZero()}.
+   *
+   * @param dataOutput destination stream
+   * @throws IOException if writing fails
+   */
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeUTF(getClass().getName());
+    String name = getName();
+    dataOutput.writeUTF(name == null ? "" : name);
+    dataOutput.writeInt(size());
+    int expected = getNumNondefaultElements();
+    dataOutput.writeInt(expected);
+    int written = 0;
+    for (Iterator<Vector.Element> nonZeros = iterateNonZero(); nonZeros.hasNext(); written++) {
+      Vector.Element entry = nonZeros.next();
+      dataOutput.writeInt(entry.index());
+      dataOutput.writeDouble(entry.get());
+    }
+    // The announced count must match the number of pairs actually written.
+    assert expected == written;
+  }
+
+  /**
+   * Restores name, cardinality and the index/value pairs from {@code dataInput}, and resets
+   * {@code lengthSquared} to -1.0. The leading class name is not read here.
+   *
+   * @param dataInput source stream positioned at the vector name
+   * @throws IOException if reading fails
+   */
+  public void readFields(DataInput dataInput) throws IOException {
+    setName(dataInput.readUTF());
+    cardinality = dataInput.readInt();
+    int entries = dataInput.readInt();
+    OpenIntDoubleHashMap loaded = new OpenIntDoubleHashMap((int) (entries * 1.5));
+    int read = 0;
+    for (; read < entries; read++) {
+      int index = dataInput.readInt();
+      loaded.put(index, dataInput.readDouble());
+    }
+    assert read == entries;
+    values = loaded;
+    lengthSquared = -1.0;
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java?rev=898671&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java Wed Jan 13 08:13:10 2010
@@ -0,0 +1,134 @@
+package org.apache.mahout.math;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+
+/**
+ * Hadoop {@link Writable} holder for a {@link Vector}. The serialized record starts with the name
+ * of the concrete vector class, which the reading side uses to instantiate the vector.
+ */
+public class VectorWritable extends Configured implements Writable {
+
+  /** Vector classes already resolved by {@link #readVector(DataInput)}, keyed by class name. */
+  private static final ConcurrentMap<String, Class<? extends Vector>> VECTOR_CLASSES =
+      new ConcurrentHashMap<String, Class<? extends Vector>>();
+
+  private Vector vector;
+
+  public VectorWritable() {
+  }
+
+  public VectorWritable(Vector v) {
+    set(v);
+  }
+
+  /** Returns the held vector; {@code null} until one is set or read. */
+  public Vector get() {
+    return vector;
+  }
+
+  public void set(Vector vector) {
+    this.vector = vector;
+  }
+
+  /**
+   * Serializes the held vector. A vector that is itself {@link Writable} writes itself; a
+   * {@link SparseVector} is wrapped in a {@link SparseVectorWritable}; anything else is copied
+   * into a {@link DenseVector} and wrapped in a {@link DenseVectorWritable}.
+   *
+   * @param out destination stream
+   * @throws IOException if writing fails
+   */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    Writable w;
+    if (vector instanceof Writable) {
+      w = (Writable) vector;
+    } else if (vector instanceof SparseVector) {
+      w = new SparseVectorWritable((SparseVector) vector);
+    } else {
+      w = new DenseVectorWritable(new DenseVector(vector));
+    }
+    w.write(out);
+  }
+
+  /**
+   * Reads a vector from {@code in}. The class named in the stream is the default; a
+   * {@code vector.class} entry in the configuration overrides it.
+   *
+   * <p>If no configuration has been set on this instance, a default {@link Configuration} is
+   * created and kept, instead of failing with a NullPointerException.
+   *
+   * @param in source stream, positioned at the vector class name
+   * @throws IOException if reading fails, the class cannot be found, or the resolved class is
+   *         not a {@link Vector} that is also {@link Writable}
+   */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    Configuration conf = getConf();
+    if (conf == null) {
+      conf = new Configuration();
+      setConf(conf);
+    }
+    try {
+      String vectorClassName = in.readUTF();
+      // asSubclass is a checked conversion: a non-Vector class raises ClassCastException here,
+      // which is reported as IOException below.
+      Class<? extends Vector> inputClass =
+          conf.getClassByName(vectorClassName).asSubclass(Vector.class);
+      Class<? extends Vector> vectorClass = conf.getClass("vector.class", inputClass, Vector.class);
+      vector = ReflectionUtils.newInstance(vectorClass, conf);
+      ((Writable) vector).readFields(in);
+    } catch (ClassNotFoundException cnfe) {
+      throw new IOException(cnfe);
+    } catch (ClassCastException cce) {
+      throw new IOException(cce);
+    }
+  }
+
+  /**
+   * Read and return a vector from the input.
+   *
+   * <p>Resolved classes are cached per class name, and an entry is stored only after the lookup
+   * succeeded, so a failed lookup cannot leave a stale class associated with the failed name.
+   *
+   * @param in source stream, positioned at the vector class name
+   * @return the newly created and populated vector
+   * @throws IOException if reading fails
+   * @throws IllegalStateException if the class cannot be found or instantiated
+   */
+  public static Vector readVector(DataInput in) throws IOException {
+    String vectorClassName = in.readUTF();
+    Vector vector;
+    try {
+      Class<? extends Vector> vectorClass = VECTOR_CLASSES.get(vectorClassName);
+      if (vectorClass == null) {
+        vectorClass = Class.forName(vectorClassName).asSubclass(Vector.class);
+        VECTOR_CLASSES.put(vectorClassName, vectorClass);
+      }
+      vector = vectorClass.newInstance();
+    } catch (ClassNotFoundException e) {
+      throw new IllegalStateException(e);
+    } catch (IllegalAccessException e) {
+      throw new IllegalStateException(e);
+    } catch (InstantiationException e) {
+      throw new IllegalStateException(e);
+    }
+    ((Writable) vector).readFields(in);
+    return vector;
+  }
+
+  /** Write the vector to the output */
+  public static void writeVector(DataOutput out, Vector vector)
+      throws IOException {
+    new VectorWritable(vector).write(out);
+  }
+}

Added: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/math/TestVectorWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/math/TestVectorWritable.java?rev=898671&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/math/TestVectorWritable.java (added)
+++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/math/TestVectorWritable.java Wed Jan 13 08:13:10 2010
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DataOutputBuffer;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+
+/** Round-trip serialization tests for {@link VectorWritable}. */
+public class TestVectorWritable extends TestCase {
+
+  private static final int cardinality = 10;
+
+  /**
+   * Fills {@code vector} with the values 0..cardinality-1, serializes it through
+   * {@link VectorWritable}, and checks that the bytes read back with the same size and values.
+   */
+  private static void doTest(Vector vector) throws Exception {
+    for (int i = 0; i < cardinality; i++) {
+      vector.set(i, i);
+    }
+    DataOutputBuffer out = new DataOutputBuffer();
+    VectorWritable v = new VectorWritable();
+    v.set(vector);
+    v.write(out);
+    out.close();
+
+    // The same bytes are deserialized twice, each time into a fresh VectorWritable.
+    assertReadsBack(out);
+    assertReadsBack(out);
+  }
+
+  /**
+   * Deserializes the buffer contents and verifies the vector that was actually read. The
+   * assertions deliberately target {@code VectorWritable.get()} after {@code readFields}: checking
+   * the vector that was written would pass even if deserialization produced garbage.
+   */
+  private static void assertReadsBack(DataOutputBuffer out) throws Exception {
+    // getData() exposes the whole backing array; only the first getLength() bytes are valid.
+    DataInputStream in = new DataInputStream(
+        new ByteArrayInputStream(out.getData(), 0, out.getLength()));
+    VectorWritable v = new VectorWritable();
+    v.setConf(new Configuration());
+    try {
+      v.readFields(in);
+    } finally {
+      in.close();
+    }
+
+    Vector read = v.get();
+    assertNotNull(read);
+    assertEquals(cardinality, read.size());
+    for (int i = 0; i < cardinality; i++) {
+      assertEquals((double) i, read.get(i), 0.0);
+    }
+  }
+
+  public void testVectors() throws Exception {
+    doTest(new SparseVector(cardinality));
+    doTest(new DenseVector(cardinality));
+    doTest(new VectorView(new SparseVector(cardinality + 1), 1, cardinality));
+    doTest(new VectorView(new DenseVector(cardinality + 1), 1, cardinality));
+  }
+}

Added: lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/math/TestVectorWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/math/TestVectorWritable.java?rev=898671&view=auto
==============================================================================
--- lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/math/TestVectorWritable.java (added)
+++ lucene/mahout/trunk/utils/src/test/java/org/apache/mahout/math/TestVectorWritable.java Wed Jan 13 08:13:10 2010
@@ -0,0 +1,86 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.SparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorView;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+
+/** Round-trip serialization tests for {@link VectorWritable}. */
+public class TestVectorWritable extends TestCase {
+
+  private static final int cardinality = 10;
+
+  /**
+   * Fills {@code vector} with the values 0..cardinality-1, serializes it through
+   * {@link VectorWritable}, and checks that the bytes read back with the same size and values.
+   */
+  private static void doTest(Vector vector) throws Exception {
+    for (int i = 0; i < cardinality; i++) {
+      vector.set(i, i);
+    }
+    DataOutputBuffer out = new DataOutputBuffer();
+    VectorWritable v = new VectorWritable();
+    v.set(vector);
+    v.write(out);
+    out.close();
+
+    // The same bytes are deserialized twice, each time into a fresh VectorWritable.
+    assertReadsBack(out);
+    assertReadsBack(out);
+  }
+
+  /**
+   * Deserializes the buffer contents and verifies the vector that was actually read. The
+   * assertions deliberately target {@code VectorWritable.get()} after {@code readFields}: checking
+   * the vector that was written would pass even if deserialization produced garbage.
+   */
+  private static void assertReadsBack(DataOutputBuffer out) throws Exception {
+    // getData() exposes the whole backing array; only the first getLength() bytes are valid.
+    DataInputStream in = new DataInputStream(
+        new ByteArrayInputStream(out.getData(), 0, out.getLength()));
+    VectorWritable v = new VectorWritable();
+    v.setConf(new Configuration());
+    try {
+      v.readFields(in);
+    } finally {
+      in.close();
+    }
+
+    Vector read = v.get();
+    assertNotNull(read);
+    assertEquals(cardinality, read.size());
+    for (int i = 0; i < cardinality; i++) {
+      assertEquals((double) i, read.get(i), 0.0);
+    }
+  }
+
+  public void testVectors() throws Exception {
+    doTest(new SparseVector(cardinality));
+    doTest(new DenseVector(cardinality));
+    doTest(new VectorView(new SparseVector(cardinality + 1), 1, cardinality));
+    doTest(new VectorView(new DenseVector(cardinality + 1), 1, cardinality));
+  }
+}