You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2008/08/12 10:55:17 UTC
svn commit: r685092 - in /incubator/hama/trunk/src: java/org/apache/hama/Vector.java java/org/apache/hama/io/VectorWritable.java test/org/apache/hama/TestVector.java
Author: edwardyoon
Date: Tue Aug 12 01:55:16 2008
New Revision: 685092
URL: http://svn.apache.org/viewvc?rev=685092&view=rev
Log:
Refactor VectorWritable
Modified:
incubator/hama/trunk/src/java/org/apache/hama/Vector.java
incubator/hama/trunk/src/java/org/apache/hama/io/VectorWritable.java
incubator/hama/trunk/src/test/org/apache/hama/TestVector.java
Modified: incubator/hama/trunk/src/java/org/apache/hama/Vector.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/Vector.java?rev=685092&r1=685091&r2=685092&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/Vector.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/Vector.java Tue Aug 12 01:55:16 2008
@@ -19,6 +19,10 @@
*/
package org.apache.hama;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.HbaseMapWritable;
import org.apache.hadoop.hbase.io.RowResult;
@@ -39,10 +43,13 @@
}
public Vector(int row, RowResult rowResult) {
+ this.cells = new HbaseMapWritable<byte[], Cell>();
this.row = intToBytes(row);
- parse(rowResult.entrySet());
+ for (Map.Entry<byte[], Cell> f : rowResult.entrySet()) {
+ this.cells.put(f.getKey(), f.getValue());
+ }
}
-
+
public void add(int index, double value) {
// TODO Auto-generated method stub
@@ -56,7 +63,7 @@
public Vector add(Vector v2) {
HbaseMapWritable<byte[], Cell> trunk = new HbaseMapWritable<byte[], Cell>();
for (int i = 0; i < this.size(); i++) {
- double value = (this.getValueAt(i) + v2.getValueAt(i));
+ double value = (this.get(i) + v2.get(i));
Cell cValue = new Cell(String.valueOf(value), 0);
trunk.put(Bytes.toBytes("column:" + i), cValue);
}
@@ -66,12 +73,10 @@
public double dot(Vector v) {
double cosine = 0.0;
- int dim;
double q_i, d_i;
for (int i = 0; i < Math.min(this.size(), v.size()); i++) {
- dim = v.getDimAt(i);
- q_i = v.getValueAt(dim);
- d_i = this.getValueAt(dim);
+ q_i = v.get(i);
+ d_i = this.get(i);
cosine += q_i * d_i;
}
return cosine / (this.getNorm2() * v.getNorm2());
@@ -104,17 +109,28 @@
public double getNorm1() {
double sum = 0.0;
- for (int i = 0; i < m_vals.length; i++) {
- sum += m_vals[i];
+
+ Set<byte[]> keySet = cells.keySet();
+ Iterator<byte[]> it = keySet.iterator();
+
+ while (it.hasNext()) {
+ sum += bytesToDouble(get(it.next()).getValue());
}
+
return sum;
}
public double getNorm2() {
double square_sum = 0.0;
- for (int i = 0; i < m_vals.length; i++) {
- square_sum += (m_vals[i] * m_vals[i]);
+
+ Set<byte[]> keySet = cells.keySet();
+ Iterator<byte[]> it = keySet.iterator();
+
+ while (it.hasNext()) {
+ double value = bytesToDouble(get(it.next()).getValue());
+ square_sum += value * value;
}
+
return Math.sqrt(square_sum);
}
@@ -127,12 +143,4 @@
// TODO Auto-generated method stub
return 0;
}
-
- public int getDimAt(int index) {
- return m_dims[index];
- }
-
- public double getValueAt(int index) {
- return m_vals[index];
- }
}
Modified: incubator/hama/trunk/src/java/org/apache/hama/io/VectorWritable.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/io/VectorWritable.java?rev=685092&r1=685091&r2=685092&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/io/VectorWritable.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/io/VectorWritable.java Tue Aug 12 01:55:16 2008
@@ -8,8 +8,6 @@
import java.util.Collections;
import java.util.Map;
import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.hadoop.hbase.HConstants;
@@ -27,29 +25,6 @@
static final Logger LOG = Logger.getLogger(VectorWritable.class);
public byte[] row;
public HbaseMapWritable<byte[], Cell> cells;
- public int[] m_dims;
- public double[] m_vals;
-
- public void parse(Set<Entry<byte[], Cell>> entrySet) {
- this.cells = new HbaseMapWritable<byte[], Cell>();
-
- SortedMap<Integer, Double> m = new TreeMap<Integer, Double>();
- for (Map.Entry<byte[], Cell> f : entrySet) {
- m.put(getColumnIndex(f.getKey()), Double.parseDouble(Bytes.toString(f
- .getValue().getValue())));
- this.cells.put(f.getKey(), f.getValue());
- }
-
- this.m_dims = new int[m.keySet().size()];
- this.m_vals = new double[m.keySet().size()];
-
- int i = 0;
- for (Map.Entry<Integer, Double> f : m.entrySet()) {
- this.m_dims[i] = f.getKey();
- this.m_vals[i] = f.getValue();
- i++;
- }
- }
public Cell put(@SuppressWarnings("unused")
byte[] key, @SuppressWarnings("unused")
@@ -106,7 +81,6 @@
public void readFields(final DataInput in) throws IOException {
this.row = Bytes.readByteArray(in);
this.cells.readFields(in);
- parse(this.cells.entrySet());
}
public void write(final DataOutput out) throws IOException {
@@ -138,9 +112,15 @@
return get(Bytes.toBytes(key));
}
+ /**
+ * Get the double value without timestamp
+ */
+ public double get(int key) {
+ return bytesToDouble(get(intToBytes(key)).getValue());
+ }
+
public int size() {
- // return this.cells.size();
- return m_dims.length;
+ return this.cells.size();
}
@Override
Modified: incubator/hama/trunk/src/test/org/apache/hama/TestVector.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/test/org/apache/hama/TestVector.java?rev=685092&r1=685091&r2=685092&view=diff
==============================================================================
--- incubator/hama/trunk/src/test/org/apache/hama/TestVector.java (original)
+++ incubator/hama/trunk/src/test/org/apache/hama/TestVector.java Tue Aug 12 01:55:16 2008
@@ -20,28 +20,42 @@
package org.apache.hama;
public class TestVector extends HamaTestCase {
+ final double cosine = 0.6978227007909176;
+ final double norm1 = 12.0;
+ final double norm2 = 6.782329983125268;
+ private double[][] values = { { 2, 5, 1, 4 }, { 4, 1, 3, 3 } };
/**
- * Test cosine similarity
+ * Test vector
*/
- public void testCosine() {
- final double result = 0.6978227007909176;
+ public void testGetVector() {
Matrix m1 = new Matrix(conf, "dotTest");
- m1.set(0, 0, 2);
- m1.set(0, 1, 5);
- m1.set(0, 2, 1);
- m1.set(0, 3, 4);
-
- m1.set(1, 0, 4);
- m1.set(1, 1, 1);
- m1.set(1, 2, 3);
- m1.set(1, 3, 3);
+ for (int i = 0; i < 2; i++) {
+ for (int j = 0; j < 4; j++) {
+ m1.set(i, j, values[i][j]);
+ }
+ }
Vector v1 = m1.getRow(0);
Vector v2 = m1.getRow(1);
+ dotTest(v1, v2);
+ norm1Test(v1, v2);
+ norm2Test(v1, v2);
+ }
+
+ public void dotTest(Vector v1, Vector v2) {
double cos = v1.dot(v2);
- assertEquals(cos, result);
+ assertEquals(cos, cosine);
+ }
+
+ public void norm1Test(Vector v1, Vector v2) {
+ assertEquals(norm1, v1.getNorm1());
}
+
+ public void norm2Test(Vector v1, Vector v2) {
+ assertEquals(norm2, v1.getNorm2());
+ }
+
}