You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2009/10/23 04:03:38 UTC
svn commit: r828916 - in /incubator/hama/trunk/src:
examples/org/apache/hama/examples/ java/org/apache/hama/matrix/
test/org/apache/hama/examples/
Author: edwardyoon
Date: Fri Oct 23 02:03:37 2009
New Revision: 828916
URL: http://svn.apache.org/viewvc?rev=828916&view=rev
Log:
cosine similarity matrix
Added:
incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java
incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java
Removed:
incubator/hama/trunk/src/examples/org/apache/hama/examples/FileMatrixBlockMult.java
Modified:
incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java
incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java
Added: incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java?rev=828916&view=auto
==============================================================================
--- incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java (added)
+++ incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java Fri Oct 23 02:03:37 2009
@@ -0,0 +1,106 @@
+package org.apache.hama.examples;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hama.Constants;
+import org.apache.hama.HamaConfiguration;
+import org.apache.hama.matrix.DenseMatrix;
+import org.apache.hama.matrix.DenseVector;
+import org.apache.hama.util.BytesUtil;
+
+/**
+ * Cosine Similarity MapReduce
+ * 
+ * <p>
+ * This is EXAMPLE code. You will need to change it to work for your context.
+ * <p>
+ * 
+ * <p>
+ * A map-only job scans every row of the input matrix and, for each of them,
+ * writes one output row holding the dot product of that row with every row of
+ * the input matrix. The entry whose column equals the row's own index is
+ * forced to 0. Note that the mapper stores the raw dot product; it does not
+ * divide by the vector norms.
+ * 
+ * <pre>
+ * ./bin/hama examples similarity INPUT_MATRIX OUTPUT_NAME
+ * </pre>
+ */
+public class CosineSimilarityMatrix {
+
+  /**
+   * Mapper that turns one input row into one row of pairwise dot products.
+   * The matrix to compare against is opened in {@link #setConf(Configuration)}
+   * from the table name stored under the "input.matrix" configuration key.
+   */
+  public static class ComputeSimilarityMapper extends
+      TableMapper<ImmutableBytesWritable, Put> implements Configurable {
+    private Configuration conf = null;
+    private DenseMatrix matrix;
+
+    /**
+     * Emits a {@link Put} for the current row whose column {@code i} holds the
+     * dot product of the current row with row {@code i} of the input matrix.
+     * 
+     * @param key row key of the current input row
+     * @param value scan result holding the current row
+     * @param context job context the resulting {@link Put} is written to
+     * @throws IOException if reading the matrix or writing the output fails
+     * @throws InterruptedException if the write is interrupted
+     */
+    @Override
+    public void map(ImmutableBytesWritable key, Result value, Context context)
+        throws IOException, InterruptedException {
+      DenseVector v = new DenseVector(value);
+
+      // These values do not change inside the loop, so resolve them once per
+      // input row instead of once per column.
+      byte[] family = Bytes.toBytes(Constants.COLUMN_FAMILY);
+      int currentRow = BytesUtil.getRowIndex(key.get());
+      int rows = matrix.getRows();
+
+      Put put = new Put(key.get());
+      for (int i = 0; i < rows; i++) {
+        // The diagonal entry is always stored as 0, so the row does not need
+        // to be fetched for it.
+        double dotProduct = (i == currentRow) ? 0 : matrix.getRow(i).dot(v);
+        put.add(family, Bytes.toBytes(String.valueOf(i)), BytesUtil
+            .doubleToBytes(dotProduct));
+      }
+
+      context.write(key, put);
+    }
+
+    @Override
+    public Configuration getConf() {
+      return conf;
+    }
+
+    /**
+     * Stores the configuration and opens the matrix named by "input.matrix".
+     * 
+     * @param conf job configuration
+     * @throws IllegalStateException if the matrix cannot be opened; failing
+     *           here keeps the root cause instead of a later
+     *           NullPointerException in {@code map}
+     */
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+      String matrixName = conf.get("input.matrix");
+      try {
+        matrix = new DenseMatrix(new HamaConfiguration(), matrixName);
+      } catch (IOException e) {
+        throw new IllegalStateException("Unable to open input matrix '"
+            + matrixName + "'", e);
+      }
+    }
+  }
+
+  /**
+   * Builds the map-only job: scans the input matrix table {@code args[0]} and
+   * writes the mapper output to the table {@code args[1]}.
+   * 
+   * @param conf configuration the job is created from
+   * @param args {@code args[0]} input matrix, {@code args[1]} output matrix
+   * @return the configured job
+   * @throws IOException if the job cannot be set up
+   */
+  private static Job configureJob(HamaConfiguration conf, String[] args)
+      throws IOException {
+    Job job = new Job(conf, "set MR job test");
+    job.getConfiguration().set("input.matrix", args[0]);
+
+    Scan scan = new Scan();
+    scan.addFamily(Bytes.toBytes(Constants.COLUMN_FAMILY));
+
+    TableMapReduceUtil.initTableMapperJob(args[0], scan,
+        ComputeSimilarityMapper.class, ImmutableBytesWritable.class, Put.class,
+        job);
+    TableMapReduceUtil.initTableReducerJob(args[1], IdentityTableReducer.class,
+        job);
+    // Map-only job: the mapper output is written without a reduce phase.
+    job.setNumReduceTasks(0);
+    return job;
+  }
+
+  /**
+   * Usage: {@code <input matrix> <output similarity matrix>}
+   * 
+   * @param args input matrix name followed by the output matrix name
+   * @throws Exception if the job cannot be configured or run
+   */
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.out.println("Usage: <input matrix> <output similarity matrix>");
+      // Stop here; continuing would fail on args[0]/args[1] below.
+      System.exit(-1);
+    }
+
+    HamaConfiguration conf = new HamaConfiguration();
+    Job job = configureJob(conf, args);
+    System.exit(job.waitForCompletion(true) ? 0 : 1);
+  }
+}
Modified: incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java?rev=828916&r1=828915&r2=828916&view=diff
==============================================================================
--- incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java (original)
+++ incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java Fri Oct 23 02:03:37 2009
@@ -29,7 +29,7 @@
pgd.addClass("rand", RandomMatrix.class, "Generate matrix with random elements.");
pgd.addClass("add", MatrixAddition.class, "Mat-Mat addition.");
pgd.addClass("mult", MatrixMultiplication.class, "Mat-Mat multiplication.");
- pgd.addClass("multfiles", FileMatrixBlockMult.class, "file matrices multiplication.");
+ pgd.addClass("similarity", CosineSimilarityMatrix.class, "Cosine Similarity Matrix.");
pgd.addClass("norm", MatrixNorm.class, "Maximum absolute row sum of matrix");
pgd.driver(args);
} catch (Throwable e) {
Modified: incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java?rev=828916&r1=828915&r2=828916&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java Fri Oct 23 02:03:37 2009
@@ -25,6 +25,8 @@
import java.util.Map;
import java.util.NavigableMap;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -405,9 +407,10 @@
}
public static class ScanMapper extends
- TableMapper<ImmutableBytesWritable, Put> {
- private static List<Double> alpha = new ArrayList<Double>();
-
+ TableMapper<ImmutableBytesWritable, Put> implements Configurable {
+ private static Double alpha = null;
+ private Configuration conf = null;
+
public void map(ImmutableBytesWritable key, Result value, Context context)
throws IOException, InterruptedException {
Put put = new Put(key.get());
@@ -419,13 +422,12 @@
for (Map.Entry<byte[], byte[]> b : a.getValue().entrySet()) {
byte[] qualifier = b.getKey();
byte[] val = b.getValue();
- if (alpha.size() == 0) {
+ if (alpha.equals(new Double(1))) {
put.add(family, qualifier, val);
} else {
if (Bytes.toString(family).equals(Constants.COLUMN_FAMILY)) {
double currVal = BytesUtil.bytesToDouble(val);
- put.add(family, qualifier, BytesUtil.doubleToBytes(currVal
- * alpha.get(0)));
+ put.add(family, qualifier, BytesUtil.doubleToBytes(currVal * alpha));
} else {
put.add(family, qualifier, val);
}
@@ -436,10 +438,16 @@
context.write(key, put);
}
- public static void setAlpha(double a) {
- if (alpha.size() > 0)
- alpha = new ArrayList<Double>();
- alpha.add(a);
+ @Override
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
+ this.conf = conf;
+ Float f = conf.getFloat("set.alpha", 1);
+ alpha = f.doubleValue();
}
}
@@ -484,7 +492,9 @@
scan.addFamily(Bytes.toBytes(JacobiEigenValue.EI_COLUMNFAMILY));
scan.addFamily(Bytes.toBytes(JacobiEigenValue.EICOL_FAMILY));
scan.addFamily(Bytes.toBytes(JacobiEigenValue.EIVEC_FAMILY));
- ScanMapper.setAlpha(alpha);
+ Float f = new Float(alpha);
+ job.getConfiguration().setFloat("set.alpha", f);
+
org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.initTableMapperJob(B
.getPath(), scan, ScanMapper.class, ImmutableBytesWritable.class,
Put.class, job);
Added: incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java?rev=828916&view=auto
==============================================================================
--- incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java (added)
+++ incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java Fri Oct 23 02:03:37 2009
@@ -0,0 +1,114 @@
+package org.apache.hama.examples;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hama.Constants;
+import org.apache.hama.HamaCluster;
+import org.apache.hama.HamaConfiguration;
+import org.apache.hama.matrix.DenseMatrix;
+import org.apache.hama.matrix.DenseVector;
+import org.apache.hama.matrix.Matrix;
+import org.apache.hama.matrix.Vector;
+import org.apache.hama.util.BytesUtil;
+import org.apache.log4j.Logger;
+
+/**
+ * Runs the similarity mapper over a random matrix on the test cluster and
+ * checks one entry of the output against a dot product computed directly.
+ */
+public class TestCosineSimilarityMatrix extends HamaCluster {
+  static final Logger LOG = Logger.getLogger(TestCosineSimilarityMatrix.class);
+  /** Number of rows and columns of the matrices used by the test. */
+  private static final int SIZE = 10;
+  /** Tolerance used when comparing floating point results. */
+  private static final double DELTA = 1e-9;
+  private Matrix m1;
+  private Matrix symmetricMatrix;
+  private HamaConfiguration conf;
+
+  /**
+   * @throws UnsupportedEncodingException
+   */
+  public TestCosineSimilarityMatrix() throws UnsupportedEncodingException {
+    super();
+  }
+
+  /**
+   * Creates the random input matrix and the matrix the job writes into.
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+
+    conf = getConf();
+
+    m1 = DenseMatrix.random(conf, SIZE, SIZE);
+    symmetricMatrix = new DenseMatrix(conf, SIZE, SIZE);
+  }
+
+  /**
+   * Runs the map-only job and verifies that entry (0, 2) of the output equals
+   * the dot product of rows 0 and 2 of the input.
+   * 
+   * @throws IOException if the job cannot be configured or the matrices
+   *           cannot be read
+   */
+  public void testCosineSimilarity() throws IOException {
+    Job job = new Job(conf, "set MR job test");
+    job.getConfiguration().set("input.matrix", m1.getPath());
+
+    Scan scan = new Scan();
+    scan.addFamily(Bytes.toBytes(Constants.COLUMN_FAMILY));
+
+    TableMapReduceUtil.initTableMapperJob(m1.getPath(), scan,
+        ComputeSimilarity.class, ImmutableBytesWritable.class, Put.class, job);
+    TableMapReduceUtil.initTableReducerJob(symmetricMatrix.getPath(),
+        IdentityTableReducer.class, job);
+    job.setNumReduceTasks(0);
+
+    // A job that failed or never ran must fail the test rather than fall
+    // through to the assertion on stale output.
+    boolean succeeded = false;
+    try {
+      succeeded = job.waitForCompletion(true);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      fail("Interrupted while waiting for the similarity job: " + e);
+    } catch (ClassNotFoundException e) {
+      fail("Similarity job could not load a required class: " + e);
+    }
+    assertTrue("Similarity job did not complete successfully", succeeded);
+
+    Vector v1 = m1.getRow(0);
+    Vector v2 = m1.getRow(2);
+    assertEquals(v1.dot(v2), symmetricMatrix.get(0, 2), DELTA);
+  }
+
+  /**
+   * Mapper under test: for each input row it writes the dot product with
+   * every row of the matrix named by the "input.matrix" configuration key,
+   * storing 0 for the row's own index.
+   */
+  public static class ComputeSimilarity extends
+      TableMapper<ImmutableBytesWritable, Put> implements Configurable {
+    private Configuration conf = null;
+    private DenseMatrix matrix;
+
+    @Override
+    public void map(ImmutableBytesWritable key, Result value, Context context)
+        throws IOException, InterruptedException {
+      DenseVector v = new DenseVector(value);
+
+      // Loop-invariant values, resolved once per input row.
+      byte[] family = Bytes.toBytes(Constants.COLUMN_FAMILY);
+      int currentRow = BytesUtil.getRowIndex(key.get());
+      int rows = matrix.getRows();
+
+      Put put = new Put(key.get());
+      for (int i = 0; i < rows; i++) {
+        // The diagonal entry is always stored as 0.
+        double dotProduct = (i == currentRow) ? 0 : matrix.getRow(i).dot(v);
+        put.add(family, Bytes.toBytes(String.valueOf(i)), BytesUtil
+            .doubleToBytes(dotProduct));
+      }
+
+      context.write(key, put);
+    }
+
+    @Override
+    public Configuration getConf() {
+      return conf;
+    }
+
+    /**
+     * Stores the configuration and opens the matrix named by "input.matrix".
+     * 
+     * @throws IllegalStateException if the matrix cannot be opened, so the
+     *           root cause is reported instead of a later
+     *           NullPointerException in {@code map}
+     */
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+      String matrixName = conf.get("input.matrix");
+      try {
+        matrix = new DenseMatrix(new HamaConfiguration(), matrixName);
+      } catch (IOException e) {
+        throw new IllegalStateException("Unable to open input matrix '"
+            + matrixName + "'", e);
+      }
+    }
+  }
+}