You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2009/10/23 04:03:38 UTC

svn commit: r828916 - in /incubator/hama/trunk/src: examples/org/apache/hama/examples/ java/org/apache/hama/matrix/ test/org/apache/hama/examples/

Author: edwardyoon
Date: Fri Oct 23 02:03:37 2009
New Revision: 828916

URL: http://svn.apache.org/viewvc?rev=828916&view=rev
Log:
cosine similarity matrix

Added:
    incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java
    incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java
Removed:
    incubator/hama/trunk/src/examples/org/apache/hama/examples/FileMatrixBlockMult.java
Modified:
    incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java
    incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java

Added: incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java?rev=828916&view=auto
==============================================================================
--- incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java (added)
+++ incubator/hama/trunk/src/examples/org/apache/hama/examples/CosineSimilarityMatrix.java Fri Oct 23 02:03:37 2009
@@ -0,0 +1,106 @@
+package org.apache.hama.examples;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hama.Constants;
+import org.apache.hama.HamaConfiguration;
+import org.apache.hama.matrix.DenseMatrix;
+import org.apache.hama.matrix.DenseVector;
+import org.apache.hama.util.BytesUtil;
+
+/**
+ * Cosine Similarity MapReduce
+ * 
+ * <p>
+ * This is EXAMPLE code. You will need to change it to work for your context.
+ * </p>
+ * 
+ * <pre>
+ * ./bin/hama examples similarity INPUT_MATRIX OUTPUT_NAME
+ * </pre>
+ */
+public class CosineSimilarityMatrix {
+
+  /** Writes each input row's dot product with every row of the "input.matrix" matrix. */
+  public static class ComputeSimilarityMapper extends
+      TableMapper<ImmutableBytesWritable, Put> implements Configurable {
+    private Configuration conf = null;
+    private DenseMatrix matrix;
+
+    @Override
+    public void map(ImmutableBytesWritable key, Result value, Context context)
+        throws IOException, InterruptedException {
+      DenseVector v = new DenseVector(value);
+      long self = BytesUtil.getRowIndex(key.get());
+      byte[] family = Bytes.toBytes(Constants.COLUMN_FAMILY);
+      Put put = new Put(key.get());
+      for (int i = 0; i < matrix.getRows(); i++) {
+        // A row's entry for itself is forced to 0; the rest are raw dot products.
+        double dotProduct = (i == self) ? 0 : matrix.getRow(i).dot(v);
+        put.add(family, Bytes.toBytes(String.valueOf(i)),
+            BytesUtil.doubleToBytes(dotProduct));
+      }
+      context.write(key, put);
+    }
+
+    @Override
+    public Configuration getConf() {
+      return conf;
+    }
+
+    /** Opens the matrix named by "input.matrix"; fails fast if it cannot be opened. */
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+      try {
+        matrix = new DenseMatrix(new HamaConfiguration(), conf.get("input.matrix"));
+      } catch (IOException e) {
+        // Swallowing this left matrix null, so every map() call died with an NPE.
+        throw new IllegalStateException("Cannot open input matrix", e);
+      }
+    }
+  }
+
+  /** Builds the map-only job that scans table args[0], with table args[1] as its output. */
+  private static Job configureJob(HamaConfiguration conf, String[] args)
+      throws IOException {
+    final Job mrJob = new Job(conf, "set MR job test");
+    final String inputTable = args[0];
+    // Each mapper reads this key in setConf() to open the matrix it compares against.
+    mrJob.getConfiguration().set("input.matrix", inputTable);
+    final Scan familyScan = new Scan();
+    familyScan.addFamily(Bytes.toBytes(Constants.COLUMN_FAMILY));
+    TableMapReduceUtil.initTableMapperJob(inputTable, familyScan,
+        ComputeSimilarityMapper.class, ImmutableBytesWritable.class, Put.class, mrJob);
+    TableMapReduceUtil.initTableReducerJob(args[1], IdentityTableReducer.class, mrJob);
+    mrJob.setNumReduceTasks(0);
+    return mrJob;
+  }
+
+  /**
+   * Runs the similarity job: {@code <input matrix> <output similarity matrix>}.
+   *
+   * @param args input matrix name, then output similarity matrix name
+   * @throws Exception propagated from job configuration or execution
+   */
+  public static void main(String[] args) throws Exception {
+    if (args.length < 2) {
+      System.out.println("Usage:  <input matrix> <output similarity matrix>");
+      System.exit(-1); // otherwise configureJob() indexes past the end of args
+    }
+    HamaConfiguration conf = new HamaConfiguration();
+    Job job = configureJob(conf, args);
+    System.exit(job.waitForCompletion(true) ? 0 : 1);
+  }
+}

Modified: incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java?rev=828916&r1=828915&r2=828916&view=diff
==============================================================================
--- incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java (original)
+++ incubator/hama/trunk/src/examples/org/apache/hama/examples/ExampleDriver.java Fri Oct 23 02:03:37 2009
@@ -29,7 +29,7 @@
       pgd.addClass("rand", RandomMatrix.class, "Generate matrix with random elements.");
       pgd.addClass("add", MatrixAddition.class, "Mat-Mat addition.");
       pgd.addClass("mult", MatrixMultiplication.class, "Mat-Mat multiplication.");
-      pgd.addClass("multfiles", FileMatrixBlockMult.class, "file matrices multiplication.");
+      pgd.addClass("similarity", CosineSimilarityMatrix.class, "Cosine Similarity Matrix.");
       pgd.addClass("norm", MatrixNorm.class, "Maximum absolute row sum of matrix");
       pgd.driver(args);
     } catch (Throwable e) {

Modified: incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java?rev=828916&r1=828915&r2=828916&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/matrix/AbstractMatrix.java Fri Oct 23 02:03:37 2009
@@ -25,6 +25,8 @@
 import java.util.Map;
 import java.util.NavigableMap;
 
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -405,9 +407,10 @@
   }
 
   public static class ScanMapper extends
-      TableMapper<ImmutableBytesWritable, Put> {
-    private static List<Double> alpha = new ArrayList<Double>();
-
+      TableMapper<ImmutableBytesWritable, Put> implements Configurable {
+    private static Double alpha = null;
+    private Configuration conf = null;
+    
     public void map(ImmutableBytesWritable key, Result value, Context context)
         throws IOException, InterruptedException {
       Put put = new Put(key.get());
@@ -419,13 +422,12 @@
         for (Map.Entry<byte[], byte[]> b : a.getValue().entrySet()) {
           byte[] qualifier = b.getKey();
           byte[] val = b.getValue();
-          if (alpha.size() == 0) {
+          if (alpha.equals(new Double(1))) {
             put.add(family, qualifier, val);
           } else {
             if (Bytes.toString(family).equals(Constants.COLUMN_FAMILY)) {
               double currVal = BytesUtil.bytesToDouble(val);
-              put.add(family, qualifier, BytesUtil.doubleToBytes(currVal
-                  * alpha.get(0)));
+              put.add(family, qualifier, BytesUtil.doubleToBytes(currVal * alpha));
             } else {
               put.add(family, qualifier, val);
             }
@@ -436,10 +438,16 @@
       context.write(key, put);
     }
 
-    public static void setAlpha(double a) {
-      if (alpha.size() > 0)
-        alpha = new ArrayList<Double>();
-      alpha.add(a);
+    @Override
+    public Configuration getConf() {
+      return this.conf; // null until setConf() has been called
+    }
+
+    /** Stores conf and caches its "set.alpha" float (default 1) in the static alpha. */
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+      alpha = (double) conf.getFloat("set.alpha", 1);
+    }
   }
 
@@ -484,7 +492,9 @@
     scan.addFamily(Bytes.toBytes(JacobiEigenValue.EI_COLUMNFAMILY));
     scan.addFamily(Bytes.toBytes(JacobiEigenValue.EICOL_FAMILY));
     scan.addFamily(Bytes.toBytes(JacobiEigenValue.EIVEC_FAMILY));
-    ScanMapper.setAlpha(alpha);
+    Float f = new Float(alpha);
+    job.getConfiguration().setFloat("set.alpha", f);
+    
     org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.initTableMapperJob(B
         .getPath(), scan, ScanMapper.class, ImmutableBytesWritable.class,
         Put.class, job);

Added: incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java?rev=828916&view=auto
==============================================================================
--- incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java (added)
+++ incubator/hama/trunk/src/test/org/apache/hama/examples/TestCosineSimilarityMatrix.java Fri Oct 23 02:03:37 2009
@@ -0,0 +1,114 @@
+package org.apache.hama.examples;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
+import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
+import org.apache.hadoop.hbase.mapreduce.TableMapper;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hama.Constants;
+import org.apache.hama.HamaCluster;
+import org.apache.hama.HamaConfiguration;
+import org.apache.hama.matrix.DenseMatrix;
+import org.apache.hama.matrix.DenseVector;
+import org.apache.hama.matrix.Matrix;
+import org.apache.hama.matrix.Vector;
+import org.apache.hama.util.BytesUtil;
+import org.apache.log4j.Logger;
+
+public class TestCosineSimilarityMatrix extends HamaCluster {
+  static final Logger LOG = Logger.getLogger(TestCosineSimilarityMatrix.class);
+  private int SIZE = 10;
+  private Matrix m1;
+  private Matrix symmetricMatrix;
+  private HamaConfiguration conf;
+
+  /**
+   * @throws UnsupportedEncodingException presumably raised by the superclass constructor
+   */
+  public TestCosineSimilarityMatrix() throws UnsupportedEncodingException {
+    // Nothing to initialise here; the superclass constructor is invoked implicitly.
+  }
+
+  /** Runs the inherited setup, then creates the random input and the output matrix. */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    this.conf = getConf();
+    this.m1 = DenseMatrix.random(this.conf, SIZE, SIZE);
+    this.symmetricMatrix = new DenseMatrix(this.conf, SIZE, SIZE);
+  }
+
+  /**
+   * Runs the similarity job over a random matrix and checks one off-diagonal
+   * output entry against a dot product computed directly from the input rows.
+   *
+   * @throws Exception if the job cannot be configured, submitted or completed
+   */
+  public void testCosineSimilarity() throws Exception {
+    Job job = new Job(conf, "set MR job test");
+    job.getConfiguration().set("input.matrix", m1.getPath());
+    Scan scan = new Scan();
+    scan.addFamily(Bytes.toBytes(Constants.COLUMN_FAMILY));
+
+    TableMapReduceUtil.initTableMapperJob(m1.getPath(), scan,
+        ComputeSimilarity.class, ImmutableBytesWritable.class, Put.class, job);
+    TableMapReduceUtil.initTableReducerJob(symmetricMatrix.getPath(),
+        IdentityTableReducer.class, job);
+    job.setNumReduceTasks(0);
+    // Fail on the job itself instead of on a misleading read of unwritten output.
+    assertTrue("similarity job failed", job.waitForCompletion(true));
+
+    Vector v1 = m1.getRow(0);
+    Vector v2 = m1.getRow(2);
+    assertEquals(v1.dot(v2), symmetricMatrix.get(0, 2));
+  }
+
+  /** Writes each input row's dot product with every row of the "input.matrix" matrix. */
+  public static class ComputeSimilarity extends
+      TableMapper<ImmutableBytesWritable, Put> implements Configurable {
+    private Configuration conf = null;
+    private DenseMatrix matrix;
+
+    @Override
+    public void map(ImmutableBytesWritable key, Result value, Context context)
+        throws IOException, InterruptedException {
+      DenseVector v = new DenseVector(value);
+      long self = BytesUtil.getRowIndex(key.get());
+      byte[] family = Bytes.toBytes(Constants.COLUMN_FAMILY);
+      Put put = new Put(key.get());
+      for (int i = 0; i < matrix.getRows(); i++) {
+        // A row's entry for itself is forced to 0; the rest are raw dot products.
+        double dotProduct = (i == self) ? 0 : matrix.getRow(i).dot(v);
+        put.add(family, Bytes.toBytes(String.valueOf(i)),
+            BytesUtil.doubleToBytes(dotProduct));
+      }
+      context.write(key, put);
+    }
+
+    @Override
+    public Configuration getConf() {
+      return conf;
+    }
+
+    /** Opens the matrix named by "input.matrix"; fails fast if it cannot be opened. */
+    @Override
+    public void setConf(Configuration conf) {
+      this.conf = conf;
+      try {
+        matrix = new DenseMatrix(new HamaConfiguration(), conf.get("input.matrix"));
+      } catch (IOException e) {
+        // Swallowing this left matrix null, so every map() call died with an NPE.
+        throw new IllegalStateException("Cannot open input matrix", e);
+      }
+    }
+  }
+}