You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2008/12/03 08:49:29 UTC

svn commit: r722801 - in /incubator/hama/trunk: ./ src/examples/org/apache/hama/examples/ src/java/org/apache/hama/ src/java/org/apache/hama/mapred/ src/test/org/apache/hama/

Author: edwardyoon
Date: Tue Dec  2 23:49:28 2008
New Revision: 722801

URL: http://svn.apache.org/viewvc?rev=722801&view=rev
Log:
Random matrix generator on map/reduce

Added:
    incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java
Modified:
    incubator/hama/trunk/CHANGES.txt
    incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java
    incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java
    incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
    incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java

Modified: incubator/hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/CHANGES.txt?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/CHANGES.txt (original)
+++ incubator/hama/trunk/CHANGES.txt Tue Dec  2 23:49:28 2008
@@ -33,6 +33,7 @@
     
   IMPROVEMENTS
     
+    HAMA-113: Random matrix generator on map/reduce (edwardyoon)
     HAMA-118: Add getting started page link (edwardyoon)
     HAMA-117: Move bytesToSubMatrix/subMatrixToBytes method 
                 to submatrix class from BytesUtil (edwardyoon)

Modified: incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java (original)
+++ incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java Tue Dec  2 23:49:28 2008
@@ -40,9 +40,9 @@
       parseArgs(args);
     }
 
-    DenseMatrix a = DenseMatrix.random(conf, row, column);
-    DenseMatrix b = DenseMatrix.random(conf, row, column);
-
+    DenseMatrix a = DenseMatrix.random_mapred(conf, row, column);
+    DenseMatrix b = DenseMatrix.random_mapred(conf, row, column);
+    
     if (!a.isBlocked())
       a.blocking_mapred(conf.getNumMapTasks());
     if (!b.isBlocked())

Modified: incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java Tue Dec  2 23:49:28 2008
@@ -97,7 +97,7 @@
   public int getRows() throws IOException {
     Cell rows = null;
     rows = table.get(Constants.METADATA, Constants.METADATA_ROWS);
-    return BytesUtil.bytesToInt(rows.getValue());
+    return (rows != null) ? BytesUtil.bytesToInt(rows.getValue()) : 0;
   }
 
   /** {@inheritDoc} */

Modified: incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java Tue Dec  2 23:49:28 2008
@@ -21,6 +21,8 @@
 
 import java.io.IOException;
 
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.client.HTable;
@@ -29,8 +31,16 @@
 import org.apache.hadoop.hbase.io.Cell;
 import org.apache.hadoop.hbase.io.RowResult;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
 import org.apache.hama.algebra.BlockCyclicMultiplyMap;
 import org.apache.hama.algebra.BlockCyclicMultiplyReduce;
 import org.apache.hama.algebra.RowCyclicAdditionMap;
@@ -44,6 +54,7 @@
 import org.apache.hama.io.VectorWritable;
 import org.apache.hama.mapred.BlockCyclicReduce;
 import org.apache.hama.mapred.BlockingMapRed;
+import org.apache.hama.mapred.RandomMatrixMap;
 import org.apache.hama.mapred.RowCyclicReduce;
 import org.apache.hama.util.BytesUtil;
 import org.apache.hama.util.JobManager;
@@ -53,7 +64,9 @@
 
   static int tryPathLength = Constants.DEFAULT_PATH_LENGTH;
   static final String TABLE_PREFIX = DenseMatrix.class.getSimpleName() + "_";
-
+  static private final Path TMP_DIR = new Path(
+      DenseMatrix.class.getSimpleName() + "_TMP_dir");
+  
   /**
    * Construct a raw matrix. Just create a table in HBase, but didn't lay any
    * schema ( such as dimensions: i, j ) on it.
@@ -227,6 +240,68 @@
   }
 
   /**
+   * Generate matrix with random elements using Map/Reduce. Every map task is
+   * given one inclusive [start, end] range of row indices to fill.
+   * @param conf configuration object
+   * @param m the number of rows.
+   * @param n the number of columns.
+   * @return an m-by-n matrix with uniformly distributed random elements.
+   * @throws IOException if writing the task inputs or running the job fails
+   */
+  public static DenseMatrix random_mapred(HamaConfiguration conf, int m,
+      int n) throws IOException {
+    DenseMatrix rand = new DenseMatrix(conf);
+    LOG.info("Create the " + m + " * " + n + " random matrix : "
+        + rand.getPath());
+
+    JobConf jobConf = new JobConf(conf);
+    jobConf.setJobName("random matrix MR job : " + rand.getPath());
+
+    final int numMapTasks = conf.getNumMapTasks();
+    jobConf.setNumMapTasks(numMapTasks);
+    jobConf.setNumReduceTasks(conf.getNumReduceTasks());
+
+    final Path inDir = new Path(TMP_DIR, "in");
+    FileInputFormat.setInputPaths(jobConf, inDir);
+    jobConf.setMapperClass(RandomMatrixMap.class);
+
+    jobConf.setOutputKeyClass(BooleanWritable.class);
+    jobConf.setOutputValueClass(LongWritable.class);
+    jobConf.setOutputFormat(NullOutputFormat.class);
+    jobConf.setSpeculativeExecution(false);
+    jobConf.set("matrix.column", String.valueOf(n));
+    jobConf.set("matrix.path", rand.getPath());
+
+    jobConf.setInputFormat(SequenceFileInputFormat.class);
+    final FileSystem fs = FileSystem.get(jobConf);
+    final int interval = m / numMapTasks;
+
+    // generate an input file for each map task
+    for (int i = 0; i < numMapTasks; ++i) {
+      final Path file = new Path(inDir, "part" + i);
+      final IntWritable start = new IntWritable(i * interval);
+      // RandomMatrixMap iterates start..end inclusively, so the last range
+      // must stop at row m - 1 (not m); it also takes any remainder rows.
+      final boolean lastTask = (i + 1) == numMapTasks;
+      final IntWritable end = new IntWritable(
+          lastTask ? m - 1 : ((i * interval) + interval) - 1);
+      final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf,
+          file, IntWritable.class, IntWritable.class, CompressionType.NONE);
+      try {
+        writer.append(start, end);
+      } finally {
+        writer.close();
+      }
+      LOG.info("Wrote input for Map #" + i);
+    }
+
+    // NOTE(review): inDir is never deleted; stale part files may be re-read.
+    JobClient.runJob(jobConf);
+    rand.setDimension(m, n);
+    return rand;
+  }
+  
+  /**
    * Generate identity matrix
    * 
    * @param conf configuration object
@@ -398,7 +473,8 @@
   public void blocking_mapred(int blockNum) throws IOException {
     setBlockPosition(blockNum);
     setBlockSize(blockNum);
-
+    LOG.info("Convert to " + blockNum + " * " + blockNum + " blocked matrix");
+    
     JobConf jobConf = new JobConf(config);
     jobConf.setJobName("Blocking MR job" + getPath());
 
@@ -504,5 +580,4 @@
       setBlock(blockR, blockC, subMatrix(pos[0], pos[1], pos[2], pos[3]));
     }
   }
-
 }

Added: incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java?rev=722801&view=auto
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java (added)
+++ incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java Tue Dec  2 23:49:28 2008
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hama.DenseMatrix;
+import org.apache.hama.DenseVector;
+import org.apache.hama.HamaConfiguration;
+import org.apache.hama.Matrix;
+import org.apache.hama.util.RandomVariable;
+import org.apache.log4j.Logger;
+
+/**
+ * Map task that fills a range of matrix rows with random elements.
+ *
+ * Each input record is a pair of row indices: the key is the first row and
+ * the value is the last row (inclusive). For every row in that range a
+ * vector of "matrix.column" random elements is written directly into the
+ * matrix named by "matrix.path"; nothing is sent to the output collector.
+ */
+public class RandomMatrixMap extends MapReduceBase implements
+    Mapper<IntWritable, IntWritable, BooleanWritable, LongWritable> {
+  static final Logger LOG = Logger.getLogger(RandomMatrixMap.class);
+  protected Matrix matrix;
+  protected int column;
+
+  /**
+   * Generates the rows key..value (both inclusive) of the target matrix.
+   *
+   * @param key index of the first row to generate
+   * @param value index of the last row to generate
+   * @param output not used; rows are stored in the matrix itself
+   * @param report not used
+   * @throws IOException propagated from the matrix operations
+   */
+  @Override
+  public void map(IntWritable key, IntWritable value,
+      OutputCollector<BooleanWritable, LongWritable> output, Reporter report)
+      throws IOException {
+    DenseVector vector = new DenseVector();
+    for (int i = key.get(); i <= value.get(); i++) {
+      vector.clear();
+      for (int j = 0; j < column; j++) {
+        vector.set(j, RandomVariable.rand());
+      }
+      matrix.setRow(i, vector);
+    }
+  }
+
+  /**
+   * Reads the column count ("matrix.column") and opens the target matrix
+   * ("matrix.path") from the job configuration.
+   *
+   * @param job the job configuration
+   * @throws RuntimeException if the target matrix cannot be opened
+   */
+  @Override
+  public void configure(JobConf job) {
+    column = Integer.parseInt(job.get("matrix.column"));
+    final String path = job.get("matrix.path");
+    try {
+      matrix = new DenseMatrix(new HamaConfiguration(), path);
+    } catch (IOException e) {
+      // Fail the task right away: merely printing the stack trace would
+      // leave matrix null and every map() call would then die with a
+      // NullPointerException that hides the real cause.
+      throw new RuntimeException("Unable to open the matrix : " + path, e);
+    }
+  }
+}

Modified: incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java (original)
+++ incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java Tue Dec  2 23:49:28 2008
@@ -349,4 +349,21 @@
       }
     }
   }
+
+  /**
+   * Generates a 20-by-20 random matrix with the Map/Reduce generator and
+   * checks its dimensions and elements.
+   */
+  public void testRandomMatrixMapReduce() throws IOException {
+    DenseMatrix rand = DenseMatrix.random_mapred(conf, 20, 20);
+    // Pin the dimensions first: if getRows() or getColumns() reported 0
+    // (e.g. missing metadata) the element loop below would pass vacuously.
+    assertEquals(20, rand.getRows());
+    assertEquals(20, rand.getColumns());
+    for(int i = 0; i < rand.getRows(); i++) {
+      for(int j = 0; j < rand.getColumns(); j++) {
+        assertTrue(rand.get(i, j) > -1);
+      }
+    }
+  }
 }