You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2008/12/03 08:49:29 UTC
svn commit: r722801 - in /incubator/hama/trunk: ./
src/examples/org/apache/hama/examples/ src/java/org/apache/hama/
src/java/org/apache/hama/mapred/ src/test/org/apache/hama/
Author: edwardyoon
Date: Tue Dec 2 23:49:28 2008
New Revision: 722801
URL: http://svn.apache.org/viewvc?rev=722801&view=rev
Log:
Random matrix generator on map/reduce
Added:
incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java
Modified:
incubator/hama/trunk/CHANGES.txt
incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java
incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java
incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java
Modified: incubator/hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/CHANGES.txt?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/CHANGES.txt (original)
+++ incubator/hama/trunk/CHANGES.txt Tue Dec 2 23:49:28 2008
@@ -33,6 +33,7 @@
IMPROVEMENTS
+ HAMA-113: Random matrix generator on map/reduce (edwardyoon)
HAMA-118: Add getting started page link (edwardyoon)
HAMA-117: Move bytesToSubMatrix/subMatrixToBytes method
to submatrix class from BytesUtil (edwardyoon)
Modified: incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java (original)
+++ incubator/hama/trunk/src/examples/org/apache/hama/examples/MatrixMultiplication.java Tue Dec 2 23:49:28 2008
@@ -40,9 +40,9 @@
parseArgs(args);
}
- DenseMatrix a = DenseMatrix.random(conf, row, column);
- DenseMatrix b = DenseMatrix.random(conf, row, column);
-
+ DenseMatrix a = DenseMatrix.random_mapred(conf, row, column);
+ DenseMatrix b = DenseMatrix.random_mapred(conf, row, column);
+
if (!a.isBlocked())
a.blocking_mapred(conf.getNumMapTasks());
if (!b.isBlocked())
Modified: incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/AbstractMatrix.java Tue Dec 2 23:49:28 2008
@@ -97,7 +97,7 @@
public int getRows() throws IOException {
Cell rows = null;
rows = table.get(Constants.METADATA, Constants.METADATA_ROWS);
- return BytesUtil.bytesToInt(rows.getValue());
+ return (rows != null) ? BytesUtil.bytesToInt(rows.getValue()) : 0;
}
/** {@inheritDoc} */
Modified: incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java Tue Dec 2 23:49:28 2008
@@ -21,6 +21,8 @@
import java.io.IOException;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HTable;
@@ -29,8 +31,16 @@
import org.apache.hadoop.hbase.io.Cell;
import org.apache.hadoop.hbase.io.RowResult;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.SequenceFileInputFormat;
+import org.apache.hadoop.mapred.lib.NullOutputFormat;
import org.apache.hama.algebra.BlockCyclicMultiplyMap;
import org.apache.hama.algebra.BlockCyclicMultiplyReduce;
import org.apache.hama.algebra.RowCyclicAdditionMap;
@@ -44,6 +54,7 @@
import org.apache.hama.io.VectorWritable;
import org.apache.hama.mapred.BlockCyclicReduce;
import org.apache.hama.mapred.BlockingMapRed;
+import org.apache.hama.mapred.RandomMatrixMap;
import org.apache.hama.mapred.RowCyclicReduce;
import org.apache.hama.util.BytesUtil;
import org.apache.hama.util.JobManager;
@@ -53,7 +64,9 @@
static int tryPathLength = Constants.DEFAULT_PATH_LENGTH;
static final String TABLE_PREFIX = DenseMatrix.class.getSimpleName() + "_";
-
+ static private final Path TMP_DIR = new Path(
+ DenseMatrix.class.getSimpleName() + "_TMP_dir");
+
/**
* Construct a raw matrix. Just create a table in HBase, but didn't lay any
* schema ( such as dimensions: i, j ) on it.
@@ -227,6 +240,68 @@
}
/**
+ * Generate matrix with random elements using Map/Reduce
+ *
+ * @param conf configuration object
+ * @param m the number of rows.
+ * @param n the number of columns.
+ * @return an m-by-n matrix with uniformly distributed random elements.
+ * @throws IOException
+ */
+ public static DenseMatrix random_mapred(HamaConfiguration conf, int m,
+ int n) throws IOException {
+ DenseMatrix rand = new DenseMatrix(conf);
+ LOG.info("Create the " + m + " * " + n + " random matrix : "
+ + rand.getPath());
+
+ JobConf jobConf = new JobConf(conf);
+ jobConf.setJobName("random matrix MR job : " + rand.getPath());
+
+ jobConf.setNumMapTasks(conf.getNumMapTasks());
+ jobConf.setNumReduceTasks(conf.getNumReduceTasks());
+
+ final Path inDir = new Path(TMP_DIR, "in");
+ FileInputFormat.setInputPaths(jobConf, inDir);
+ jobConf.setMapperClass(RandomMatrixMap.class);
+
+ jobConf.setOutputKeyClass(BooleanWritable.class);
+ jobConf.setOutputValueClass(LongWritable.class);
+ jobConf.setOutputFormat(NullOutputFormat.class);
+ jobConf.setSpeculativeExecution(false);
+ jobConf.set("matrix.column", String.valueOf(n));
+ jobConf.set("matrix.path", rand.getPath());
+
+ jobConf.setInputFormat(SequenceFileInputFormat.class);
+ final FileSystem fs = FileSystem.get(jobConf);
+ int interval = m/conf.getNumMapTasks();
+
+ // generate an input file for each map task
+ for (int i = 0; i < conf.getNumMapTasks(); ++i) {
+ final Path file = new Path(inDir, "part" + i);
+ final IntWritable start = new IntWritable(i * interval);
+ IntWritable end = null;
+ if((i + 1) != conf.getNumMapTasks()) {
+ end = new IntWritable(((i * interval) + interval) - 1);
+ } else {
+ end = new IntWritable(m);
+ }
+ final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf,
+ file, IntWritable.class, IntWritable.class, CompressionType.NONE);
+ try {
+ writer.append(start, end);
+ } finally {
+ writer.close();
+ }
+ System.out.println("Wrote input for Map #" + i);
+ }
+
+ JobClient.runJob(jobConf);
+
+ rand.setDimension(m, n);
+ return rand;
+ }
+
+ /**
* Generate identity matrix
*
* @param conf configuration object
@@ -398,7 +473,8 @@
public void blocking_mapred(int blockNum) throws IOException {
setBlockPosition(blockNum);
setBlockSize(blockNum);
-
+ LOG.info("Convert to " + blockNum + " * " + blockNum + " blocked matrix");
+
JobConf jobConf = new JobConf(config);
jobConf.setJobName("Blocking MR job" + getPath());
@@ -504,5 +580,4 @@
setBlock(blockR, blockC, subMatrix(pos[0], pos[1], pos[2], pos[3]));
}
}
-
}
Added: incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java?rev=722801&view=auto
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java (added)
+++ incubator/hama/trunk/src/java/org/apache/hama/mapred/RandomMatrixMap.java Tue Dec 2 23:49:28 2008
@@ -0,0 +1,70 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hama.mapred;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hama.DenseMatrix;
+import org.apache.hama.DenseVector;
+import org.apache.hama.HamaConfiguration;
+import org.apache.hama.Matrix;
+import org.apache.hama.util.RandomVariable;
+import org.apache.log4j.Logger;
+
+/**
+ * Generate matrix with random elements.
+ *
+ * Each input record is a (first row, last row) pair of row indices, both
+ * inclusive. The rows in that range are filled with random values and
+ * written directly into the matrix named by the "matrix.path" job property;
+ * nothing is emitted to the output collector.
+ */
+public class RandomMatrixMap extends MapReduceBase implements
+    Mapper<IntWritable, IntWritable, BooleanWritable, LongWritable> {
+  static final Logger LOG = Logger.getLogger(RandomMatrixMap.class);
+  /** Matrix the generated rows are written to; opened in configure(). */
+  protected Matrix matrix;
+  /** Number of elements generated per row, read from "matrix.column". */
+  protected int column;
+
+  /**
+   * Fills the rows key..value (inclusive) of the target matrix.
+   *
+   * @param key index of the first row to generate
+   * @param value index of the last row to generate (inclusive)
+   * @param output unused; this mapper emits no records
+   * @param report unused
+   * @throws IOException if a row cannot be written to the matrix
+   */
+  @Override
+  public void map(IntWritable key, IntWritable value,
+      OutputCollector<BooleanWritable, LongWritable> output, Reporter report)
+      throws IOException {
+    final int lastRow = value.get();
+    // One vector is reused for every row; it is cleared before being refilled.
+    DenseVector vector = new DenseVector();
+    for (int i = key.get(); i <= lastRow; i++) {
+      vector.clear();
+      for (int j = 0; j < column; j++) {
+        vector.set(j, RandomVariable.rand());
+      }
+      matrix.setRow(i, vector);
+    }
+  }
+
+  /**
+   * Reads the column count and opens the target matrix.
+   *
+   * @param job job configuration carrying "matrix.column" and "matrix.path"
+   * @throws RuntimeException if the matrix cannot be opened; continuing
+   *           would only fail later in map() with a null matrix
+   */
+  @Override
+  public void configure(JobConf job) {
+    final String path = job.get("matrix.path");
+    try {
+      column = Integer.parseInt(job.get("matrix.column"));
+      matrix = new DenseMatrix(new HamaConfiguration(), path);
+    } catch (IOException e) {
+      // configure() cannot throw a checked exception, so fail the task
+      // here with the cause attached instead of swallowing it.
+      throw new RuntimeException("Could not open matrix : " + path, e);
+    }
+  }
+}
Modified: incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java?rev=722801&r1=722800&r2=722801&view=diff
==============================================================================
--- incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java (original)
+++ incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java Tue Dec 2 23:49:28 2008
@@ -349,4 +349,13 @@
}
}
}
+
+  /**
+   * Generates a 20-by-20 random matrix with the Map/Reduce generator and
+   * checks its dimensions and every element.
+   *
+   * @throws IOException if the matrix cannot be generated or read
+   */
+  public void testRandomMatrixMapReduce() throws IOException {
+    DenseMatrix rand = DenseMatrix.random_mapred(conf, 20, 20);
+
+    // getRows() reports 0 when no dimension metadata is stored, which would
+    // turn the loops below into a no-op and let the test pass vacuously.
+    final int rows = rand.getRows();
+    final int columns = rand.getColumns();
+    assertEquals(20, rows);
+    assertEquals(20, columns);
+
+    for (int i = 0; i < rows; i++) {
+      for (int j = 0; j < columns; j++) {
+        assertTrue(rand.get(i, j) > -1);
+      }
+    }
+  }
}