You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2008/12/11 05:25:34 UTC
svn commit: r725570 - in /incubator/hama/trunk: CHANGES.txt
src/java/org/apache/hama/DenseMatrix.java
src/java/org/apache/hama/SubMatrix.java
src/java/org/apache/hama/mapred/BlockingMapRed.java
src/test/org/apache/hama/TestDenseMatrix.java
Author: edwardyoon
Date: Wed Dec 10 20:25:33 2008
New Revision: 725570
URL: http://svn.apache.org/viewvc?rev=725570&view=rev
Log:
Computing Block's range will miss some cell during blocking
Modified:
incubator/hama/trunk/CHANGES.txt
incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java
incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java
incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java
Modified: incubator/hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/CHANGES.txt?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/CHANGES.txt (original)
+++ incubator/hama/trunk/CHANGES.txt Wed Dec 10 20:25:33 2008
@@ -87,6 +87,8 @@
BUG FIXES
+ HAMA-130: Computing Block's range will miss some cell during blocking
+ (samuel)
HAMA-126: In random_mapred, (m) should be (m-1)
since start from 0. (edwardyoon)
HAMA-116: ArrayIndexOutOfBoundsException at BlockingMapRed (samuel)
Modified: incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java Wed Dec 10 20:25:33 2008
@@ -64,9 +64,10 @@
static int tryPathLength = Constants.DEFAULT_PATH_LENGTH;
static final String TABLE_PREFIX = DenseMatrix.class.getSimpleName() + "_";
- static private final Path TMP_DIR = new Path(
- DenseMatrix.class.getSimpleName() + "_TMP_dir");
-
+ static private final Path TMP_DIR = new Path(DenseMatrix.class
+ .getSimpleName()
+ + "_TMP_dir");
+
/**
* Construct a raw matrix. Just create a table in HBase, but didn't lay any
* schema ( such as dimensions: i, j ) on it.
@@ -248,12 +249,12 @@
* @return an m-by-n matrix with uniformly distributed random elements.
* @throws IOException
*/
- public static DenseMatrix random_mapred(HamaConfiguration conf, int m,
- int n) throws IOException {
+ public static DenseMatrix random_mapred(HamaConfiguration conf, int m, int n)
+ throws IOException {
DenseMatrix rand = new DenseMatrix(conf);
LOG.info("Create the " + m + " * " + n + " random matrix : "
+ rand.getPath());
-
+
JobConf jobConf = new JobConf(conf);
jobConf.setJobName("random matrix MR job : " + rand.getPath());
@@ -273,17 +274,17 @@
jobConf.setInputFormat(SequenceFileInputFormat.class);
final FileSystem fs = FileSystem.get(jobConf);
- int interval = m/conf.getNumMapTasks();
-
+ int interval = m / conf.getNumMapTasks();
+
// generate an input file for each map task
for (int i = 0; i < conf.getNumMapTasks(); ++i) {
final Path file = new Path(inDir, "part" + i);
final IntWritable start = new IntWritable(i * interval);
IntWritable end = null;
- if((i + 1) != conf.getNumMapTasks()) {
- end = new IntWritable(((i * interval) + interval) - 1);
+ if ((i + 1) != conf.getNumMapTasks()) {
+ end = new IntWritable(((i * interval) + interval) - 1);
} else {
- end = new IntWritable(m - 1);
+ end = new IntWritable(m - 1);
}
final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf,
file, IntWritable.class, IntWritable.class, CompressionType.NONE);
@@ -300,7 +301,7 @@
fs.delete(TMP_DIR, true);
return rand;
}
-
+
/**
* Generate identity matrix
*
@@ -470,8 +471,8 @@
}
public SubMatrix getBlock(int i, int j) throws IOException {
- return new SubMatrix(table.get(String.valueOf(i),
- Constants.BLOCK + j).getValue());
+ return new SubMatrix(table.get(String.valueOf(i), Constants.BLOCK + j)
+ .getValue());
}
/**
@@ -499,12 +500,20 @@
int block_row_size = this.getRows() / blockNum;
int block_column_size = this.getColumns() / blockNum;
- for (int i = 0; i < blockNum; i++) {
- for (int j = 0; j < blockNum; j++) {
- int startRow = i * block_row_size;
- int endRow = (startRow + block_row_size) - 1;
- int startColumn = j * block_column_size;
- int endColumn = (startColumn + block_column_size) - 1;
+ int startRow, endRow, startColumn, endColumn;
+ int i = 0, j = 0;
+ do {
+ startRow = i * block_row_size;
+ endRow = (startRow + block_row_size) - 1;
+ if (endRow >= this.getRows())
+ endRow = this.getRows() - 1;
+
+ j = 0;
+ do {
+ startColumn = j * block_column_size;
+ endColumn = (startColumn + block_column_size) - 1;
+ if (endColumn >= this.getColumns())
+ endColumn = this.getColumns() - 1;
BatchUpdate update = new BatchUpdate(getBlockKey(i, j));
update.put(Constants.BLOCK_STARTROW, BytesUtil.intToBytes(startRow));
@@ -513,8 +522,12 @@
.intToBytes(startColumn));
update.put(Constants.BLOCK_ENDCOLUMN, BytesUtil.intToBytes(endColumn));
table.commit(update);
- }
- }
+
+ j++;
+ } while (endColumn < (this.getColumns() - 1));
+
+ i++;
+ } while (endRow < (this.getRows() - 1));
}
protected int[] getBlockPosition(int i, int j) throws IOException {
@@ -542,7 +555,7 @@
*/
public void blocking_mapred(int blockNum) throws IOException {
this.checkBlockNum(blockNum);
-
+
JobConf jobConf = new JobConf(config);
jobConf.setJobName("Blocking MR job" + getPath());
@@ -563,7 +576,7 @@
*/
public void blocking(int blockNum) throws IOException {
this.checkBlockNum(blockNum);
-
+
String[] columns = new String[] { Constants.BLOCK_STARTROW,
Constants.BLOCK_ENDROW, Constants.BLOCK_STARTCOLUMN,
Constants.BLOCK_ENDCOLUMN };
@@ -577,14 +590,14 @@
setBlock(blockR, blockC, subMatrix(pos[0], pos[1], pos[2], pos[3]));
}
}
-
+
private void checkBlockNum(int blockNum) throws IOException {
double blocks = Math.pow(blockNum, 0.5);
// TODO: Check also it is validation with matrix.
- if(!String.valueOf(blocks).endsWith(".0"))
+ if (!String.valueOf(blocks).endsWith(".0"))
throw new IOException("can't divide.");
-
- int block_size = (int) blocks;
+
+ int block_size = (int) blocks;
setBlockPosition(block_size);
setBlockSize(block_size);
LOG.info("Create " + block_size + " * " + block_size + " blocked matrix");
Modified: incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java Wed Dec 10 20:25:33 2008
@@ -24,6 +24,7 @@
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import java.util.Arrays;
import org.apache.log4j.Logger;
@@ -102,7 +103,7 @@
double[][] C = new double[getRows()][getColumns()];
for (int i = 0; i < this.getRows(); i++) {
for (int j = 0; j < this.getColumns(); j++) {
- C[i][j] += this.get(i, j) + b.get(i, j);
+ C[i][j] = (this.get(i, j) + b.get(i, j));
}
}
@@ -116,11 +117,14 @@
* @return c
*/
public SubMatrix mult(SubMatrix b) {
- double[][] C = new double[getRows()][getColumns()];
+ double[][] C = new double[this.getRows()][b.getColumns()];
+ for (int i = 0; i < this.getRows(); i++) {
+ Arrays.fill(C[i], 0);
+ }
for (int i = 0; i < this.getRows(); i++) {
for (int j = 0; j < b.getColumns(); j++) {
for (int k = 0; k < this.getColumns(); k++) {
- C[i][k] += this.get(i, j) * b.get(j, k);
+ C[i][j] += this.get(i, k) * b.get(k, j);
}
}
}
Modified: incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java Wed Dec 10 20:25:33 2008
@@ -76,6 +76,9 @@
protected int mBlockNum;
protected int mBlockRowSize;
protected int mBlockColSize;
+
+ protected int mRows;
+ protected int mColumns;
@Override
public void configure(JobConf job) {
@@ -85,6 +88,9 @@
mBlockNum = matrix.getBlockSize();
mBlockRowSize = matrix.getRows() / mBlockNum;
mBlockColSize = matrix.getColumns() / mBlockNum;
+
+ mRows = matrix.getRows();
+ mColumns = matrix.getColumns();
} catch (IOException e) {
LOG.warn("Load matrix_blocking failed : " + e.getMessage());
}
@@ -106,12 +112,18 @@
int endColumn;
int blkRow = key.get() / mBlockRowSize;
DenseVector dv = value.getDenseVector();
- for (int i = 0; i < mBlockNum; i++) {
+
+ int i = 0;
+ do {
startColumn = i * mBlockColSize;
endColumn = startColumn + mBlockColSize - 1;
+ if(endColumn >= mColumns) // the last sub vector
+ endColumn = mColumns - 1;
output.collect(new BlockID(blkRow, i), new VectorWritable(key.get(),
dv.subVector(startColumn, endColumn)));
- }
+
+ i++;
+ } while(endColumn < (mColumns-1));
}
}
@@ -128,21 +140,35 @@
throws IOException {
// Note: all the sub-vectors are grouped by {@link
// org.apache.hama.io.BlockID}
- SubMatrix subMatrix = new SubMatrix(mBlockRowSize, mBlockColSize);
- int i = 0, j = 0;
+
+ // the block's base offset in the original matrix
int colBase = key.getColumn() * mBlockColSize;
int rowBase = key.getRow() * mBlockRowSize;
+
+ // the block's size : rows & columns
+ int smRows = mBlockRowSize;
+ if((rowBase + mBlockRowSize - 1) >= mRows)
+ smRows = mRows - rowBase;
+ int smCols = mBlockColSize;
+ if((colBase + mBlockColSize - 1) >= mColumns)
+ smCols = mColumns - colBase;
+
+ // construct the matrix
+ SubMatrix subMatrix = new SubMatrix(smRows, smCols);
+
+ // i, j is the current offset in the sub-matrix
+ int i = 0, j = 0;
while (values.hasNext()) {
VectorWritable vw = values.next();
// check the size is suitable
- if (vw.size() != mBlockColSize)
- throw new IOException("BlockColumnSize dismatched.");
+ if (vw.size() != smCols)
+ throw new IOException("Block Column Size dismatched.");
i = vw.row - rowBase;
- if (i >= mBlockRowSize || i < 0)
- throw new IOException("BlockRowSize dismatched.");
+ if (i >= smRows || i < 0)
+ throw new IOException("Block Row Size dismatched.");
// put the subVector to the subMatrix
- for (j = 0; j < mBlockColSize; j++) {
+ for (j = 0; j < smCols; j++) {
subMatrix.set(i, j, vw.get(colBase + j));
}
}
Modified: incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java (original)
+++ incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java Wed Dec 10 20:25:33 2008
@@ -149,21 +149,21 @@
}
public void testSubMatrix() throws IOException {
- SubMatrix a = m1.subMatrix(2, 4, 2, 4);
+ SubMatrix a = m1.subMatrix(2, 4, 2, 5); // A : 3 * 4
for (int i = 0; i < a.getRows(); i++) {
for (int j = 0; j < a.getColumns(); j++) {
assertEquals(a.get(i, j), m1.get(i + 2, j + 2));
}
}
- SubMatrix b = m2.subMatrix(0, 2, 0, 2);
+ SubMatrix b = m2.subMatrix(0, 3, 0, 2); // B : 4 * 3
SubMatrix c = a.mult(b);
- double[][] C = new double[3][3];
+ double[][] C = new double[3][3]; // A * B
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
- for (int k = 0; k < 3; k++) {
- C[i][k] += m1.get(i + 2, j + 2) * m2.get(j, k);
+ for (int k = 0; k < 4; k++) {
+ C[i][j] += m1.get(i + 2, k + 2) * m2.get(k, j);
}
}
}