You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hama.apache.org by ed...@apache.org on 2008/12/11 05:25:34 UTC

svn commit: r725570 - in /incubator/hama/trunk: CHANGES.txt src/java/org/apache/hama/DenseMatrix.java src/java/org/apache/hama/SubMatrix.java src/java/org/apache/hama/mapred/BlockingMapRed.java src/test/org/apache/hama/TestDenseMatrix.java

Author: edwardyoon
Date: Wed Dec 10 20:25:33 2008
New Revision: 725570

URL: http://svn.apache.org/viewvc?rev=725570&view=rev
Log:
Computing a block's range could miss some cells during blocking

Modified:
    incubator/hama/trunk/CHANGES.txt
    incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
    incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java
    incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java
    incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java

Modified: incubator/hama/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/CHANGES.txt?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/CHANGES.txt (original)
+++ incubator/hama/trunk/CHANGES.txt Wed Dec 10 20:25:33 2008
@@ -87,6 +87,8 @@
 
   BUG FIXES
    
+    HAMA-130: Computing Block's range will miss some cell during blocking
+              (samuel)
     HAMA-126: In random_mapred, (m) should be (m-1) 
                 since start from 0. (edwardyoon)
     HAMA-116: ArrayIndexOutOfBoundsException at BlockingMapRed (samuel)

Modified: incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/DenseMatrix.java Wed Dec 10 20:25:33 2008
@@ -64,9 +64,10 @@
 
   static int tryPathLength = Constants.DEFAULT_PATH_LENGTH;
   static final String TABLE_PREFIX = DenseMatrix.class.getSimpleName() + "_";
-  static private final Path TMP_DIR = new Path(
-      DenseMatrix.class.getSimpleName() + "_TMP_dir");
-  
+  static private final Path TMP_DIR = new Path(DenseMatrix.class
+      .getSimpleName()
+      + "_TMP_dir");
+
   /**
    * Construct a raw matrix. Just create a table in HBase, but didn't lay any
    * schema ( such as dimensions: i, j ) on it.
@@ -248,12 +249,12 @@
    * @return an m-by-n matrix with uniformly distributed random elements.
    * @throws IOException
    */
-  public static DenseMatrix random_mapred(HamaConfiguration conf, int m,
-      int n) throws IOException {
+  public static DenseMatrix random_mapred(HamaConfiguration conf, int m, int n)
+      throws IOException {
     DenseMatrix rand = new DenseMatrix(conf);
     LOG.info("Create the " + m + " * " + n + " random matrix : "
         + rand.getPath());
-    
+
     JobConf jobConf = new JobConf(conf);
     jobConf.setJobName("random matrix MR job : " + rand.getPath());
 
@@ -273,17 +274,17 @@
 
     jobConf.setInputFormat(SequenceFileInputFormat.class);
     final FileSystem fs = FileSystem.get(jobConf);
-    int interval = m/conf.getNumMapTasks();
-    
+    int interval = m / conf.getNumMapTasks();
+
     // generate an input file for each map task
     for (int i = 0; i < conf.getNumMapTasks(); ++i) {
       final Path file = new Path(inDir, "part" + i);
       final IntWritable start = new IntWritable(i * interval);
       IntWritable end = null;
-      if((i + 1) != conf.getNumMapTasks()) {
-        end = new IntWritable(((i * interval) + interval) - 1);        
+      if ((i + 1) != conf.getNumMapTasks()) {
+        end = new IntWritable(((i * interval) + interval) - 1);
       } else {
-        end = new IntWritable(m - 1);       
+        end = new IntWritable(m - 1);
       }
       final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf,
           file, IntWritable.class, IntWritable.class, CompressionType.NONE);
@@ -300,7 +301,7 @@
     fs.delete(TMP_DIR, true);
     return rand;
   }
-  
+
   /**
    * Generate identity matrix
    * 
@@ -470,8 +471,8 @@
   }
 
   public SubMatrix getBlock(int i, int j) throws IOException {
-    return new SubMatrix(table.get(String.valueOf(i),
-        Constants.BLOCK + j).getValue());
+    return new SubMatrix(table.get(String.valueOf(i), Constants.BLOCK + j)
+        .getValue());
   }
 
   /**
@@ -499,12 +500,20 @@
     int block_row_size = this.getRows() / blockNum;
     int block_column_size = this.getColumns() / blockNum;
 
-    for (int i = 0; i < blockNum; i++) {
-      for (int j = 0; j < blockNum; j++) {
-        int startRow = i * block_row_size;
-        int endRow = (startRow + block_row_size) - 1;
-        int startColumn = j * block_column_size;
-        int endColumn = (startColumn + block_column_size) - 1;
+    int startRow, endRow, startColumn, endColumn;
+    int i = 0, j = 0;
+    do {
+      startRow = i * block_row_size;
+      endRow = (startRow + block_row_size) - 1;
+      if (endRow >= this.getRows())
+        endRow = this.getRows() - 1;
+
+      j = 0;
+      do {
+        startColumn = j * block_column_size;
+        endColumn = (startColumn + block_column_size) - 1;
+        if (endColumn >= this.getColumns())
+          endColumn = this.getColumns() - 1;
 
         BatchUpdate update = new BatchUpdate(getBlockKey(i, j));
         update.put(Constants.BLOCK_STARTROW, BytesUtil.intToBytes(startRow));
@@ -513,8 +522,12 @@
             .intToBytes(startColumn));
         update.put(Constants.BLOCK_ENDCOLUMN, BytesUtil.intToBytes(endColumn));
         table.commit(update);
-      }
-    }
+
+        j++;
+      } while (endColumn < (this.getColumns() - 1));
+
+      i++;
+    } while (endRow < (this.getRows() - 1));
   }
 
   protected int[] getBlockPosition(int i, int j) throws IOException {
@@ -542,7 +555,7 @@
    */
   public void blocking_mapred(int blockNum) throws IOException {
     this.checkBlockNum(blockNum);
-    
+
     JobConf jobConf = new JobConf(config);
     jobConf.setJobName("Blocking MR job" + getPath());
 
@@ -563,7 +576,7 @@
    */
   public void blocking(int blockNum) throws IOException {
     this.checkBlockNum(blockNum);
-    
+
     String[] columns = new String[] { Constants.BLOCK_STARTROW,
         Constants.BLOCK_ENDROW, Constants.BLOCK_STARTCOLUMN,
         Constants.BLOCK_ENDCOLUMN };
@@ -577,14 +590,14 @@
       setBlock(blockR, blockC, subMatrix(pos[0], pos[1], pos[2], pos[3]));
     }
   }
-  
+
   private void checkBlockNum(int blockNum) throws IOException {
     double blocks = Math.pow(blockNum, 0.5);
     // TODO: Check also it is validation with matrix.
-    if(!String.valueOf(blocks).endsWith(".0"))
+    if (!String.valueOf(blocks).endsWith(".0"))
       throw new IOException("can't divide.");
-    
-    int block_size = (int) blocks;  
+
+    int block_size = (int) blocks;
     setBlockPosition(block_size);
     setBlockSize(block_size);
     LOG.info("Create " + block_size + " * " + block_size + " blocked matrix");

Modified: incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/SubMatrix.java Wed Dec 10 20:25:33 2008
@@ -24,6 +24,7 @@
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.util.Arrays;
 
 import org.apache.log4j.Logger;
 
@@ -102,7 +103,7 @@
     double[][] C = new double[getRows()][getColumns()];
     for (int i = 0; i < this.getRows(); i++) {
       for (int j = 0; j < this.getColumns(); j++) {
-        C[i][j] += this.get(i, j) + b.get(i, j);
+        C[i][j] = (this.get(i, j) + b.get(i, j));
       }
     }
 
@@ -116,11 +117,14 @@
    * @return c
    */
   public SubMatrix mult(SubMatrix b) {
-    double[][] C = new double[getRows()][getColumns()];
+    double[][] C = new double[this.getRows()][b.getColumns()];
+    for (int i = 0; i < this.getRows(); i++) {
+      Arrays.fill(C[i], 0);
+    }
     for (int i = 0; i < this.getRows(); i++) {
       for (int j = 0; j < b.getColumns(); j++) {
         for (int k = 0; k < this.getColumns(); k++) {
-          C[i][k] += this.get(i, j) * b.get(j, k);
+          C[i][j] += this.get(i, k) * b.get(k, j);
         }
       }
     }

Modified: incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java (original)
+++ incubator/hama/trunk/src/java/org/apache/hama/mapred/BlockingMapRed.java Wed Dec 10 20:25:33 2008
@@ -76,6 +76,9 @@
     protected int mBlockNum;
     protected int mBlockRowSize;
     protected int mBlockColSize;
+    
+    protected int mRows;
+    protected int mColumns;
 
     @Override
     public void configure(JobConf job) {
@@ -85,6 +88,9 @@
         mBlockNum = matrix.getBlockSize();
         mBlockRowSize = matrix.getRows() / mBlockNum;
         mBlockColSize = matrix.getColumns() / mBlockNum;
+        
+        mRows = matrix.getRows();
+        mColumns = matrix.getColumns();
       } catch (IOException e) {
         LOG.warn("Load matrix_blocking failed : " + e.getMessage());
       }
@@ -106,12 +112,18 @@
       int endColumn;
       int blkRow = key.get() / mBlockRowSize;
       DenseVector dv = value.getDenseVector();
-      for (int i = 0; i < mBlockNum; i++) {
+      
+      int i = 0;
+      do {
         startColumn = i * mBlockColSize;
         endColumn = startColumn + mBlockColSize - 1;
+        if(endColumn >= mColumns) // the last sub vector
+          endColumn = mColumns - 1;
         output.collect(new BlockID(blkRow, i), new VectorWritable(key.get(),
             dv.subVector(startColumn, endColumn)));
-      }
+        
+        i++;
+      } while(endColumn < (mColumns-1));
     }
 
   }
@@ -128,21 +140,35 @@
         throws IOException {
       // Note: all the sub-vectors are grouped by {@link
       // org.apache.hama.io.BlockID}
-      SubMatrix subMatrix = new SubMatrix(mBlockRowSize, mBlockColSize);
-      int i = 0, j = 0;
+      
+      // the block's base offset in the original matrix
       int colBase = key.getColumn() * mBlockColSize;
       int rowBase = key.getRow() * mBlockRowSize;
+      
+      // the block's size : rows & columns
+      int smRows = mBlockRowSize;
+      if((rowBase + mBlockRowSize - 1) >= mRows)
+        smRows = mRows - rowBase;
+      int smCols = mBlockColSize;
+      if((colBase + mBlockColSize - 1) >= mColumns)  
+        smCols = mColumns - colBase;
+      
+      // construct the matrix
+      SubMatrix subMatrix = new SubMatrix(smRows, smCols);
+      
+      // i, j is the current offset in the sub-matrix
+      int i = 0, j = 0;
       while (values.hasNext()) {
         VectorWritable vw = values.next();
         // check the size is suitable
-        if (vw.size() != mBlockColSize)
-          throw new IOException("BlockColumnSize dismatched.");
+        if (vw.size() != smCols)
+          throw new IOException("Block Column Size dismatched.");
         i = vw.row - rowBase;
-        if (i >= mBlockRowSize || i < 0)
-          throw new IOException("BlockRowSize dismatched.");
+        if (i >= smRows || i < 0)
+          throw new IOException("Block Row Size dismatched.");
 
         // put the subVector to the subMatrix
-        for (j = 0; j < mBlockColSize; j++) {
+        for (j = 0; j < smCols; j++) {
           subMatrix.set(i, j, vw.get(colBase + j));
         }
       }

Modified: incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java
URL: http://svn.apache.org/viewvc/incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java?rev=725570&r1=725569&r2=725570&view=diff
==============================================================================
--- incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java (original)
+++ incubator/hama/trunk/src/test/org/apache/hama/TestDenseMatrix.java Wed Dec 10 20:25:33 2008
@@ -149,21 +149,21 @@
   }
 
   public void testSubMatrix() throws IOException {
-    SubMatrix a = m1.subMatrix(2, 4, 2, 4);
+    SubMatrix a = m1.subMatrix(2, 4, 2, 5); // A : 3 * 4
     for (int i = 0; i < a.getRows(); i++) {
       for (int j = 0; j < a.getColumns(); j++) {
         assertEquals(a.get(i, j), m1.get(i + 2, j + 2));
       }
     }
 
-    SubMatrix b = m2.subMatrix(0, 2, 0, 2);
+    SubMatrix b = m2.subMatrix(0, 3, 0, 2); // B : 4 * 3
     SubMatrix c = a.mult(b);
 
-    double[][] C = new double[3][3];
+    double[][] C = new double[3][3]; // A * B
     for (int i = 0; i < 3; i++) {
       for (int j = 0; j < 3; j++) {
-        for (int k = 0; k < 3; k++) {
-          C[i][k] += m1.get(i + 2, j + 2) * m2.get(j, k);
+        for (int k = 0; k < 4; k++) {
+          C[i][j] += m1.get(i + 2, k + 2) * m2.get(k, j);
         }
       }
     }