You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2011/02/04 19:49:59 UTC

svn commit: r1067257 - in /mahout/trunk/core/src: main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java test/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapperTest.java

Author: ssc
Date: Fri Feb  4 18:49:59 2011
New Revision: 1067257

URL: http://svn.apache.org/viewvc?rev=1067257&view=rev
Log:
MAHOUT-607 Count used and neglected elements in MaybePruneRowsMapper

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapperTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java?rev=1067257&r1=1067256&r2=1067257&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapper.java Fri Feb  4 18:49:59 2011
@@ -42,6 +42,10 @@ public class MaybePruneRowsMapper
   private int maxCooccurrences;
   private final OpenIntIntHashMap indexCounts = new OpenIntIntHashMap();
 
+  static enum Elements { // counter keys passed to ctx.getCounter() in map()
+    USED, NEGLECTED; // USED: non-default elements remaining after pruning; NEGLECTED: non-default count before pruning minus count after
+  }
+
   @Override
   protected void setup(Context ctx) throws IOException, InterruptedException {
     super.setup(ctx);
@@ -56,7 +60,13 @@ public class MaybePruneRowsMapper
     throws IOException, InterruptedException {
     Vector vector = vectorWritable.get();
     countSeen(vector);
+
+    int numElementsBeforePruning = vector.getNumNondefaultElements();
     vector = maybePruneVector(vector);
+    int numElementsAfterPruning = vector.getNumNondefaultElements();
+
+    ctx.getCounter(Elements.USED).increment(numElementsAfterPruning);
+    ctx.getCounter(Elements.NEGLECTED).increment(numElementsBeforePruning - numElementsAfterPruning);
 
     DistributedRowMatrix.MatrixEntryWritable entry = new DistributedRowMatrix.MatrixEntryWritable();
     int colIndex = TasteHadoopUtils.idToIndex(rowIndex.get());

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapperTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapperTest.java?rev=1067257&r1=1067256&r2=1067257&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapperTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/MaybePruneRowsMapperTest.java Fri Feb  4 18:49:59 2011
@@ -18,6 +18,7 @@
 package org.apache.mahout.cf.taste.hadoop;
 
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.cf.taste.impl.TasteTestCase;
 import org.apache.mahout.math.RandomAccessSparseVector;
@@ -51,21 +52,37 @@ public class MaybePruneRowsMapperTest ex
 
     Mapper<VarLongWritable,VectorWritable, IntWritable, DistributedRowMatrix.MatrixEntryWritable>.Context ctx =
       EasyMock.createMock(Mapper.Context.class);
+    Counter usedElementsCounter = EasyMock.createMock(Counter.class);
+    Counter neglectedElementsCounter = EasyMock.createMock(Counter.class);
 
     ctx.write(EasyMock.eq(new IntWritable(1)), MathHelper.matrixEntryMatches(1, 123, 1));
     ctx.write(EasyMock.eq(new IntWritable(3)), MathHelper.matrixEntryMatches(3, 123, 1));
+    EasyMock.expect(ctx.getCounter(MaybePruneRowsMapper.Elements.USED)).andReturn(usedElementsCounter);
+    usedElementsCounter.increment(2);
+    EasyMock.expect(ctx.getCounter(MaybePruneRowsMapper.Elements.NEGLECTED)).andReturn(neglectedElementsCounter);
+    neglectedElementsCounter.increment(0);
+
     ctx.write(EasyMock.eq(new IntWritable(1)), MathHelper.matrixEntryMatches(1, 456, 1));
-    ctx.write(EasyMock.eq(new IntWritable(7)), MathHelper.matrixEntryMatches(7, 456, 1));    
+    ctx.write(EasyMock.eq(new IntWritable(7)), MathHelper.matrixEntryMatches(7, 456, 1));
+    EasyMock.expect(ctx.getCounter(MaybePruneRowsMapper.Elements.USED)).andReturn(usedElementsCounter);
+    usedElementsCounter.increment(2);
+    EasyMock.expect(ctx.getCounter(MaybePruneRowsMapper.Elements.NEGLECTED)).andReturn(neglectedElementsCounter);
+    neglectedElementsCounter.increment(0);
+
     ctx.write(EasyMock.eq(new IntWritable(5)), MathHelper.matrixEntryMatches(5, 789, 1));
     ctx.write(EasyMock.eq(new IntWritable(9)), MathHelper.matrixEntryMatches(9, 789, 1));
+        EasyMock.expect(ctx.getCounter(MaybePruneRowsMapper.Elements.USED)).andReturn(usedElementsCounter);
+    usedElementsCounter.increment(2);
+    EasyMock.expect(ctx.getCounter(MaybePruneRowsMapper.Elements.NEGLECTED)).andReturn(neglectedElementsCounter);
+    neglectedElementsCounter.increment(1);
 
-    EasyMock.replay(ctx);
+    EasyMock.replay(ctx, usedElementsCounter, neglectedElementsCounter);
 
     mapper.map(new VarLongWritable(123L), new VectorWritable(v1), ctx);
     mapper.map(new VarLongWritable(456L), new VectorWritable(v2), ctx);
     mapper.map(new VarLongWritable(789L), new VectorWritable(v3), ctx);
 
-    EasyMock.verify(ctx);
+    EasyMock.verify(ctx, usedElementsCounter, neglectedElementsCounter);
   }
 
 }