You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2012/06/29 23:51:43 UTC

svn commit: r1355573 - in /mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence: RowSimilarityJob.java Vectors.java

Author: ssc
Date: Fri Jun 29 21:51:42 2012
New Revision: 1355573

URL: http://svn.apache.org/viewvc?rev=1355573&view=rev
Log:
MAHOUT-1035 Hotspot in recommenditembased – UnsymmetrifyMapper job

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1355573&r1=1355572&r2=1355573&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Fri Jun 29 21:51:42 2012
@@ -406,8 +406,8 @@ public class RowSimilarityJob extends Ab
     protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx)
       throws IOException, InterruptedException {
       Vector similarities = similaritiesWritable.get();
-      // For performance reasons moved transposedPartial creation out of the while loop and reusing the same vector
-      Vector transposedPartial = similarities.like();
+      // For performance reasons, the creation of transposedPartial is moved out of the while loop and it is reused inside
+      Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1);
       TopK<Vector.Element> topKQueue = new TopK<Vector.Element>(maxSimilaritiesPerRow, Vectors.BY_VALUE);
       Iterator<Vector.Element> nonZeroElements = similarities.iterateNonZero();
       while (nonZeroElements.hasNext()) {
@@ -417,7 +417,7 @@ public class RowSimilarityJob extends Ab
         ctx.write(new IntWritable(nonZeroElement.index()), new VectorWritable(transposedPartial));
         transposedPartial.setQuick(row.get(), 0.0);
       }
-      Vector topKSimilarities = similarities.like();
+      Vector topKSimilarities = new RandomAccessSparseVector(similarities.size(), maxSimilaritiesPerRow);
       for (Vector.Element topKSimilarity : topKQueue.retrieve()) {
         topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get());
       }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java?rev=1355573&r1=1355572&r2=1355573&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java Fri Jun 29 21:51:42 2012
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.mahout.cf.taste.common.TopK;
 import org.apache.mahout.common.iterator.FixedSizeSamplingIterator;
+import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.Varint;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
@@ -46,7 +47,7 @@ public final class Vectors {
     if (original.getNumNondefaultElements() <= sampleSize) {
       return original;
     }
-    Vector sample = original.like();
+    Vector sample = new RandomAccessSparseVector(original.size(), sampleSize);
     Iterator<Vector.Element> sampledElements =
         new FixedSizeSamplingIterator<Vector.Element>(sampleSize, original.iterateNonZero());
     while (sampledElements.hasNext()) {
@@ -66,7 +67,7 @@ public final class Vectors {
       Vector.Element nonZeroElement = nonZeroElements.next();
       topKQueue.offer(new Vectors.TemporaryElement(nonZeroElement));
     }
-    Vector topKSimilarities = original.like();
+    Vector topKSimilarities = new RandomAccessSparseVector(original.size(), k);
     for (Vector.Element topKSimilarity : topKQueue.retrieve()) {
       topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get());
     }