You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2012/06/29 23:51:43 UTC
svn commit: r1355573 - in /mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence: RowSimilarityJob.java Vectors.java
Author: ssc
Date: Fri Jun 29 21:51:42 2012
New Revision: 1355573
URL: http://svn.apache.org/viewvc?rev=1355573&view=rev
Log:
MAHOUT-1035 Hotspot in recommenditembased – UnsymmetrifyMapper job
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1355573&r1=1355572&r2=1355573&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java Fri Jun 29 21:51:42 2012
@@ -406,8 +406,8 @@ public class RowSimilarityJob extends Ab
protected void map(IntWritable row, VectorWritable similaritiesWritable, Context ctx)
throws IOException, InterruptedException {
Vector similarities = similaritiesWritable.get();
- // For performance reasons moved transposedPartial creation out of the while loop and reusing the same vector
- Vector transposedPartial = similarities.like();
+ // For performance reasons, the creation of transposedPartial is moved out of the while loop and it is reused inside
+ Vector transposedPartial = new RandomAccessSparseVector(similarities.size(), 1);
TopK<Vector.Element> topKQueue = new TopK<Vector.Element>(maxSimilaritiesPerRow, Vectors.BY_VALUE);
Iterator<Vector.Element> nonZeroElements = similarities.iterateNonZero();
while (nonZeroElements.hasNext()) {
@@ -417,7 +417,7 @@ public class RowSimilarityJob extends Ab
ctx.write(new IntWritable(nonZeroElement.index()), new VectorWritable(transposedPartial));
transposedPartial.setQuick(row.get(), 0.0);
}
- Vector topKSimilarities = similarities.like();
+ Vector topKSimilarities = new RandomAccessSparseVector(similarities.size(), maxSimilaritiesPerRow);
for (Vector.Element topKSimilarity : topKQueue.retrieve()) {
topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get());
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java?rev=1355573&r1=1355572&r2=1355573&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/Vectors.java Fri Jun 29 21:51:42 2012
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.cf.taste.common.TopK;
import org.apache.mahout.common.iterator.FixedSizeSamplingIterator;
+import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Varint;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
@@ -46,7 +47,7 @@ public final class Vectors {
if (original.getNumNondefaultElements() <= sampleSize) {
return original;
}
- Vector sample = original.like();
+ Vector sample = new RandomAccessSparseVector(original.size(), sampleSize);
Iterator<Vector.Element> sampledElements =
new FixedSizeSamplingIterator<Vector.Element>(sampleSize, original.iterateNonZero());
while (sampledElements.hasNext()) {
@@ -66,7 +67,7 @@ public final class Vectors {
Vector.Element nonZeroElement = nonZeroElements.next();
topKQueue.offer(new Vectors.TemporaryElement(nonZeroElement));
}
- Vector topKSimilarities = original.like();
+ Vector topKSimilarities = new RandomAccessSparseVector(original.size(), k);
for (Vector.Element topKSimilarity : topKQueue.retrieve()) {
topKSimilarities.setQuick(topKSimilarity.index(), topKSimilarity.get());
}