You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/05/11 15:01:43 UTC
svn commit: r943109 - in /lucene/mahout/trunk: core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ math/src/main/java/org/apache/mahout/math/
Author: srowen
Date: Tue May 11 13:01:42 2010
New Revision: 943109
URL: http://svn.apache.org/viewvc?rev=943109&view=rev
Log:
More fixes and improvements to item-based distributed recommender
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Tue May 11 13:01:42 2010
@@ -106,11 +106,13 @@ public final class AggregateAndRecommend
Vector.Element element = recommendationVectorIterator.next();
int index = element.index();
float value = (float) element.get();
- if (topItems.size() < recommendationsPerUser && !Float.isNaN(value)) {
- topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
- } else if (value > topItems.peek().getValue()) {
- topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
- topItems.poll();
+ if (!Float.isNaN(value)) {
+ if (topItems.size() < recommendationsPerUser) {
+ topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
+ } else if (value > topItems.peek().getValue()) {
+ topItems.add(new GenericRecommendedItem(indexItemIDMap.get(index), value));
+ topItems.poll();
+ }
}
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/IndexIndexWritable.java Tue May 11 13:01:42 2010
@@ -22,6 +22,7 @@ import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.mahout.math.Varint;
/** A {@link WritableComparable} encapsulating two item indices. */
public final class IndexIndexWritable
@@ -54,14 +55,14 @@ public final class IndexIndexWritable
@Override
public void write(DataOutput out) throws IOException {
- out.writeInt(aID);
- out.writeInt(bID);
+ Varint.writeUnsignedVarInt(aID, out);
+ Varint.writeUnsignedVarInt(bID, out);
}
@Override
public void readFields(DataInput in) throws IOException {
- aID = in.readInt();
- bID = in.readInt();
+ aID = Varint.readUnsignedVarInt(in);
+ bID = Varint.readUnsignedVarInt(in);
}
@Override
@@ -83,7 +84,7 @@ public final class IndexIndexWritable
public boolean equals(Object o) {
if (o instanceof IndexIndexWritable) {
IndexIndexWritable that = (IndexIndexWritable) o;
- return (aID == that.getAID()) && (bID == that.getBID());
+ return aID == that.getAID() && bID == that.getBID();
}
return false;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyReducer.java Tue May 11 13:01:42 2010
@@ -41,7 +41,7 @@ public final class PartialMultiplyReduce
private static final Logger log = LoggerFactory.getLogger(PartialMultiplyReducer.class);
- private static final int MAX_PRODUCTS_PER_ITEM = 1000;
+ private static final int MAX_PRODUCTS_PER_ITEM = 100;
private enum Counters {
PRODUCTS_OUTPUT,
@@ -72,11 +72,6 @@ public final class PartialMultiplyReduce
}
}
- if (cooccurrenceColumn == null) {
- log.info("Column vector missing for {}; continuing", itemIndex);
- return;
- }
-
final VLongWritable userIDWritable = new VLongWritable();
// These single-element vectors ensure that each user will not be recommended
@@ -98,6 +93,11 @@ public final class PartialMultiplyReduce
}
});
+ if (cooccurrenceColumn == null) {
+ log.info("Column vector missing for {}; continuing", itemIndex);
+ return;
+ }
+
final float smallestLargeValue = findSmallestLargeValue(savedValues);
final VectorWritable vectorWritable = new VectorWritable();
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java Tue May 11 13:01:42 2010
@@ -35,7 +35,7 @@ import org.apache.mahout.math.map.OpenIn
public final class UserVectorToCooccurrenceMapper extends MapReduceBase implements
Mapper<VLongWritable,VectorWritable,IndexIndexWritable,IntWritable> {
- private static final int MAX_PREFS_CONSIDERED = 50;
+ private static final int MAX_PREFS_CONSIDERED = 100;
private boolean outputGuardValue = true;
private final OpenIntIntHashMap indexCounts = new OpenIntIntHashMap();
@@ -45,8 +45,9 @@ public final class UserVectorToCooccurre
VectorWritable userVectorWritable,
OutputCollector<IndexIndexWritable,IntWritable> output,
Reporter reporter) throws IOException {
- Vector userVector = maybePruneUserVector(userVectorWritable.get());
+ Vector userVector = userVectorWritable.get();
countSeen(userVector);
+ userVector = maybePruneUserVector(userVector);
Iterator<Vector.Element> it = userVector.iterateNonZero();
IndexIndexWritable entityEntity = new IndexIndexWritable();
IntWritable one = new IntWritable(1);
@@ -55,10 +56,8 @@ public final class UserVectorToCooccurre
Iterator<Vector.Element> it2 = userVector.iterateNonZero();
while (it2.hasNext()) {
int index2 = it2.next().index();
- if (index1 != index2) {
- entityEntity.set(index1, index2);
- output.collect(entityEntity, one);
- }
+ entityEntity.set(index1, index2);
+ output.collect(entityEntity, one);
}
}
// Guard value, output once, sorts after everything; will be dropped by combiner
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceReducer.java Tue May 11 13:01:42 2010
@@ -45,6 +45,7 @@ public final class UserVectorToCooccurre
int item1ID = entityEntity.getAID();
int item2ID = entityEntity.getBID();
+ int sum = CooccurrenceCombiner.sum(counts);
if (item1ID < lastItem1ID) {
throw new IllegalStateException();
@@ -54,17 +55,20 @@ public final class UserVectorToCooccurre
throw new IllegalStateException();
}
if (item2ID == lastItem2ID) {
- count += CooccurrenceCombiner.sum(counts);
+ count += sum;
} else {
if (cooccurrenceRow == null) {
- cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE);
+ cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
}
- cooccurrenceRow.set(item2ID, count);
+ cooccurrenceRow.set(lastItem2ID, count);
lastItem2ID = item2ID;
- count = CooccurrenceCombiner.sum(counts);
+ count = sum;
}
} else {
if (cooccurrenceRow != null) {
+ if (count > 0) {
+ cooccurrenceRow.set(lastItem2ID, count);
+ }
VectorWritable vw = new VectorWritable(cooccurrenceRow);
vw.setWritesLaxPrecision(true);
output.collect(new IntWritable(lastItem1ID), vw);
@@ -72,7 +76,7 @@ public final class UserVectorToCooccurre
lastItem1ID = item1ID;
lastItem2ID = item2ID;
cooccurrenceRow = null;
- count = CooccurrenceCombiner.sum(counts);
+ count = sum;
}
}
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java Tue May 11 13:01:42 2010
@@ -77,6 +77,24 @@ public class RandomAccessSparseVector ex
}
@Override
+ public String toString() {
+ StringBuilder result = new StringBuilder();
+ result.append('{');
+ Iterator<Element> it = iterateNonZero();
+ while (it.hasNext()) {
+ Element e = it.next();
+ result.append(e.index());
+ result.append(':');
+ result.append(e.get());
+ result.append(',');
+ }
+ if (result.length() > 1) {
+ result.setCharAt(result.length() - 1, '}');
+ }
+ return result.toString();
+ }
+
+ @Override
public Vector assign(Vector other) {
if (size() != other.size()) {
throw new CardinalityException(size(), other.size());
Modified: lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java?rev=943109&r1=943108&r2=943109&view=diff
==============================================================================
--- lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java (original)
+++ lucene/mahout/trunk/math/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java Tue May 11 13:01:42 2010
@@ -96,6 +96,24 @@ public class SequentialAccessSparseVecto
return new SequentialAccessSparseVector(size(), values.clone());
}
+ @Override
+ public String toString() {
+ StringBuilder result = new StringBuilder();
+ result.append('{');
+ Iterator<Element> it = iterateNonZero();
+ while (it.hasNext()) {
+ Element e = it.next();
+ result.append(e.index());
+ result.append(':');
+ result.append(e.get());
+ result.append(',');
+ }
+ if (result.length() > 1) {
+ result.setCharAt(result.length() - 1, '}');
+ }
+ return result.toString();
+ }
+
/**
* @return false
*/