You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2010/05/25 00:44:53 UTC
svn commit: r947844 [2/2] - in /mahout/trunk: ./
core/src/main/java/org/apache/mahout/cf/taste/hadoop/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/
core/src/main/java/org/apache/mahout/cf/taste/hadoop/pseudo/
core/src/main/java/org/apache...
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -20,24 +20,24 @@ package org.apache.mahout.cf.taste.hadoo
import java.io.IOException;
import java.util.Iterator;
-import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
/**
* counts all unique users, we ensure that we see userIDs sorted in ascending order via
* secondary sort, so we don't have to buffer all of them
*/
public class CountUsersReducer extends MapReduceBase
- implements Reducer<CountUsersKeyWritable,VLongWritable,IntWritable,NullWritable> {
+ implements Reducer<CountUsersKeyWritable,VarLongWritable, VarIntWritable,NullWritable> {
@Override
- public void reduce(CountUsersKeyWritable key, Iterator<VLongWritable> userIDs,
- OutputCollector<IntWritable,NullWritable> out, Reporter reporter)
+ public void reduce(CountUsersKeyWritable key, Iterator<VarLongWritable> userIDs,
+ OutputCollector<VarIntWritable,NullWritable> out, Reporter reporter)
throws IOException {
long lastSeenUserID = Long.MIN_VALUE;
@@ -50,7 +50,7 @@ public class CountUsersReducer extends M
numberOfUsers++;
}
}
- out.collect(new IntWritable(numberOfUsers), NullWritable.get());
+ out.collect(new VarIntWritable(numberOfUsers), NullWritable.get());
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -29,9 +29,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IOUtils;
-import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
@@ -46,6 +44,8 @@ import org.apache.mahout.cf.taste.hadoop
import org.apache.mahout.cf.taste.hadoop.similarity.CoRating;
import org.apache.mahout.cf.taste.hadoop.similarity.DistributedItemSimilarity;
import org.apache.mahout.common.AbstractJob;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
/**
* <p>Runs a completely distributed computation of the cosine distance of the itemvectors of the user-item-matrix
@@ -143,9 +143,9 @@ public final class ItemSimilarityJob ext
TextInputFormat.class,
CountUsersMapper.class,
CountUsersKeyWritable.class,
- VLongWritable.class,
+ VarLongWritable.class,
CountUsersReducer.class,
- IntWritable.class,
+ VarIntWritable.class,
NullWritable.class,
TextOutputFormat.class);
@@ -163,10 +163,10 @@ public final class ItemSimilarityJob ext
itemVectorsPath,
TextInputFormat.class,
ToUserPrefsMapper.class,
- VLongWritable.class,
+ VarLongWritable.class,
EntityPrefWritable.class,
ToItemVectorReducer.class,
- VLongWritable.class,
+ VarLongWritable.class,
EntityPrefWritableArrayWritable.class,
SequenceFileOutputFormat.class);
JobClient.runJob(itemVectors);
@@ -175,10 +175,10 @@ public final class ItemSimilarityJob ext
userVectorsPath,
SequenceFileInputFormat.class,
PreferredItemsPerUserMapper.class,
- VLongWritable.class,
+ VarLongWritable.class,
ItemPrefWithItemVectorWeightWritable.class,
PreferredItemsPerUserReducer.class,
- VLongWritable.class,
+ VarLongWritable.class,
ItemPrefWithItemVectorWeightArrayWritable.class,
SequenceFileOutputFormat.class);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
@@ -30,13 +29,14 @@ import org.apache.hadoop.mapred.Reporter
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
import org.apache.mahout.cf.taste.hadoop.similarity.DistributedItemSimilarity;
+import org.apache.mahout.math.VarLongWritable;
/**
* for each item-vector, we compute its weight here and map out all entries with the user as key,
* so we can create the user-vectors in the reducer
*/
public final class PreferredItemsPerUserMapper extends MapReduceBase
- implements Mapper<VLongWritable,EntityPrefWritableArrayWritable,VLongWritable,ItemPrefWithItemVectorWeightWritable> {
+ implements Mapper<VarLongWritable,EntityPrefWritableArrayWritable,VarLongWritable,ItemPrefWithItemVectorWeightWritable> {
private DistributedItemSimilarity distributedSimilarity;
@@ -48,9 +48,9 @@ public final class PreferredItemsPerUser
}
@Override
- public void map(VLongWritable item,
+ public void map(VarLongWritable item,
EntityPrefWritableArrayWritable userPrefsArray,
- OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output,
+ OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightWritable> output,
Reporter reporter) throws IOException {
EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs();
@@ -58,7 +58,7 @@ public final class PreferredItemsPerUser
double weight = distributedSimilarity.weightOfItemVector(new UserPrefsIterator(userPrefs));
for (EntityPrefWritable userPref : userPrefs) {
- output.collect(new VLongWritable(userPref.getID()),
+ output.collect(new VarLongWritable(userPref.getID()),
new ItemPrefWithItemVectorWeightWritable(item.get(), weight, userPref.getPrefValue()));
}
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -22,19 +22,19 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
+import org.apache.mahout.math.VarLongWritable;
public final class PreferredItemsPerUserReducer extends MapReduceBase
- implements Reducer<VLongWritable,ItemPrefWithItemVectorWeightWritable,VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
+ implements Reducer<VarLongWritable,ItemPrefWithItemVectorWeightWritable,VarLongWritable,ItemPrefWithItemVectorWeightArrayWritable> {
@Override
- public void reduce(VLongWritable user,
+ public void reduce(VarLongWritable user,
Iterator<ItemPrefWithItemVectorWeightWritable> itemPrefs,
- OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
+ OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output,
Reporter reporter)
throws IOException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -22,13 +22,13 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable;
+import org.apache.mahout.math.VarLongWritable;
/**
* For each single item, collect all users with their preferences
@@ -36,12 +36,12 @@ import org.apache.mahout.cf.taste.hadoop
*/
public final class ToItemVectorReducer
extends MapReduceBase implements
- Reducer<VLongWritable,EntityPrefWritable,VLongWritable,EntityPrefWritableArrayWritable> {
+ Reducer<VarLongWritable,EntityPrefWritable,VarLongWritable,EntityPrefWritableArrayWritable> {
@Override
- public void reduce(VLongWritable item,
+ public void reduce(VarLongWritable item,
Iterator<EntityPrefWritable> userPrefs,
- OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output,
+ OutputCollector<VarLongWritable,EntityPrefWritableArrayWritable> output,
Reporter reporter)
throws IOException {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -22,7 +22,6 @@ import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.JobClient;
@@ -37,6 +36,7 @@ import org.apache.mahout.cf.taste.hadoop
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
+import org.apache.mahout.math.VarLongWritable;
public final class SlopeOneAverageDiffsJob extends AbstractJob {
@@ -54,7 +54,7 @@ public final class SlopeOneAverageDiffsJ
String averagesOutputPath = parsedArgs.get("--tempDir");
JobConf prefsToDiffsJobConf = prepareJobConf(prefsFile, averagesOutputPath,
- TextInputFormat.class, ToItemPrefsMapper.class, VLongWritable.class, EntityPrefWritable.class,
+ TextInputFormat.class, ToItemPrefsMapper.class, VarLongWritable.class, EntityPrefWritable.class,
SlopeOnePrefsToDiffsReducer.class, EntityEntityWritable.class, FloatWritable.class,
SequenceFileOutputFormat.class);
JobClient.runJob(prefsToDiffsJobConf);
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOnePrefsToDiffsReducer.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -24,19 +24,19 @@ import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
+import org.apache.mahout.math.VarLongWritable;
public final class SlopeOnePrefsToDiffsReducer extends MapReduceBase implements
- Reducer<VLongWritable,EntityPrefWritable,EntityEntityWritable,FloatWritable> {
+ Reducer<VarLongWritable,EntityPrefWritable,EntityEntityWritable,FloatWritable> {
@Override
- public void reduce(VLongWritable key,
+ public void reduce(VarLongWritable key,
Iterator<EntityPrefWritable> values,
OutputCollector<EntityEntityWritable,FloatWritable> output,
Reporter reporter) throws IOException {
Added: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java?rev=947844&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarIntWritable.java Mon May 24 22:44:51 2010
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+/** Hadoop WritableComparable holding one int; write/readFields delegate to Varint's signed var-int methods. */
+public class VarIntWritable implements WritableComparable<VarIntWritable>, Cloneable {
+ /** The wrapped value; stays 0 (the int default) until set or read. */
+ private int value;
+ /** Creates an instance whose value is left at 0. */
+ public VarIntWritable() {
+ }
+ /** Creates an instance wrapping {@code value}. */
+ public VarIntWritable(int value) {
+ this.value = value;
+ }
+ /** Returns the wrapped int. */
+ public int get() {
+ return value;
+ }
+ /** Replaces the wrapped int with {@code value}. */
+ public void set(int value) {
+ this.value = value;
+ }
+ /** True only when {@code other} is a VarIntWritable wrapping the same int; null and other types yield false. */
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof VarIntWritable && ((VarIntWritable) other).value == value;
+ }
+ /** Uses the wrapped int itself as the hash, so equal instances hash equally. */
+ @Override
+ public int hashCode() {
+ return value;
+ }
+ /** Returns the decimal string form of the wrapped int. */
+ @Override
+ public String toString() {
+ return String.valueOf(value);
+ }
+ /** Returns a new instance with the same value; built with the constructor rather than super.clone(). */
+ @Override
+ public VarIntWritable clone() {
+ return new VarIntWritable(value);
+ }
+ /** Orders by numeric value, returning -1, 0 or 1; uses explicit comparisons rather than subtraction. */
+ @Override
+ public int compareTo(VarIntWritable other) {
+ if (value < other.value) {
+ return -1;
+ } else if (value > other.value) {
+ return 1;
+ }
+ return 0;
+ }
+ /** Serializes the value via Varint.writeSignedVarInt (the encoding is defined in Varint, not shown here). */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ Varint.writeSignedVarInt(value, out);
+ }
+ /** Overwrites the value with what Varint.readSignedVarInt reads from {@code in}. */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ value = Varint.readSignedVarInt(in);
+ }
+
+}
Added: mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java?rev=947844&view=auto
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java (added)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VarLongWritable.java Mon May 24 22:44:51 2010
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+/** Hadoop WritableComparable holding one long; write/readFields delegate to Varint's signed var-long methods. */
+public class VarLongWritable implements WritableComparable<VarLongWritable>, Cloneable {
+ /** The wrapped value; stays 0 (the long default) until set or read. */
+ private long value;
+ /** Creates an instance whose value is left at 0. */
+ public VarLongWritable() {
+ }
+ /** Creates an instance wrapping {@code value}. */
+ public VarLongWritable(long value) {
+ this.value = value;
+ }
+ /** Returns the wrapped long. */
+ public long get() {
+ return value;
+ }
+ /** Replaces the wrapped long with {@code value}. */
+ public void set(long value) {
+ this.value = value;
+ }
+ /** True only when {@code other} is a VarLongWritable wrapping the same long; null and other types yield false. */
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof VarLongWritable && ((VarLongWritable) other).value == value;
+ }
+ /** XORs the high 32 bits into the low 32 and truncates to int, the same mix Long.hashCode uses. */
+ @Override
+ public int hashCode() {
+ return (int) ((value >>> 32) ^ value);
+ }
+ /** Returns the decimal string form of the wrapped long. */
+ @Override
+ public String toString() {
+ return String.valueOf(value);
+ }
+ /** Returns a new instance with the same value; built with the constructor rather than super.clone(). */
+ @Override
+ public VarLongWritable clone() {
+ return new VarLongWritable(value);
+ }
+ /** Orders by numeric value, returning -1, 0 or 1; uses explicit comparisons rather than subtraction. */
+ @Override
+ public int compareTo(VarLongWritable other) {
+ if (value < other.value) {
+ return -1;
+ } else if (value > other.value) {
+ return 1;
+ }
+ return 0;
+ }
+ /** Serializes the value via Varint.writeSignedVarLong (the encoding is defined in Varint, not shown here). */
+ @Override
+ public void write(DataOutput out) throws IOException {
+ Varint.writeSignedVarLong(value, out);
+ }
+ /** Overwrites the value with what Varint.readSignedVarLong reads from {@code in}. */
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ value = Varint.readSignedVarLong(in);
+ }
+
+}
\ No newline at end of file
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Mon May 24 22:44:51 2010
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -30,11 +30,9 @@ import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DoubleWritable;
-import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
@@ -44,6 +42,8 @@ import org.apache.mahout.cf.taste.hadoop
import org.apache.mahout.cf.taste.hadoop.similarity.CoRating;
import org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity;
import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
import org.easymock.IArgumentMatcher;
import org.easymock.classextension.EasyMock;
@@ -55,9 +55,9 @@ import org.easymock.classextension.EasyM
public final class ItemSimilarityTest extends MahoutTestCase {
public void testUserPrefsPerItemMapper() throws Exception {
- OutputCollector<VLongWritable,VLongWritable> output =
+ OutputCollector<VarLongWritable,VarLongWritable> output =
EasyMock.createMock(OutputCollector.class);
- output.collect(new VLongWritable(34L), new EntityPrefWritable(12L, 2.3f));
+ output.collect(new VarLongWritable(34L), new EntityPrefWritable(12L, 2.3f));
EasyMock.replay(output);
new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null);
@@ -66,9 +66,9 @@ public final class ItemSimilarityTest ex
}
public void testCountUsersMapper() throws Exception {
- OutputCollector<CountUsersKeyWritable,VLongWritable> output = EasyMock.createMock(OutputCollector.class);
- output.collect(keyForUserID(12L), EasyMock.eq(new VLongWritable(12L)));
- output.collect(keyForUserID(35L), EasyMock.eq(new VLongWritable(35L)));
+ OutputCollector<CountUsersKeyWritable,VarLongWritable> output = EasyMock.createMock(OutputCollector.class);
+ output.collect(keyForUserID(12L), EasyMock.eq(new VarLongWritable(12L)));
+ output.collect(keyForUserID(35L), EasyMock.eq(new VarLongWritable(35L)));
EasyMock.replay(output);
CountUsersMapper mapper = new CountUsersMapper();
@@ -98,13 +98,13 @@ public final class ItemSimilarityTest ex
public void testCountUsersReducer() throws Exception {
- OutputCollector<IntWritable,NullWritable> output = EasyMock.createMock(OutputCollector.class);
- output.collect(new IntWritable(3), NullWritable.get());
+ OutputCollector<VarIntWritable,NullWritable> output = EasyMock.createMock(OutputCollector.class);
+ output.collect(new VarIntWritable(3), NullWritable.get());
EasyMock.replay(output);
- List<VLongWritable> userIDs = Arrays.asList(new VLongWritable(1L), new VLongWritable(1L),
- new VLongWritable(3L), new VLongWritable(5L),
- new VLongWritable(5L), new VLongWritable(5L));
+ List<VarLongWritable> userIDs = Arrays.asList(new VarLongWritable(1L), new VarLongWritable(1L),
+ new VarLongWritable(3L), new VarLongWritable(5L),
+ new VarLongWritable(5L), new VarLongWritable(5L));
new CountUsersReducer().reduce(null, userIDs.iterator(), output, null);
@@ -116,14 +116,14 @@ public final class ItemSimilarityTest ex
List<EntityPrefWritable> userPrefs = Arrays.asList(
new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f));
- OutputCollector<VLongWritable,EntityPrefWritableArrayWritable> output =
+ OutputCollector<VarLongWritable,EntityPrefWritableArrayWritable> output =
EasyMock.createMock(OutputCollector.class);
- output.collect(EasyMock.eq(new VLongWritable(12L)), equalToUserPrefs(userPrefs));
+ output.collect(EasyMock.eq(new VarLongWritable(12L)), equalToUserPrefs(userPrefs));
EasyMock.replay(output);
- new ToItemVectorReducer().reduce(new VLongWritable(12L), userPrefs.iterator(), output, null);
+ new ToItemVectorReducer().reduce(new VarLongWritable(12L), userPrefs.iterator(), output, null);
EasyMock.verify(output);
}
@@ -162,7 +162,7 @@ public final class ItemSimilarityTest ex
}
public void testPreferredItemsPerUserMapper() throws Exception {
- OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightWritable> output =
+ OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightWritable> output =
EasyMock.createMock(OutputCollector.class);
EntityPrefWritableArrayWritable userPrefs = new EntityPrefWritableArrayWritable(
new EntityPrefWritable[] {
@@ -172,8 +172,8 @@ public final class ItemSimilarityTest ex
double weight =
new DistributedUncenteredZeroAssumingCosineSimilarity().weightOfItemVector(Arrays.asList(2.0f, 3.0f).iterator());
- output.collect(new VLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
- output.collect(new VLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
+ output.collect(new VarLongWritable(12L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 2.0f));
+ output.collect(new VarLongWritable(56L), new ItemPrefWithItemVectorWeightWritable(34L, weight, 3.0f));
JobConf conf = new JobConf();
conf.set(ItemSimilarityJob.DISTRIBUTED_SIMILARITY_CLASSNAME,
@@ -183,7 +183,7 @@ public final class ItemSimilarityTest ex
PreferredItemsPerUserMapper mapper = new PreferredItemsPerUserMapper();
mapper.configure(conf);
- mapper.map(new VLongWritable(34L), userPrefs, output, null);
+ mapper.map(new VarLongWritable(34L), userPrefs, output, null);
EasyMock.verify(output);
}
@@ -194,15 +194,15 @@ public final class ItemSimilarityTest ex
Arrays.asList(new ItemPrefWithItemVectorWeightWritable(34L, 5.0, 1.0f),
new ItemPrefWithItemVectorWeightWritable(56L, 7.0, 2.0f));
- OutputCollector<VLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
+ OutputCollector<VarLongWritable,ItemPrefWithItemVectorWeightArrayWritable> output =
EasyMock.createMock(OutputCollector.class);
- output.collect(EasyMock.eq(new VLongWritable(12L)), equalToItemPrefs(itemPrefs));
+ output.collect(EasyMock.eq(new VarLongWritable(12L)), equalToItemPrefs(itemPrefs));
EasyMock.replay(output);
new PreferredItemsPerUserReducer().reduce(
- new VLongWritable(12L), itemPrefs.iterator(), output, null);
+ new VarLongWritable(12L), itemPrefs.iterator(), output, null);
EasyMock.verify(output);
}
@@ -254,7 +254,7 @@ public final class ItemSimilarityTest ex
EasyMock.replay(output, itemPrefs);
- new CopreferredItemsMapper().map(new VLongWritable(), itemPrefs, output, null);
+ new CopreferredItemsMapper().map(new VarLongWritable(), itemPrefs, output, null);
EasyMock.verify(output, itemPrefs);
}
@@ -282,97 +282,78 @@ public final class ItemSimilarityTest ex
public void testCompleteJob() throws Exception {
- String tmpDirProp = System.getProperty("java.io.tmpdir");
- if (!tmpDirProp.endsWith("/")) {
- tmpDirProp += "/";
- }
- String tmpDirPath = tmpDirProp + ItemSimilarityTest.class.getCanonicalName();
- File tmpDir = new File(tmpDirPath);
+ File inputFile = getTestTempFile("prefs.txt");
+ File outputDir = getTestTempDir("output");
+ outputDir.delete();
+ File tmpDir = getTestTempDir("tmp");
+
+ /* user-item-matrix
+
+ Game Mouse PC Disk
+ Jane - 1 2 -
+ Paul 1 - 1 -
+ Fred - - - 1
+ */
+ BufferedWriter writer = new BufferedWriter(new FileWriter(inputFile));
try {
- if (tmpDir.exists()) {
- recursiveDelete(tmpDir);
- }
- tmpDir.mkdirs();
-
- /* user-item-matrix
-
- Game Mouse PC Disk
- Jane - 1 2 -
- Paul 1 - 1 -
- Fred - - - 1
- */
-
- BufferedWriter writer = new BufferedWriter(new FileWriter(tmpDirPath+"/prefs.txt"));
- try {
- writer.write("2,1,1\n" +
- "1,2,1\n" +
- "3,4,1\n" +
- "1,3,2\n" +
- "2,3,1\n");
- } finally {
- writer.close();
- }
-
- ItemSimilarityJob similarityJob = new ItemSimilarityJob();
-
- Configuration conf = new Configuration();
- conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt");
- conf.set("mapred.output.dir", tmpDirPath+"/output");
- conf.set("mapred.output.compress", Boolean.FALSE.toString());
+ writer.write("2,1,1\n" +
+ "1,2,1\n" +
+ "3,4,1\n" +
+ "1,3,2\n" +
+ "2,3,1\n");
+ } finally {
+ writer.close();
+ }
- similarityJob.setConf(conf);
+ ItemSimilarityJob similarityJob = new ItemSimilarityJob();
- similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp", "--similarityClassname",
- "org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity"});
+ Configuration conf = new Configuration();
+ conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+ conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+ conf.set("mapred.output.compress", Boolean.FALSE.toString());
- int numberOfUsers = ItemSimilarityJob.readNumberOfUsers(new JobConf(), tmpDirPath + "/tmp/countUsers/part-00000");
+ similarityJob.setConf(conf);
- assertEquals(3, numberOfUsers);
+ similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+ "org.apache.mahout.cf.taste.hadoop.similarity.DistributedUncenteredZeroAssumingCosineSimilarity"});
- String filePath = tmpDirPath+"/output/part-00000";
- BufferedReader reader = new BufferedReader(new FileReader(filePath));
+ File countUsersPart = new File(new File(tmpDir, "countUsers"), "part-00000");
+ int numberOfUsers = ItemSimilarityJob.readNumberOfUsers(new JobConf(), countUsersPart.getAbsolutePath());
- String line;
- int currentLine = 1;
- while ( (line = reader.readLine()) != null) {
+ assertEquals(3, numberOfUsers);
- String[] tokens = line.split("\t");
+ File outPart = new File(outputDir, "part-00000");
+ BufferedReader reader = new BufferedReader(new FileReader(outPart));
- long itemAID = Long.parseLong(tokens[0]);
- long itemBID = Long.parseLong(tokens[1]);
- double similarity = Double.parseDouble(tokens[2]);
+ String line;
+ int currentLine = 1;
+ while ( (line = reader.readLine()) != null) {
- if (currentLine == 1) {
- assertEquals(1L, itemAID);
- assertEquals(3L, itemBID);
- assertEquals(0.45, similarity, 0.01);
- }
+ String[] tokens = line.split("\t");
- if (currentLine == 2) {
- assertEquals(2L, itemAID);
- assertEquals(3L, itemBID);
- assertEquals(0.89, similarity, 0.01);
- }
+ long itemAID = Long.parseLong(tokens[0]);
+ long itemBID = Long.parseLong(tokens[1]);
+ double similarity = Double.parseDouble(tokens[2]);
- currentLine++;
+ if (currentLine == 1) {
+ assertEquals(1L, itemAID);
+ assertEquals(3L, itemBID);
+ assertEquals(0.45, similarity, 0.01);
}
- int linesWritten = currentLine-1;
- assertEquals(2, linesWritten);
+ if (currentLine == 2) {
+ assertEquals(2L, itemAID);
+ assertEquals(3L, itemBID);
+ assertEquals(0.89, similarity, 0.01);
+ }
- } finally {
- recursiveDelete(tmpDir);
+ currentLine++;
}
- }
- static void recursiveDelete(File fileOrDir) {
- if (fileOrDir.isDirectory()) {
- for (File innerFile : fileOrDir.listFiles()) {
- recursiveDelete(innerFile);
- }
- }
- fileOrDir.delete();
+ int linesWritten = currentLine-1;
+ assertEquals(2, linesWritten);
+
}
}
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/Gram.java Mon May 24 22:44:51 2010
@@ -26,7 +26,7 @@ import java.nio.charset.CharacterCodingE
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
+import org.apache.mahout.math.Varint;
/**
* Writable for holding data generated from the collocation discovery jobs. Depending on the job configuration
@@ -168,19 +168,19 @@ public class Gram extends BinaryComparab
@Override
public void readFields(DataInput in) throws IOException {
- int newLength = WritableUtils.readVInt(in);
+ int newLength = Varint.readUnsignedVarInt(in);
setCapacity(newLength, false);
in.readFully(bytes, 0, newLength);
- int newFrequency = WritableUtils.readVInt(in);
+ int newFrequency = Varint.readUnsignedVarInt(in);
length = newLength;
frequency = newFrequency;
}
@Override
public void write(DataOutput out) throws IOException {
- WritableUtils.writeVInt(out, length);
+ Varint.writeUnsignedVarInt(length, out);
out.write(bytes, 0, length);
- WritableUtils.writeVInt(out, frequency);
+ Varint.writeUnsignedVarInt(frequency, out);
}
/* Cribbed from o.a.hadoop.io.Text:
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java?rev=947844&r1=947843&r2=947844&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/nlp/collocations/llr/GramKey.java Mon May 24 22:44:51 2010
@@ -25,7 +25,7 @@ import java.nio.charset.CharacterCodingE
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableUtils;
+import org.apache.mahout.math.Varint;
import org.apache.mahout.utils.nlp.collocations.llr.Gram.Type;
/** A GramKey, based on the identity fields of Gram (type, string) plus a byte[] used for secondary ordering */
@@ -76,8 +76,8 @@ public class GramKey extends BinaryCompa
@Override
public void readFields(DataInput in) throws IOException {
- int newLength = WritableUtils.readVInt(in);
- int newPrimaryLength = WritableUtils.readVInt(in);
+ int newLength = Varint.readUnsignedVarInt(in);
+ int newPrimaryLength = Varint.readUnsignedVarInt(in);
setCapacity(newLength, false);
in.readFully(bytes, 0, newLength);
length = newLength;
@@ -87,8 +87,8 @@ public class GramKey extends BinaryCompa
@Override
public void write(DataOutput out) throws IOException {
- WritableUtils.writeVInt(out, length);
- WritableUtils.writeVInt(out, primaryLength);
+ Varint.writeUnsignedVarInt(length, out);
+ Varint.writeUnsignedVarInt(primaryLength, out);
out.write(bytes, 0, length);
}