You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pa...@apache.org on 2015/04/01 20:07:43 UTC

[12/51] [partial] mahout git commit: MAHOUT-1655 Refactors mr-legacy into mahout-hdfs and mahout-mr, closes apache/mahout#86

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
new file mode 100644
index 0000000..1326777
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
@@ -0,0 +1,928 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.item;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.MutableRecommendedItem;
+import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
+import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
+import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.FileLineIterable;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.hadoop.MathHelper;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CooccurrenceCountSimilarity;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity;
+import org.apache.mahout.math.map.OpenIntLongHashMap;
+import org.easymock.IArgumentMatcher;
+import org.easymock.EasyMock;
+import org.junit.Test;
+
+public class RecommenderJobTest extends TasteTestCase {
+
+  /**
+   * tests {@link ItemIDIndexMapper}
+   */
+  @Test
+  public void testItemIDIndexMapper() throws Exception {
+    Mapper<LongWritable,Text, VarIntWritable, VarLongWritable>.Context context =
+      EasyMock.createMock(Mapper.Context.class);
+
+    context.write(new VarIntWritable(TasteHadoopUtils.idToIndex(789L)), new VarLongWritable(789L));
+    EasyMock.replay(context);
+
+    new ItemIDIndexMapper().map(new LongWritable(123L), new Text("456,789,5.0"), context);
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * tests {@link ItemIDIndexReducer}
+   */
+  @Test
+  public void testItemIDIndexReducer() throws Exception {
+    Reducer<VarIntWritable, VarLongWritable, VarIntWritable,VarLongWritable>.Context context =
+      EasyMock.createMock(Reducer.Context.class);
+
+    context.write(new VarIntWritable(123), new VarLongWritable(45L));
+    EasyMock.replay(context);
+
+    new ItemIDIndexReducer().reduce(new VarIntWritable(123), Arrays.asList(new VarLongWritable(67L),
+        new VarLongWritable(89L), new VarLongWritable(45L)), context);
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * Checks that {@link ToItemPrefsMapper} turns every "userID,itemID,preference" line into a write of
+   * the user ID together with an {@link EntityPrefWritable} holding the item ID and the preference.
+   */
+  @Test
+  public void testToItemPrefsMapper() throws Exception {
+    Mapper<LongWritable,Text, VarLongWritable,VarLongWritable>.Context ctx =
+      EasyMock.createMock(Mapper.Context.class);
+
+    // one expected write per input line
+    VarLongWritable firstPref = new EntityPrefWritable(34L, 1.0f);
+    VarLongWritable secondPref = new EntityPrefWritable(78L, 2.0f);
+    ctx.write(new VarLongWritable(12L), firstPref);
+    ctx.write(new VarLongWritable(56L), secondPref);
+    EasyMock.replay(ctx);
+
+    ToItemPrefsMapper prefsMapper = new ToItemPrefsMapper();
+    Text firstLine = new Text("12,34,1");
+    Text secondLine = new Text("56,78,2");
+    prefsMapper.map(new LongWritable(123L), firstLine, ctx);
+    prefsMapper.map(new LongWritable(456L), secondLine, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Checks {@link ToItemPrefsMapper} with its {@code booleanData} field switched on: lines carry only
+   * "userID,itemID" and the item is expected as a plain {@link VarLongWritable} without a preference.
+   */
+  @Test
+  public void testToItemPrefsMapperBooleanData() throws Exception {
+    Mapper<LongWritable,Text, VarLongWritable,VarLongWritable>.Context ctx =
+      EasyMock.createMock(Mapper.Context.class);
+
+    // one expected (userID, itemID) write per input line
+    ctx.write(new VarLongWritable(12L), new VarLongWritable(34L));
+    ctx.write(new VarLongWritable(56L), new VarLongWritable(78L));
+    EasyMock.replay(ctx);
+
+    ToItemPrefsMapper prefsMapper = new ToItemPrefsMapper();
+    // the flag is injected directly into the mapper instead of going through a job configuration
+    setField(prefsMapper, "booleanData", true);
+    Text firstLine = new Text("12,34");
+    Text secondLine = new Text("56,78");
+    prefsMapper.map(new LongWritable(123L), firstLine, ctx);
+    prefsMapper.map(new LongWritable(456L), secondLine, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Checks that {@link ToUserVectorsReducer} combines a user's preferences into one vector, with each
+   * preference value stored at the index of its item, and increments the USERS counter once.
+   */
+  @Test
+  public void testToUserVectorReducer() throws Exception {
+    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context ctx =
+      EasyMock.createMock(Reducer.Context.class);
+    Counter usersCounter = EasyMock.createMock(Counter.class);
+
+    // expected: one counter lookup followed by a single increment ...
+    EasyMock.expect(ctx.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(usersCounter);
+    usersCounter.increment(1);
+    // ... and one vector for user 12 that contains exactly the two preferences
+    ctx.write(EasyMock.eq(new VarLongWritable(12L)),
+        MathHelper.vectorMatches(
+            MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0),
+            MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 2.0)));
+
+    EasyMock.replay(ctx, usersCounter);
+
+    Collection<VarLongWritable> itemPrefs = Lists.newLinkedList();
+    itemPrefs.add(new EntityPrefWritable(34L, 1.0f));
+    itemPrefs.add(new EntityPrefWritable(56L, 2.0f));
+
+    ToUserVectorsReducer reducer = new ToUserVectorsReducer();
+    reducer.reduce(new VarLongWritable(12L), itemPrefs, ctx);
+
+    EasyMock.verify(ctx, usersCounter);
+  }
+
+  /**
+   * Checks {@link ToUserVectorsReducer} when the values are plain item IDs without a preference:
+   * every item is expected in the user vector with the value 1.0.
+   */
+  @Test
+  public void testToUserVectorReducerWithBooleanData() throws Exception {
+    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context ctx =
+      EasyMock.createMock(Reducer.Context.class);
+    Counter usersCounter = EasyMock.createMock(Counter.class);
+
+    // expected: one counter lookup followed by a single increment ...
+    EasyMock.expect(ctx.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(usersCounter);
+    usersCounter.increment(1);
+    // ... and one vector for user 12 with 1.0 at the index of each item
+    ctx.write(EasyMock.eq(new VarLongWritable(12L)),
+        MathHelper.vectorMatches(
+            MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0),
+            MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 1.0)));
+
+    EasyMock.replay(ctx, usersCounter);
+
+    List<VarLongWritable> itemIDs = Arrays.asList(new VarLongWritable(34L), new VarLongWritable(56L));
+    ToUserVectorsReducer reducer = new ToUserVectorsReducer();
+    reducer.reduce(new VarLongWritable(12L), itemIDs, ctx);
+
+    EasyMock.verify(ctx, usersCounter);
+  }
+
+  /**
+   * Checks that {@link SimilarityMatrixRowWrapperMapper} wraps a similarity matrix row into a
+   * {@link VectorOrPrefWritable}. The input row for index 12 also has an entry at index 12 itself;
+   * the expected output contains only the entries at 34 and 56.
+   */
+  @Test
+  public void testSimilarityMatrixRowWrapperMapper() throws Exception {
+    Mapper<IntWritable,VectorWritable,VarIntWritable,VectorOrPrefWritable>.Context ctx =
+      EasyMock.createMock(Mapper.Context.class);
+
+    ctx.write(EasyMock.eq(new VarIntWritable(12)),
+        vectorOfVectorOrPrefWritableMatches(MathHelper.elem(34, 0.5), MathHelper.elem(56, 0.7)));
+
+    EasyMock.replay(ctx);
+
+    RandomAccessSparseVector row = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    row.set(12, 1.0);
+    row.set(34, 0.5);
+    row.set(56, 0.7);
+
+    SimilarityMatrixRowWrapperMapper mapper = new SimilarityMatrixRowWrapperMapper();
+    mapper.map(new IntWritable(12), new VectorWritable(row), ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher that accepts a {@link VectorOrPrefWritable} whose
+   * {@link Vector} passes {@link MathHelper#consistsOf} for the given elements.
+   *
+   * @param elements the elements the wrapped vector is compared against
+   * @return always {@code null}; the call only serves as a placeholder argument while recording an
+   *         expectation, the real work is done by the reported matcher
+   */
+  private static VectorOrPrefWritable vectorOfVectorOrPrefWritableMatches(final Vector.Element... elements) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (!(argument instanceof VectorOrPrefWritable)) {
+          return false;
+        }
+        Vector v = ((VectorOrPrefWritable) argument).getVector();
+        // a writable without a vector can never match; report a mismatch instead of failing inside the helper
+        return v != null && MathHelper.consistsOf(v, elements);
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {
+        // describe the expected argument so that a failed expectation is readable
+        buffer.append("VectorOrPrefWritable(vector={");
+        for (int i = 0; i < elements.length; i++) {
+          if (i > 0) {
+            buffer.append(", ");
+          }
+          buffer.append(elements[i].index()).append(':').append(elements[i].get());
+        }
+        buffer.append("})");
+      }
+    });
+    return null;
+  }
+
+  /**
+   * Checks that {@link UserVectorSplitterMapper} splits a user vector into one write per item index,
+   * each carrying the user ID and that user's preference value for the item.
+   */
+  @Test
+  public void testUserVectorSplitterMapper() throws Exception {
+    Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context ctx =
+        EasyMock.createMock(Mapper.Context.class);
+
+    // one expected write per entry of the user vector built below
+    ctx.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
+    ctx.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));
+
+    EasyMock.replay(ctx);
+
+    UserVectorSplitterMapper splitter = new UserVectorSplitterMapper();
+    // limit of 10 is above the two preferences used here
+    setField(splitter, "maxPrefsPerUserConsidered", 10);
+
+    RandomAccessSparseVector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    userVector.set(34, 0.5);
+    userVector.set(56, 0.7);
+
+    splitter.map(new VarLongWritable(123L), new VectorWritable(userVector), ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher that accepts a {@link VectorOrPrefWritable} holding the
+   * given user ID and exactly the given preference value.
+   *
+   * @param userID the user ID the argument must carry
+   * @param prefValue the preference value the argument must carry (compared with {@code ==})
+   * @return always {@code null}; the call only serves as a placeholder argument while recording an
+   *         expectation, the real work is done by the reported matcher
+   */
+  private static VectorOrPrefWritable prefOfVectorOrPrefWritableMatches(final long userID, final float prefValue) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (!(argument instanceof VectorOrPrefWritable)) {
+          return false;
+        }
+        VectorOrPrefWritable pref = (VectorOrPrefWritable) argument;
+        return pref.getUserID() == userID && pref.getValue() == prefValue;
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {
+        // describe the expected argument so that a failed expectation is readable
+        buffer.append("VectorOrPrefWritable(userID=").append(userID)
+            .append(", value=").append(prefValue).append(')');
+      }
+    });
+    return null;
+  }
+
+  /**
+   * Checks {@link UserVectorSplitterMapper} when a set of users to recommend for is given: the mapper
+   * is fed the vectors of users 123 and 456, but only writes for user 123 (the sole member of the set)
+   * are expected.
+   */
+  @Test
+  public void testUserVectorSplitterMapperUserExclusion() throws Exception {
+    Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context ctx =
+        EasyMock.createMock(Mapper.Context.class);
+
+    // only user 123 appears in the expectations; no write is recorded for user 456
+    ctx.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
+    ctx.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));
+
+    EasyMock.replay(ctx);
+
+    FastIDSet includedUsers = new FastIDSet();
+    includedUsers.add(123L);
+
+    UserVectorSplitterMapper splitter = new UserVectorSplitterMapper();
+    setField(splitter, "maxPrefsPerUserConsidered", 10);
+    setField(splitter, "usersToRecommendFor", includedUsers);
+
+    RandomAccessSparseVector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    userVector.set(34, 0.5);
+    userVector.set(56, 0.7);
+
+    // the same preferences are submitted for both users
+    splitter.map(new VarLongWritable(123L), new VectorWritable(userVector), ctx);
+    splitter.map(new VarLongWritable(456L), new VectorWritable(userVector), ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Checks {@link UserVectorSplitterMapper} when fewer preferences may be considered than the user
+   * has. With a limit of 1 and the preferences 0.5 and 0.7, the write for the 0.5 preference is
+   * expected to carry NaN while the 0.7 preference is expected unchanged.
+   */
+  @Test
+  public void testUserVectorSplitterMapperOnlySomePrefsConsidered() throws Exception {
+    Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context ctx =
+        EasyMock.createMock(Mapper.Context.class);
+
+    ctx.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatchesNaN(123L));
+    ctx.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));
+
+    EasyMock.replay(ctx);
+
+    UserVectorSplitterMapper splitter = new UserVectorSplitterMapper();
+    // one preference fewer than the vector below contains
+    setField(splitter, "maxPrefsPerUserConsidered", 1);
+
+    RandomAccessSparseVector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    userVector.set(34, 0.5);
+    userVector.set(56, 0.7);
+
+    splitter.map(new VarLongWritable(123L), new VectorWritable(userVector), ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher that accepts a {@link VectorOrPrefWritable} holding the
+   * given user ID and a preference value that is NaN.
+   *
+   * @param userID the user ID the argument must carry
+   * @return always {@code null}; the call only serves as a placeholder argument while recording an
+   *         expectation, the real work is done by the reported matcher
+   */
+  private static VectorOrPrefWritable prefOfVectorOrPrefWritableMatchesNaN(final long userID) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (!(argument instanceof VectorOrPrefWritable)) {
+          return false;
+        }
+        VectorOrPrefWritable pref = (VectorOrPrefWritable) argument;
+        // NaN never equals itself, hence the explicit isNaN check instead of ==
+        return pref.getUserID() == userID && Float.isNaN(pref.getValue());
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {
+        // describe the expected argument so that a failed expectation is readable
+        buffer.append("VectorOrPrefWritable(userID=").append(userID).append(", value=NaN)");
+      }
+    });
+    return null;
+  }
+
+  /**
+   * Checks that {@link ToVectorAndPrefReducer} merges the preferences and the similarity column that
+   * arrive for one item index into a single {@link VectorAndPrefsWritable}.
+   */
+  @Test
+  public void testToVectorAndPrefReducer() throws Exception {
+    Reducer<VarIntWritable,VectorOrPrefWritable,VarIntWritable,VectorAndPrefsWritable>.Context ctx =
+      EasyMock.createMock(Reducer.Context.class);
+
+    // expected: both user IDs, both preference values and the untouched similarity column
+    ctx.write(EasyMock.eq(new VarIntWritable(1)),
+        vectorAndPrefsWritableMatches(
+            Arrays.asList(123L, 456L),
+            Arrays.asList(1.0f, 2.0f),
+            MathHelper.elem(3, 0.5), MathHelper.elem(7, 0.8)));
+
+    EasyMock.replay(ctx);
+
+    Vector column = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    column.set(3, 0.5);
+    column.set(7, 0.8);
+
+    VectorOrPrefWritable firstPref = new VectorOrPrefWritable(123L, 1.0f);
+    VectorOrPrefWritable secondPref = new VectorOrPrefWritable(456L, 2.0f);
+    VectorOrPrefWritable wrappedColumn = new VectorOrPrefWritable(column);
+
+    List<VectorOrPrefWritable> values = Arrays.asList(firstPref, secondPref, wrappedColumn);
+    new ToVectorAndPrefReducer().reduce(new VarIntWritable(1), values, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher that accepts a {@link VectorAndPrefsWritable} whose user
+   * IDs and preference values equal the given lists and whose vector passes
+   * {@link MathHelper#consistsOf} for the given elements.
+   *
+   * @param userIDs the expected user IDs, compared with {@link List#equals}
+   * @param prefValues the expected preference values, compared with {@link List#equals}
+   * @param elements the elements the vector is compared against
+   * @return always {@code null}; the call only serves as a placeholder argument while recording an
+   *         expectation, the real work is done by the reported matcher
+   */
+  private static VectorAndPrefsWritable vectorAndPrefsWritableMatches(final List<Long> userIDs,
+      final List<Float> prefValues, final Vector.Element... elements) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (!(argument instanceof VectorAndPrefsWritable)) {
+          return false;
+        }
+        VectorAndPrefsWritable vectorAndPrefs = (VectorAndPrefsWritable) argument;
+        return vectorAndPrefs.getUserIDs().equals(userIDs)
+            && vectorAndPrefs.getValues().equals(prefValues)
+            && MathHelper.consistsOf(vectorAndPrefs.getVector(), elements);
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {
+        // describe the expected argument so that a failed expectation is readable
+        buffer.append("VectorAndPrefsWritable(userIDs=").append(userIDs)
+            .append(", values=").append(prefValues)
+            .append(", vector={");
+        for (int i = 0; i < elements.length; i++) {
+          if (i > 0) {
+            buffer.append(", ");
+          }
+          buffer.append(elements[i].index()).append(':').append(elements[i].get());
+        }
+        buffer.append("})");
+      }
+    });
+    return null;
+  }
+
+  /**
+   * tests {@link ToVectorAndPrefReducer} in the error case that two similarity column vectors are supplied for the
+   * same item (which should never happen); an {@link IllegalStateException} is expected and nothing may be
+   * written to the context
+   */
+  @Test
+  public void testToVectorAndPrefReducerExceptionOn2Vectors() throws Exception {
+    Reducer<VarIntWritable,VectorOrPrefWritable,VarIntWritable,VectorAndPrefsWritable>.Context context =
+      EasyMock.createMock(Reducer.Context.class);
+
+    // no expectations are recorded: any call on the context makes the mock fail
+    EasyMock.replay(context);
+
+    Vector similarityColumn1 = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    Vector similarityColumn2 = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+
+    VectorOrPrefWritable similarities1 = new VectorOrPrefWritable(similarityColumn1);
+    VectorOrPrefWritable similarities2 = new VectorOrPrefWritable(similarityColumn2);
+
+    try {
+      new ToVectorAndPrefReducer().reduce(new VarIntWritable(1), Arrays.asList(similarities1, similarities2), context);
+      fail("expected an IllegalStateException when two similarity columns are supplied for one item");
+    } catch (IllegalStateException expected) {
+      // good: the second similarity column was rejected
+    }
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * Checks that {@link org.apache.mahout.cf.taste.hadoop.item.ItemFilterMapper} swaps the two IDs of
+   * each "userID,itemID" filter line, i.e. writes the item ID as key and the user ID as value.
+   */
+  @Test
+  public void testItemFilterMapper() throws Exception {
+
+    Mapper<LongWritable,Text,VarLongWritable,VarLongWritable>.Context ctx =
+      EasyMock.createMock(Mapper.Context.class);
+
+    // expected output per line: (second field, first field)
+    ctx.write(new VarLongWritable(34L), new VarLongWritable(12L));
+    ctx.write(new VarLongWritable(78L), new VarLongWritable(56L));
+
+    EasyMock.replay(ctx);
+
+    ItemFilterMapper filterMapper = new ItemFilterMapper();
+    Text firstLine = new Text("12,34");
+    Text secondLine = new Text("56,78");
+    // the input key is passed as null in this test
+    filterMapper.map(null, firstLine, ctx);
+    filterMapper.map(null, secondLine, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * tests {@link org.apache.mahout.cf.taste.hadoop.item.ItemFilterAsVectorAndPrefsReducer}
+   */
+  @Test
+  public void testItemFilterAsVectorAndPrefsReducer() throws Exception {
+    Reducer<VarLongWritable,VarLongWritable,VarIntWritable,VectorAndPrefsWritable>.Context context =
+        EasyMock.createMock(Reducer.Context.class);
+
+    int itemIDIndex = TasteHadoopUtils.idToIndex(123L);
+    context.write(EasyMock.eq(new VarIntWritable(itemIDIndex)), vectorAndPrefsForFilteringMatches(123L, 456L, 789L));
+
+    EasyMock.replay(context);
+
+    new ItemFilterAsVectorAndPrefsReducer().reduce(new VarLongWritable(123L), Arrays.asList(new VarLongWritable(456L),
+        new VarLongWritable(789L)), context);
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher for the {@link VectorAndPrefsWritable} produced for item
+   * filtering. An argument is accepted when its vector has exactly one non-default element, the value
+   * at the index of {@code itemID} is NaN, and its user IDs are the given ones (same count, each one
+   * contained; the order is not checked).
+   *
+   * @param itemID the ID of the filtered item
+   * @param userIDs the users the item is filtered for
+   * @return always {@code null}; the call only serves as a placeholder argument while recording an
+   *         expectation, the real work is done by the reported matcher
+   */
+  static VectorAndPrefsWritable vectorAndPrefsForFilteringMatches(final long itemID, final long... userIDs) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (!(argument instanceof VectorAndPrefsWritable)) {
+          return false;
+        }
+        VectorAndPrefsWritable vectorAndPrefs = (VectorAndPrefsWritable) argument;
+        Vector vector = vectorAndPrefs.getVector();
+        if (vector.getNumNondefaultElements() != 1) {
+          return false;
+        }
+        if (!Double.isNaN(vector.get(TasteHadoopUtils.idToIndex(itemID)))) {
+          return false;
+        }
+        // fetch the list once instead of on every loop iteration
+        List<Long> actualUserIDs = vectorAndPrefs.getUserIDs();
+        if (userIDs.length != actualUserIDs.size()) {
+          return false;
+        }
+        for (long userID : userIDs) {
+          if (!actualUserIDs.contains(userID)) {
+            return false;
+          }
+        }
+        return true;
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {
+        // describe the expected argument so that a failed expectation is readable
+        buffer.append("VectorAndPrefsWritable(filteredItemID=").append(itemID)
+            .append(", userIDs=").append(Arrays.toString(userIDs)).append(')');
+      }
+    });
+    return null;
+  }
+
+  /**
+   * Checks that {@link PartialMultiplyMapper} emits, for every user contained in a
+   * {@link VectorAndPrefsWritable}, that user's preference value together with the similarity column.
+   */
+  @Test
+  public void testPartialMultiplyMapper() throws Exception {
+
+    Vector column = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    column.set(3, 0.5);
+    column.set(7, 0.8);
+
+    Mapper<VarIntWritable,VectorAndPrefsWritable,VarLongWritable,PrefAndSimilarityColumnWritable>.Context ctx =
+      EasyMock.createMock(Mapper.Context.class);
+
+    // expected values: one per user, each pairing the user's preference with the shared column
+    PrefAndSimilarityColumnWritable expectedForFirstUser = new PrefAndSimilarityColumnWritable();
+    PrefAndSimilarityColumnWritable expectedForSecondUser = new PrefAndSimilarityColumnWritable();
+    expectedForFirstUser.set(1.0f, column);
+    expectedForSecondUser.set(3.0f, column);
+
+    ctx.write(EasyMock.eq(new VarLongWritable(123L)), EasyMock.eq(expectedForFirstUser));
+    ctx.write(EasyMock.eq(new VarLongWritable(456L)), EasyMock.eq(expectedForSecondUser));
+
+    EasyMock.replay(ctx);
+
+    List<Long> userIDs = Arrays.asList(123L, 456L);
+    List<Float> prefValues = Arrays.asList(1.0f, 3.0f);
+    VectorAndPrefsWritable input = new VectorAndPrefsWritable(column, userIDs, prefValues);
+
+    new PartialMultiplyMapper().map(new VarIntWritable(1), input, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+
+  /**
+   * Checks that {@link AggregateAndRecommendReducer} turns a user's preference/similarity-column
+   * pairs into recommendations. The expected values match the similarity-weighted averages of the
+   * two preferences: item 1 = (1.0 * 0.1 + 3.0 * 0.9) / (0.1 + 0.9) = 2.8 and
+   * item 2 = (1.0 * 0.5 + 3.0 * 0.5) / (0.5 + 0.5) = 2.0.
+   */
+  @Test
+  public void testAggregateAndRecommendReducer() throws Exception {
+    Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context ctx =
+        EasyMock.createMock(Reducer.Context.class);
+
+    ctx.write(EasyMock.eq(new VarLongWritable(123L)),
+        recommendationsMatch(new MutableRecommendedItem(1L, 2.8f), new MutableRecommendedItem(2L, 2.0f)));
+
+    EasyMock.replay(ctx);
+
+    RandomAccessSparseVector firstColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    firstColumn.set(1, 0.1);
+    firstColumn.set(2, 0.5);
+
+    RandomAccessSparseVector secondColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    secondColumn.set(1, 0.9);
+    secondColumn.set(2, 0.5);
+
+    PrefAndSimilarityColumnWritable firstValue = new PrefAndSimilarityColumnWritable(1.0f, firstColumn);
+    PrefAndSimilarityColumnWritable secondValue = new PrefAndSimilarityColumnWritable(3.0f, secondColumn);
+    List<PrefAndSimilarityColumnWritable> values = Arrays.asList(firstValue, secondValue);
+
+    // index -> item ID lookup, injected directly into the reducer
+    OpenIntLongHashMap indexToItemID = new OpenIntLongHashMap();
+    indexToItemID.put(1, 1L);
+    indexToItemID.put(2, 2L);
+
+    AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();
+
+    setField(reducer, "indexItemIDMap", indexToItemID);
+    setField(reducer, "recommendationsPerUser", 3);
+
+    reducer.reduce(new VarLongWritable(123L), values, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Checks that {@link AggregateAndRecommendReducer} leaves out a candidate that is backed by a
+   * single similarity column only: index 2 occurs in just one of the two columns and is not expected
+   * in the output, whereas index 1 occurs in both and is expected with the value 2.8.
+   */
+  @Test
+  public void testAggregateAndRecommendReducerExcludeRecommendationsBasedOnOneItem() throws Exception {
+    Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context ctx =
+        EasyMock.createMock(Reducer.Context.class);
+
+    ctx.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f)));
+
+    EasyMock.replay(ctx);
+
+    // the first column has no entry for index 2
+    RandomAccessSparseVector firstColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    firstColumn.set(1, 0.1);
+
+    RandomAccessSparseVector secondColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    secondColumn.set(1, 0.9);
+    secondColumn.set(2, 0.5);
+
+    PrefAndSimilarityColumnWritable firstValue = new PrefAndSimilarityColumnWritable(1.0f, firstColumn);
+    PrefAndSimilarityColumnWritable secondValue = new PrefAndSimilarityColumnWritable(3.0f, secondColumn);
+    List<PrefAndSimilarityColumnWritable> values = Arrays.asList(firstValue, secondValue);
+
+    // index -> item ID lookup, injected directly into the reducer
+    OpenIntLongHashMap indexToItemID = new OpenIntLongHashMap();
+    indexToItemID.put(1, 1L);
+    indexToItemID.put(2, 2L);
+
+    AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();
+
+    setField(reducer, "indexItemIDMap", indexToItemID);
+    setField(reducer, "recommendationsPerUser", 3);
+
+    reducer.reduce(new VarLongWritable(123L), values, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Checks {@link AggregateAndRecommendReducer} with {@code recommendationsPerUser} set to 1: although
+   * both indexes occur in both similarity columns, only the recommendation for item 1 (value 2.8) is
+   * expected.
+   */
+  @Test
+  public void testAggregateAndRecommendReducerLimitNumberOfRecommendations() throws Exception {
+    Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context ctx =
+      EasyMock.createMock(Reducer.Context.class);
+
+    ctx.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f)));
+
+    EasyMock.replay(ctx);
+
+    RandomAccessSparseVector firstColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    firstColumn.set(1, 0.1);
+    firstColumn.set(2, 0.5);
+
+    RandomAccessSparseVector secondColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+    secondColumn.set(1, 0.9);
+    secondColumn.set(2, 0.5);
+
+    PrefAndSimilarityColumnWritable firstValue = new PrefAndSimilarityColumnWritable(1.0f, firstColumn);
+    PrefAndSimilarityColumnWritable secondValue = new PrefAndSimilarityColumnWritable(3.0f, secondColumn);
+    List<PrefAndSimilarityColumnWritable> values = Arrays.asList(firstValue, secondValue);
+
+    // index -> item ID lookup, injected directly into the reducer
+    OpenIntLongHashMap indexToItemID = new OpenIntLongHashMap();
+    indexToItemID.put(1, 1L);
+    indexToItemID.put(2, 2L);
+
+    AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();
+
+    setField(reducer, "indexItemIDMap", indexToItemID);
+    // the limit under test
+    setField(reducer, "recommendationsPerUser", 1);
+
+    reducer.reduce(new VarLongWritable(123L), values, ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /**
+   * Registers an EasyMock argument matcher that accepts a {@link RecommendedItemsWritable} whose
+   * recommended items equal the given ones, in the given order.
+   *
+   * @param items the expected recommendations, compared with {@link List#equals}
+   * @return always {@code null}; the call only serves as a placeholder argument while recording an
+   *         expectation, the real work is done by the reported matcher
+   */
+  static RecommendedItemsWritable recommendationsMatch(final RecommendedItem... items) {
+    EasyMock.reportMatcher(new IArgumentMatcher() {
+      @Override
+      public boolean matches(Object argument) {
+        if (!(argument instanceof RecommendedItemsWritable)) {
+          return false;
+        }
+        RecommendedItemsWritable recommendedItemsWritable = (RecommendedItemsWritable) argument;
+        List<RecommendedItem> expectedItems = Arrays.asList(items);
+        return expectedItems.equals(recommendedItemsWritable.getRecommendedItems());
+      }
+
+      @Override
+      public void appendTo(StringBuffer buffer) {
+        // describe the expected argument so that a failed expectation is readable
+        buffer.append("RecommendedItemsWritable(items=").append(Arrays.toString(items)).append(')');
+      }
+    });
+    return null;
+  }
+
+  /**
+   * Small integration test that runs the complete {@link RecommenderJob} and checks both the
+   * recommendations and the item similarities it writes.
+   *
+   * As a tribute to http://www.slideshare.net/srowen/collaborative-filtering-at-scale,
+   * we recommend people food to animals in this test :)
+   *
+   * In the input, users 1-4 are dog, rabbit, cow and donkey; items 1-4 are burger, hotdog, berries
+   * and icecream.
+   *
+   * <pre>
+   *
+   *  user-item-matrix
+   *
+   *          burger  hotdog  berries  icecream
+   *  dog       5       5        2        -
+   *  rabbit    2       -        3        5
+   *  cow       -       5        -        3
+   *  donkey    3       -        -        5
+   *
+   *
+   *  item-item-similarity-matrix (tanimoto-coefficient of the item-vectors of the user-item-matrix)
+   *
+   *          burger  hotdog  berries icecream
+   *  burger    -      0.25    0.66    0.5
+   *  hotdog   0.25     -      0.33    0.25
+   *  berries  0.66    0.33     -      0.25
+   *  icecream 0.5     0.25    0.25     -
+   *
+   *
+   *  Prediction(dog, icecream)   = (0.5 * 5 + 0.25 * 5 + 0.25 * 2 ) / (0.5 + 0.25 + 0.25)  ~ 4.3
+   *  Prediction(rabbit, hotdog)  = (0.25 * 2 + 0.33 * 3 + 0.25 * 5) / (0.25 + 0.33 + 0.25) ~ 3.3
+   *  Prediction(cow, burger)     = (0.25 * 5 + 0.5 * 3) / (0.25 + 0.5)                     ~ 3.7
+   *  Prediction(cow, berries)    = (0.33 * 5 + 0.25 * 3) / (0.33 + 0.25)                   ~ 4.1
+   *  Prediction(donkey, hotdog)  = (0.25 * 3 + 0.25 * 5) / (0.25 + 0.25)                   ~ 4
+   *  Prediction(donkey, berries) = (0.66 * 3 + 0.25 * 5) / (0.66 + 0.25)                   ~ 3.5
+   *
+   * </pre>
+   */
+  @Test
+  public void testCompleteJob() throws Exception {
+
+    File prefsFile = getTestTempFile("prefs.txt");
+    // the output directories are removed again right after creation
+    // NOTE(review): presumably because the job insists on creating them itself - confirm
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File similaritiesOutputDir = getTestTempDir("outputSimilarities");
+    similaritiesOutputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+
+    // one "userID,itemID,preference" line per filled cell of the user-item-matrix
+    writeLines(prefsFile,
+        "1,1,5",
+        "1,2,5",
+        "1,3,2",
+        "2,1,2",
+        "2,3,3",
+        "2,4,5",
+        "3,2,5",
+        "3,4,3",
+        "4,1,3",
+        "4,4,5");
+
+    RecommenderJob job = new RecommenderJob();
+
+    Configuration conf = getConfiguration();
+    conf.set("mapred.input.dir", prefsFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.setBoolean("mapred.output.compress", false);
+
+    job.setConf(conf);
+
+    String[] args = {
+        "--tempDir", tmpDir.getAbsolutePath(),
+        "--similarityClassname", TanimotoCoefficientSimilarity.class.getName(),
+        "--numRecommendations", "4",
+        "--outputPathForSimilarityMatrix", similaritiesOutputDir.getAbsolutePath() };
+    job.run(args);
+
+    File recommendationsFile = new File(outputDir, "part-r-00000");
+    Map<Long,List<RecommendedItem>> recommendations = readRecommendations(recommendationsFile);
+    assertEquals(4, recommendations.size());
+
+    for (Entry<Long,List<RecommendedItem>> userRecommendations : recommendations.entrySet()) {
+      long userID = userRecommendations.getKey();
+      List<RecommendedItem> items = userRecommendations.getValue();
+      assertNotNull(items);
+      RecommendedItem best = items.get(0);
+
+      if (userID == 1L) {
+        // dog: icecream only
+        assertEquals(1, items.size());
+        assertEquals(4L, best.getItemID());
+        assertEquals(4.3, best.getValue(), 0.05);
+      } else if (userID == 2L) {
+        // rabbit: hotdog only
+        assertEquals(1, items.size());
+        assertEquals(2L, best.getItemID());
+        assertEquals(3.3, best.getValue(), 0.05);
+      } else if (userID == 3L) {
+        // cow: berries before burger
+        assertEquals(2, items.size());
+        assertEquals(3L, best.getItemID());
+        assertEquals(4.1, best.getValue(), 0.05);
+        RecommendedItem second = items.get(1);
+        assertEquals(1L, second.getItemID());
+        assertEquals(3.7, second.getValue(), 0.05);
+      } else if (userID == 4L) {
+        // donkey: hotdog before berries
+        assertEquals(2, items.size());
+        assertEquals(2L, best.getItemID());
+        assertEquals(4.0, best.getValue(), 0.05);
+        RecommendedItem second = items.get(1);
+        assertEquals(3L, second.getItemID());
+        assertEquals(3.5, second.getValue(), 0.05);
+      }
+    }
+
+    File similaritiesFile = new File(similaritiesOutputDir, "part-r-00000");
+    Map<Pair<Long, Long>, Double> similarities = readSimilarities(similaritiesFile);
+    // six pairs: the upper triangle of the 4x4 similarity matrix
+    assertEquals(6, similarities.size());
+
+    assertEquals(0.25, similarities.get(new Pair<Long, Long>(1L, 2L)), EPSILON);
+    assertEquals(0.6666666666666666, similarities.get(new Pair<Long, Long>(1L, 3L)), EPSILON);
+    assertEquals(0.5, similarities.get(new Pair<Long, Long>(1L, 4L)), EPSILON);
+    assertEquals(0.3333333333333333, similarities.get(new Pair<Long, Long>(2L, 3L)), EPSILON);
+    assertEquals(0.25, similarities.get(new Pair<Long, Long>(2L, 4L)), EPSILON);
+    assertEquals(0.25, similarities.get(new Pair<Long, Long>(3L, 4L)), EPSILON);
+  }
+
+  /**
+   * small integration test for boolean data
+   */
+  @Test
+  public void testCompleteJobBoolean() throws Exception {
+
+    File inputFile = getTestTempFile("prefs.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+    File usersFile = getTestTempFile("users.txt");
+    writeLines(usersFile, "3");
+
+    writeLines(inputFile,
+        "1,1",
+        "1,2",
+        "1,3",
+        "2,1",
+        "2,3",
+        "2,4",
+        "3,2",
+        "3,4",
+        "4,1",
+        "4,4");
+
+    RecommenderJob recommenderJob = new RecommenderJob();
+
+    Configuration conf = getConfiguration();
+    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.setBoolean("mapred.output.compress", false);
+
+    recommenderJob.setConf(conf);
+
+    recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+        CooccurrenceCountSimilarity.class.getName(), "--booleanData", "true",
+        "--usersFile", usersFile.getAbsolutePath() });
+
+    Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));
+
+    List<RecommendedItem> recommendedToCow = recommendations.get(3L);
+    assertEquals(2, recommendedToCow.size());
+
+    RecommendedItem item1 = recommendedToCow.get(0);
+    RecommendedItem item2 = recommendedToCow.get(1);
+
+    assertEquals(1L, item1.getItemID());
+    assertEquals(3L, item2.getItemID());
+
+    /* predicted pref must be the sum of similarities:
+    *    item1: coocc(burger, hotdog) + coocc(burger, icecream) = 3 
+    *    item2: coocc(berries, hotdog) + coocc(berries, icecream) = 2 */
+    assertEquals(3, item1.getValue(), 0.05);
+    assertEquals(2, item2.getValue(), 0.05);
+  }
+
+  /**
+   * check whether the explicit user/item filter works
+   */
+  @Test
+  public void testCompleteJobWithFiltering() throws Exception {
+
+     File inputFile = getTestTempFile("prefs.txt");
+     File userFile = getTestTempFile("users.txt");
+     File filterFile = getTestTempFile("filter.txt");
+     File outputDir = getTestTempDir("output");
+     outputDir.delete();
+     File tmpDir = getTestTempDir("tmp");
+
+     writeLines(inputFile,
+         "1,1,5",
+         "1,2,5",
+         "1,3,2",
+         "2,1,2",
+         "2,3,3",
+         "2,4,5",
+         "3,2,5",
+         "3,4,3",
+         "4,1,3",
+         "4,4,5");
+
+     /* only compute recommendations for the donkey */
+     writeLines(userFile, "4");
+     /* do not recommend the hotdog for the donkey */
+     writeLines(filterFile, "4,2");
+
+     RecommenderJob recommenderJob = new RecommenderJob();
+
+     Configuration conf = getConfiguration();
+     conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+     conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+     conf.setBoolean("mapred.output.compress", false);
+
+     recommenderJob.setConf(conf);
+
+     recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+        TanimotoCoefficientSimilarity.class.getName(), "--numRecommendations", "1",
+        "--usersFile", userFile.getAbsolutePath(), "--filterFile", filterFile.getAbsolutePath() });
+
+     Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));
+
+     assertEquals(1, recommendations.size());
+     assertTrue(recommendations.containsKey(4L));
+     assertEquals(1, recommendations.get(4L).size());
+
+     /* berries should have been recommended to the donkey */
+     RecommendedItem recommendedItem = recommendations.get(4L).get(0);
+     assertEquals(3L, recommendedItem.getItemID());
+     assertEquals(3.5, recommendedItem.getValue(), 0.05);
+   }
+
+  /**
+   * Reads a similarity output file in which every line has the form
+   * {@code itemA<TAB>itemB<TAB>similarity}.
+   *
+   * @param file the file to parse
+   * @return the similarity values keyed by (itemA, itemB) pair
+   * @throws IOException declared for failures while reading the file
+   */
+  static Map<Pair<Long,Long>, Double> readSimilarities(File file) throws IOException {
+    Map<Pair<Long,Long>, Double> result = Maps.newHashMap();
+    for (String row : new FileLineIterable(file)) {
+      String[] fields = row.split("\t");
+      long itemA = Long.parseLong(fields[0]);
+      long itemB = Long.parseLong(fields[1]);
+      double similarity = Double.parseDouble(fields[2]);
+      result.put(new Pair<Long,Long>(itemA, itemB), similarity);
+    }
+    return result;
+  }
+
+  /**
+   * Reads a recommendation output file in which every line has the form
+   * {@code userID<TAB>[itemID:value,itemID:value,...]}.
+   *
+   * @param file the file to parse
+   * @return the recommended items per user ID, in the order they appear on the line
+   * @throws IOException declared for failures while reading the file
+   */
+  static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws IOException {
+    Map<Long,List<RecommendedItem>> result = Maps.newHashMap();
+    for (String row : new FileLineIterable(file)) {
+
+      String[] userAndItems = row.split("\t");
+      long userID = Long.parseLong(userAndItems[0]);
+
+      // drop the square brackets around the item list before splitting it into entries
+      String itemList = userAndItems[1].replace("[", "").replace("]", "");
+
+      List<RecommendedItem> recommended = Lists.newLinkedList();
+      for (String entry : itemList.split(",")) {
+        String[] idAndValue = entry.split(":");
+        long itemID = Long.parseLong(idAndValue[0]);
+        float value = Float.parseFloat(idAndValue[1]);
+        recommended.add(new GenericRecommendedItem(itemID, value));
+      }
+      result.put(userID, recommended);
+    }
+    return result;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java
new file mode 100644
index 0000000..bb22b71
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.item;
+
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.math.VarLongWritable;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.hadoop.MathHelper;
+import org.easymock.EasyMock;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+/**
+ * Tests how {@link ToUserVectorsReducer} reacts to its {@code minPreferences} field: a user with
+ * fewer item IDs than the configured value must produce no output, a user reaching the value must
+ * be counted and written as a vector.
+ */
+public class ToUserVectorsReducerTest extends TasteTestCase {
+
+  private static final long USER_ID = 123L;
+
+  /** Creates a strict mock of the reducer context; every unexpected call fails the test. */
+  private static Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context mockContext() {
+    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context ctx =
+        EasyMock.createMock(Reducer.Context.class);
+    return ctx;
+  }
+
+  /** Creates a reducer whose {@code minPreferences} field has been set to 2. */
+  private ToUserVectorsReducer reducerRequiringTwoPreferences() throws Exception {
+    ToUserVectorsReducer reducer = new ToUserVectorsReducer();
+    setField(reducer, "minPreferences", 2);
+    return reducer;
+  }
+
+  @Test
+  public void testToUsersReducerMinPreferencesUserIgnored() throws Exception {
+    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context ctx = mockContext();
+    ToUserVectorsReducer reducer = reducerRequiringTwoPreferences();
+
+    // no expectations recorded: any interaction with the context is a failure
+    EasyMock.replay(ctx);
+
+    reducer.reduce(new VarLongWritable(USER_ID), Collections.singletonList(new VarLongWritable(456)), ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  @Test
+  public void testToUsersReducerMinPreferencesUserPasses() throws Exception {
+    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context ctx = mockContext();
+    Counter usersCounter = EasyMock.createMock(Counter.class);
+    ToUserVectorsReducer reducer = reducerRequiringTwoPreferences();
+
+    // expected: the USERS counter is fetched and incremented once ...
+    EasyMock.expect(ctx.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(usersCounter);
+    usersCounter.increment(1);
+    // ... and a vector holding 1.0 at the indexes of both item IDs is written for the user
+    ctx.write(
+        EasyMock.eq(new VarLongWritable(USER_ID)),
+        MathHelper.vectorMatches(
+            MathHelper.elem(TasteHadoopUtils.idToIndex(456L), 1.0),
+            MathHelper.elem(TasteHadoopUtils.idToIndex(789L), 1.0)));
+
+    EasyMock.replay(ctx, usersCounter);
+
+    reducer.reduce(new VarLongWritable(USER_ID),
+        Arrays.asList(new VarLongWritable(456), new VarLongWritable(789)), ctx);
+
+    EasyMock.verify(ctx, usersCounter);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
new file mode 100644
index 0000000..f61b5e6
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+import com.google.common.base.Charsets;
+import com.google.common.io.Files;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CosineSimilarity;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity;
+import org.apache.mahout.math.map.OpenIntLongHashMap;
+import org.easymock.EasyMock;
+import org.junit.Test;
+
+/**
+ * Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity.item
+ * some integration tests with tiny data sets at the end
+ */
+public final class ItemSimilarityJobTest extends TasteTestCase {
+
+  private static final Pattern TAB = Pattern.compile("\t");
+
+  /**
+   * Tests {@link ItemSimilarityJob.MostSimilarItemPairsMapper}
+   */
+  @Test
+  public void testMostSimilarItemsPairsMapper() throws Exception {
+
+    OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
+    indexItemIDMap.put(12, 12L);
+    indexItemIDMap.put(34, 34L);
+    indexItemIDMap.put(56, 56L);
+
+    Mapper<IntWritable,VectorWritable,EntityEntityWritable,DoubleWritable>.Context context =
+      EasyMock.createMock(Mapper.Context.class);
+
+    context.write(new EntityEntityWritable(34L, 56L), new DoubleWritable(0.9));
+
+    EasyMock.replay(context);
+
+    Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE);
+    vector.set(12, 0.2);
+    vector.set(56, 0.9);
+
+    ItemSimilarityJob.MostSimilarItemPairsMapper mapper = new ItemSimilarityJob.MostSimilarItemPairsMapper();
+    setField(mapper, "indexItemIDMap", indexItemIDMap);
+    setField(mapper, "maxSimilarItemsPerItem", 1);
+
+    mapper.map(new IntWritable(34), new VectorWritable(vector), context);
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * Tests {@link ItemSimilarityJob.MostSimilarItemPairsReducer}
+   */
+  @Test
+  public void testMostSimilarItemPairsReducer() throws Exception {
+    Reducer<EntityEntityWritable,DoubleWritable,EntityEntityWritable,DoubleWritable>.Context context =
+      EasyMock.createMock(Reducer.Context.class);
+
+    context.write(new EntityEntityWritable(123L, 456L), new DoubleWritable(0.5));
+
+    EasyMock.replay(context);
+
+    new ItemSimilarityJob.MostSimilarItemPairsReducer().reduce(new EntityEntityWritable(123L, 456L),
+        Arrays.asList(new DoubleWritable(0.5), new DoubleWritable(0.5)), context);
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * Integration test with a tiny data set
+   *
+   * <pre>
+   * user-item-matrix
+   *
+   *        Game   Mouse   PC    Disk
+   * Jane    -       1      2      -
+   * Paul    1       -      1      -
+   * Fred    -       -      -      1
+   * </pre>
+   */
+  @Test
+  public void testCompleteJob() throws Exception {
+
+    File inputFile = getTestTempFile("prefs.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+
+    writeLines(inputFile,
+        "2,1,1",
+        "1,2,1",
+        "3,4,1",
+        "1,3,2",
+        "2,3,1");
+
+    ItemSimilarityJob similarityJob = new ItemSimilarityJob();
+
+    Configuration conf = getConfiguration();
+    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.setBoolean("mapred.output.compress", false);
+
+    similarityJob.setConf(conf);
+    similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+       CosineSimilarity.class.getName() });
+    File outPart = outputDir.listFiles(new FilenameFilter() {
+      @Override
+      public boolean accept(File dir, String name) {
+        return name.startsWith("part-");
+      }
+    })[0];
+    BufferedReader reader = Files.newReader(outPart, Charsets.UTF_8);
+
+    String line;
+    int currentLine = 1;
+    while ( (line = reader.readLine()) != null) {
+
+      String[] tokens = TAB.split(line);
+
+      long itemAID = Long.parseLong(tokens[0]);
+      long itemBID = Long.parseLong(tokens[1]);
+      double similarity = Double.parseDouble(tokens[2]);
+
+      if (currentLine == 1) {
+        assertEquals(1L, itemAID);
+        assertEquals(3L, itemBID);
+        assertEquals(0.45, similarity, 0.01);
+      }
+
+      if (currentLine == 2) {
+        assertEquals(2L, itemAID);
+        assertEquals(3L, itemBID);
+        assertEquals(0.89, similarity, 0.01);
+      }
+
+      currentLine++;
+    }
+
+    int linesWritten = currentLine-1;
+    assertEquals(2, linesWritten);
+  }
+
+  /**
+   * integration test for the limitation of the number of computed similarities
+   *
+   * <pre>
+   * user-item-matrix
+   *
+   *        i1  i2  i3
+   *    u1   1   0   1
+   *    u2   0   1   1
+   *    u3   1   1   0
+   *    u4   1   1   1
+   *    u5   0   1   0
+   *    u6   1   1   0
+   *
+   *    tanimoto(i1,i2) = 0.5
+   *    tanimoto(i2,i3) = 0.333
+   *     tanimoto(i3,i1) = 0.4
+   *
+   *    When we set maxSimilaritiesPerItem to 1 the following pairs should be found:
+   *
+   *    i1 --> i2
+   *    i2 --> i1
+   *    i3 --> i1
+   * </pre>
+   */
+  @Test
+  public void testMaxSimilaritiesPerItem() throws Exception {
+
+    File inputFile = getTestTempFile("prefsForMaxSimilarities.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+
+    writeLines(inputFile,
+        "1,1,1",
+        "1,3,1",
+        "2,2,1",
+        "2,3,1",
+        "3,1,1",
+        "3,2,1",
+        "4,1,1",
+        "4,2,1",
+        "4,3,1",
+        "5,2,1",
+        "6,1,1",
+        "6,2,1");
+
+    ItemSimilarityJob similarityJob =  new ItemSimilarityJob();
+
+    Configuration conf = getConfiguration();
+    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.setBoolean("mapred.output.compress", false);
+
+    similarityJob.setConf(conf);
+    similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+        TanimotoCoefficientSimilarity.class.getName(), "--maxSimilaritiesPerItem", "1" });
+    File outPart = outputDir.listFiles(new FilenameFilter() {
+      @Override
+      public boolean accept(File dir, String name) {
+        return name.startsWith("part-");
+      }
+    })[0];
+    BufferedReader reader = Files.newReader(outPart, Charsets.UTF_8);
+
+    String line;
+    int currentLine = 1;
+    while ((line = reader.readLine()) != null) {
+
+      String[] tokens = TAB.split(line);
+
+      long itemAID = Long.parseLong(tokens[0]);
+      long itemBID = Long.parseLong(tokens[1]);
+      double similarity = Double.parseDouble(tokens[2]);
+
+      if (currentLine == 1) {
+        assertEquals(1L, itemAID);
+        assertEquals(2L, itemBID);
+        assertEquals(0.5, similarity, 0.0001);
+      }
+
+      if (currentLine == 2) {
+        assertEquals(1L, itemAID);
+        assertEquals(3L, itemBID);
+        assertEquals(0.4, similarity, 0.0001);
+      }
+
+      currentLine++;
+    }
+
+    int linesWritten = currentLine - 1;
+    assertEquals(2, linesWritten);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
new file mode 100644
index 0000000..2f8ca95
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl;
+
+import com.google.common.collect.Lists;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+import java.util.List;
+
+public abstract class TasteTestCase extends MahoutTestCase {
+
+  public static DataModel getDataModel(long[] userIDs, Double[][] prefValues) {
+    FastByIDMap<PreferenceArray> result = new FastByIDMap<PreferenceArray>();
+    for (int i = 0; i < userIDs.length; i++) {
+      List<Preference> prefsList = Lists.newArrayList();
+      for (int j = 0; j < prefValues[i].length; j++) {
+        if (prefValues[i][j] != null) {
+          prefsList.add(new GenericPreference(userIDs[i], j, prefValues[i][j].floatValue()));
+        }
+      }
+      if (!prefsList.isEmpty()) {
+        result.put(userIDs[i], new GenericUserPreferenceArray(prefsList));
+      }
+    }
+    return new GenericDataModel(result);
+  }
+
+  public static DataModel getBooleanDataModel(long[] userIDs, boolean[][] prefs) {
+    FastByIDMap<FastIDSet> result = new FastByIDMap<FastIDSet>();
+    for (int i = 0; i < userIDs.length; i++) {
+      FastIDSet prefsSet = new FastIDSet();
+      for (int j = 0; j < prefs[i].length; j++) {
+        if (prefs[i][j]) {
+          prefsSet.add(j);
+        }
+      }
+      if (!prefsSet.isEmpty()) {
+        result.put(userIDs[i], prefsSet);
+      }
+    }
+    return new GenericBooleanPrefDataModel(result);
+  }
+
+  protected static DataModel getDataModel() {
+    return getDataModel(
+            new long[] {1, 2, 3, 4},
+            new Double[][] {
+                    {0.1, 0.3},
+                    {0.2, 0.3, 0.3},
+                    {0.4, 0.3, 0.5},
+                    {0.7, 0.3, 0.8},
+            });
+  }
+
+  protected static DataModel getBooleanDataModel() {
+    return getBooleanDataModel(new long[] {1, 2, 3, 4},
+                               new boolean[][] {
+                                   {false, true,  false},
+                                   {false, true,  true,  false},
+                                   {true,  false, false, true},
+                                   {true,  false, true,  true},
+                               });
+  }
+
+  protected static boolean arrayContains(long[] array, long value) {
+    for (long l : array) {
+      if (l == value) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java
new file mode 100644
index 0000000..1f7c76b
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+/** Tests the Taste {@code BitSet}: get/set, out-of-range access, clear and clone. */
+public final class BitSetTest extends TasteTestCase {
+
+  private static final int NUM_BITS = 100;
+  private static final int LAST_BIT = NUM_BITS - 1;
+
+  /** A new set has no bit set; setting the first and the last bit makes exactly those readable. */
+  @Test
+  public void testGetSet() {
+    BitSet bits = new BitSet(NUM_BITS);
+    for (int index = 0; index < NUM_BITS; index++) {
+      assertFalse(bits.get(index));
+    }
+    bits.set(0);
+    bits.set(LAST_BIT);
+    assertTrue(bits.get(0));
+    assertTrue(bits.get(LAST_BIT));
+  }
+
+  /** Setting an index far above the size must fail. */
+  @Test(expected = ArrayIndexOutOfBoundsException.class)
+  public void testBounds1() {
+    new BitSet(NUM_BITS).set(1000);
+  }
+
+  /** Setting a negative index must fail. */
+  @Test(expected = ArrayIndexOutOfBoundsException.class)
+  public void testBounds2() {
+    new BitSet(NUM_BITS).set(-1);
+  }
+
+  /** After clear() none of the previously set bits may still be set. */
+  @Test
+  public void testClear() {
+    BitSet bits = new BitSet(NUM_BITS);
+    for (int index = 0; index < NUM_BITS; index++) {
+      bits.set(index);
+    }
+    for (int index = 0; index < NUM_BITS; index++) {
+      assertTrue(bits.get(index));
+    }
+    bits.clear();
+    for (int index = 0; index < NUM_BITS; index++) {
+      assertFalse(bits.get(index));
+    }
+  }
+
+  /** A clone must carry over the bits of the original. */
+  @Test
+  public void testClone() {
+    BitSet original = new BitSet(NUM_BITS);
+    original.set(LAST_BIT);
+    BitSet copy = original.clone();
+    assertTrue(copy.get(LAST_BIT));
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java
new file mode 100644
index 0000000..cab1984
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Random;
+
+/** Exercises {@code Cache} with far more lookups than the size value passed to its constructor. */
+public final class CacheTest extends TasteTestCase {
+
+  /** Second constructor argument of the cache; presumably its maximum number of entries. */
+  private static final int CACHE_SIZE = 1000;
+  private static final int NUM_OPERATIONS = 1000000;
+
+  /** Every lookup must return the key itself, as supplied by the identity retriever. */
+  @Test
+  public void testLotsOfGets() throws TasteException {
+    Retriever<Object,Object> identity = new IdentityRetriever();
+    Cache<Object,Object> cache = new Cache<Object,Object>(identity, CACHE_SIZE);
+    for (int key = 0; key < NUM_OPERATIONS; key++) {
+      assertEquals(key, cache.get(key));
+    }
+  }
+
+  /** Randomly interleaves clear(), remove() and get(); every get() must still return its key. */
+  @Test
+  public void testMixedUsage() throws TasteException {
+    Random random = RandomUtils.getRandom();
+    Retriever<Object,Object> identity = new IdentityRetriever();
+    Cache<Object,Object> cache = new Cache<Object,Object>(identity, CACHE_SIZE);
+    for (int key = 0; key < NUM_OPERATIONS; key++) {
+      double draw = random.nextDouble();
+      if (draw >= 0.1) {
+        // by far the most frequent case: a plain lookup
+        assertEquals(key, cache.get(key));
+      } else if (draw >= 0.01) {
+        cache.remove(draw - 100);
+      } else {
+        cache.clear();
+      }
+    }
+  }
+
+  /** Retriever that hands back the key it was asked for. */
+  private static class IdentityRetriever implements Retriever<Object,Object> {
+    @Override
+    public Object get(Object key) throws TasteException {
+      return key;
+    }
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java
new file mode 100644
index 0000000..9263ce7
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Maps;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Map;
+import java.util.Random;
+
+/** <p>Tests {@link FastByIDMap}.</p> */
+public final class FastByIDMapTest extends TasteTestCase {
+
+  @Test
+  public void testPutAndGet() {
+    FastByIDMap<Long> map = new FastByIDMap<Long>();
+    assertNull(map.get(500000L));
+    map.put(500000L, 2L);
+    assertEquals(2L, (long) map.get(500000L));
+  }
+  
+  @Test
+  public void testRemove() {
+    FastByIDMap<Long> map = new FastByIDMap<Long>();
+    map.put(500000L, 2L);
+    map.remove(500000L);
+    assertEquals(0, map.size());
+    assertTrue(map.isEmpty());
+    assertNull(map.get(500000L));
+  }
+  
+  @Test
+  public void testClear() {
+    FastByIDMap<Long> map = new FastByIDMap<Long>();
+    map.put(500000L, 2L);
+    map.clear();
+    assertEquals(0, map.size());
+    assertTrue(map.isEmpty());
+    assertNull(map.get(500000L));
+  }
+  
+  @Test
+  public void testSizeEmpty() {
+    FastByIDMap<Long> map = new FastByIDMap<Long>();
+    assertEquals(0, map.size());
+    assertTrue(map.isEmpty());
+    map.put(500000L, 2L);
+    assertEquals(1, map.size());
+    assertFalse(map.isEmpty());
+    map.remove(500000L);
+    assertEquals(0, map.size());
+    assertTrue(map.isEmpty());
+  }
+  
+  @Test
+  public void testContains() {
+    FastByIDMap<String> map = buildTestFastMap();
+    assertTrue(map.containsKey(500000L));
+    assertTrue(map.containsKey(47L));
+    assertTrue(map.containsKey(2L));
+    assertTrue(map.containsValue("alpha"));
+    assertTrue(map.containsValue("bang"));
+    assertTrue(map.containsValue("beta"));
+    assertFalse(map.containsKey(999));
+    assertFalse(map.containsValue("something"));
+  }
+
+  @Test
+  public void testRehash() {
+    FastByIDMap<String> map = buildTestFastMap();
+    map.remove(500000L);
+    map.rehash();
+    assertNull(map.get(500000L));
+    assertEquals("bang", map.get(47L));
+  }
+  
+  @Test
+  public void testGrow() {
+    FastByIDMap<String> map = new FastByIDMap<String>(1,1);
+    map.put(500000L, "alpha");
+    map.put(47L, "bang");
+    assertNull(map.get(500000L));
+    assertEquals("bang", map.get(47L));
+  }
+   
+  @Test
+  public void testVersusHashMap() {
+    FastByIDMap<String> actual = new FastByIDMap<String>();
+    Map<Long, String> expected = Maps.newHashMapWithExpectedSize(1000000);
+    Random r = RandomUtils.getRandom();
+    for (int i = 0; i < 1000000; i++) {
+      double d = r.nextDouble();
+      Long key = (long) r.nextInt(100);
+      if (d < 0.4) {
+        assertEquals(expected.get(key), actual.get(key));
+      } else {
+        if (d < 0.7) {
+          assertEquals(expected.put(key, "bang"), actual.put(key, "bang"));
+        } else {
+          assertEquals(expected.remove(key), actual.remove(key));
+        }
+        assertEquals(expected.size(), actual.size());
+        assertEquals(expected.isEmpty(), actual.isEmpty());
+      }
+    }
+  }
+  
+  @Test
+  public void testMaxSize() {
+    FastByIDMap<String> map = new FastByIDMap<String>();
+    map.put(4, "bang");
+    assertEquals(1, map.size());
+    map.put(47L, "bang");
+    assertEquals(2, map.size());
+    assertNull(map.get(500000L));
+    map.put(47L, "buzz");
+    assertEquals(2, map.size());
+    assertEquals("buzz", map.get(47L));
+  }
+  
+  
+  private static FastByIDMap<String> buildTestFastMap() {
+    FastByIDMap<String> map = new FastByIDMap<String>();
+    map.put(500000L, "alpha");
+    map.put(47L, "bang");
+    map.put(2L, "beta");
+    return map;
+  }
+  
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java
new file mode 100644
index 0000000..aec1738
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Sets;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Collection;
+import java.util.Random;
+
+/** Exercises {@link FastIDSet}: add/remove/clear, reserved values, rehash, growth, iteration, randomized check. */
+public final class FastIDSetTest extends TasteTestCase {
+
+  @Test
+  public void testContainsAndAdd() { // an ID is reported as contained only after it was added
+    FastIDSet ids = new FastIDSet();
+    assertFalse(ids.contains(1));
+    ids.add(1);
+    assertTrue(ids.contains(1));
+  }
+
+  @Test
+  public void testRemove() { // removing the only ID leaves an empty set
+    FastIDSet ids = new FastIDSet();
+    ids.add(1);
+    ids.remove(1);
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+    assertFalse(ids.contains(1));
+  }
+
+  @Test
+  public void testClear() { // clear() discards a previously added ID
+    FastIDSet ids = new FastIDSet();
+    ids.add(1);
+    ids.clear();
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+    assertFalse(ids.contains(1));
+  }
+
+  @Test
+  public void testSizeEmpty() { // size() and isEmpty() follow an add and the matching remove
+    FastIDSet ids = new FastIDSet();
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+    ids.add(1);
+    assertEquals(1, ids.size());
+    assertFalse(ids.isEmpty());
+    ids.remove(1);
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+  }
+
+  @Test
+  public void testContains() { // the fixture holds 1, 2 and 3 but not 4
+    FastIDSet ids = buildTestFastSet();
+    assertTrue(ids.contains(1));
+    assertTrue(ids.contains(2));
+    assertTrue(ids.contains(3));
+    assertFalse(ids.contains(4));
+  }
+
+  @Test
+  public void testReservedValues() { // add() must reject Long.MIN_VALUE and Long.MAX_VALUE
+    FastIDSet ids = new FastIDSet();
+    try {
+      ids.add(Long.MIN_VALUE);
+      fail("Should have thrown IllegalArgumentException");
+    } catch (IllegalArgumentException expected) {
+      // rejection is the outcome this test wants
+    }
+    assertFalse(ids.contains(Long.MIN_VALUE));
+    try {
+      ids.add(Long.MAX_VALUE);
+      fail("Should have thrown IllegalArgumentException");
+    } catch (IllegalArgumentException expected) {
+      // rejection is the outcome this test wants
+    }
+    assertFalse(ids.contains(Long.MAX_VALUE));
+  }
+
+  @Test
+  public void testRehash() { // a removed ID must stay absent after an explicit rehash()
+    FastIDSet ids = buildTestFastSet();
+    ids.remove(1);
+    ids.rehash();
+    assertFalse(ids.contains(1));
+  }
+
+  @Test
+  public void testGrow() { // constructed with argument 1, the set must still accept two IDs
+    FastIDSet ids = new FastIDSet(1);
+    ids.add(1);
+    ids.add(2);
+    assertTrue(ids.contains(1));
+    assertTrue(ids.contains(2));
+  }
+
+  @Test
+  public void testIterator() {
+    FastIDSet ids = buildTestFastSet();
+    Collection<Long> notYetSeen = Sets.newHashSetWithExpectedSize(3);
+    notYetSeen.add(1L);
+    notYetSeen.add(2L);
+    notYetSeen.add(3L);
+    // Tick off every ID the iterator yields; all three must have been seen by the end.
+    for (LongPrimitiveIterator it = ids.iterator(); it.hasNext();) {
+      notYetSeen.remove(it.nextLong());
+    }
+    assertTrue(notYetSeen.isEmpty());
+  }
+
+  @Test
+  public void testVersusHashSet() { // randomized comparison of FastIDSet against a reference set
+    FastIDSet fastSet = new FastIDSet(1);
+    Collection<Integer> reference = Sets.newHashSetWithExpectedSize(1000000);
+    Random random = RandomUtils.getRandom();
+    for (int op = 0; op < 1000000; op++) {
+      double choice = random.nextDouble();
+      Integer id = random.nextInt(100);
+      if (choice < 0.4) { // membership query: answers must agree
+        assertEquals(reference.contains(id), fastSet.contains(id));
+        continue;
+      }
+      if (choice < 0.7) { // insertion: "was it new?" must agree
+        assertEquals(reference.add(id), fastSet.add(id));
+      } else { // removal: "was it present?" must agree
+        assertEquals(reference.remove(id), fastSet.remove(id));
+      }
+      assertEquals(reference.size(), fastSet.size());
+      assertEquals(reference.isEmpty(), fastSet.isEmpty());
+    }
+  }
+
+  private static FastIDSet buildTestFastSet() { // fixture containing the IDs 1, 2 and 3
+    FastIDSet fixture = new FastIDSet();
+    fixture.add(1);
+    fixture.add(2);
+    fixture.add(3);
+    return fixture;
+  }
+
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java
new file mode 100644
index 0000000..2f27483
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+/** Exercises {@link FastMap}: basic operations, nulls, rehash/growth, collection views, max size, randomized check. */
+public final class FastMapTest extends TasteTestCase {
+
+  @Test
+  public void testPutAndGet() { // a key yields null until a value has been put for it
+    Map<String, String> fastMap = new FastMap<String, String>();
+    assertNull(fastMap.get("foo"));
+    fastMap.put("foo", "bar");
+    assertEquals("bar", fastMap.get("foo"));
+  }
+
+  @Test
+  public void testRemove() { // removing the only entry leaves an empty map
+    Map<String, String> fastMap = new FastMap<String, String>();
+    fastMap.put("foo", "bar");
+    fastMap.remove("foo");
+    assertEquals(0, fastMap.size());
+    assertTrue(fastMap.isEmpty());
+    assertNull(fastMap.get("foo"));
+  }
+
+  @Test
+  public void testClear() { // clear() discards a previously stored entry
+    Map<String, String> fastMap = new FastMap<String, String>();
+    fastMap.put("foo", "bar");
+    fastMap.clear();
+    assertEquals(0, fastMap.size());
+    assertTrue(fastMap.isEmpty());
+    assertNull(fastMap.get("foo"));
+  }
+
+  @Test
+  public void testSizeEmpty() { // size() and isEmpty() follow a put and the matching remove
+    Map<String, String> fastMap = new FastMap<String, String>();
+    assertEquals(0, fastMap.size());
+    assertTrue(fastMap.isEmpty());
+    fastMap.put("foo", "bar");
+    assertEquals(1, fastMap.size());
+    assertFalse(fastMap.isEmpty());
+    fastMap.remove("foo");
+    assertEquals(0, fastMap.size());
+    assertTrue(fastMap.isEmpty());
+  }
+
+  @Test
+  public void testContains() { // fixture keys and values are found; an unknown string is not
+    FastMap<String, String> fixture = buildTestFastMap();
+    assertTrue(fixture.containsKey("foo"));
+    assertTrue(fixture.containsKey("baz"));
+    assertTrue(fixture.containsKey("alpha"));
+    assertTrue(fixture.containsValue("bar"));
+    assertTrue(fixture.containsValue("bang"));
+    assertTrue(fixture.containsValue("beta"));
+    assertFalse(fixture.containsKey("something"));
+    assertFalse(fixture.containsValue("something"));
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testNull1() { // get(null) is asserted to yield null; the test as a whole expects an NPE
+    Map<String, String> fastMap = new FastMap<String, String>();
+    assertNull(fastMap.get(null));
+    fastMap.put(null, "bar");
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testNull2() { // storing a null value is expected to raise an NPE
+    Map<String, String> fastMap = new FastMap<String, String>();
+    fastMap.put("foo", null);
+  }
+
+  @Test
+  public void testRehash() { // after remove + rehash() the removed key is gone, another key survives
+    FastMap<String, String> fixture = buildTestFastMap();
+    fixture.remove("foo");
+    fixture.rehash();
+    assertNull(fixture.get("foo"));
+    assertEquals("bang", fixture.get("baz"));
+  }
+
+  @Test
+  public void testGrow() { // constructed with size 1 and NO_MAX_SIZE, the map must keep both entries
+    Map<String, String> fastMap = new FastMap<String, String>(1, FastMap.NO_MAX_SIZE);
+    fastMap.put("foo", "bar");
+    fastMap.put("baz", "bang");
+    assertEquals("bar", fastMap.get("foo"));
+    assertEquals("bang", fastMap.get("baz"));
+  }
+
+  @Test
+  public void testKeySet() {
+    FastMap<String, String> fixture = buildTestFastMap();
+    Collection<String> wantedKeys = Sets.newHashSetWithExpectedSize(3);
+    wantedKeys.add("foo");
+    wantedKeys.add("baz");
+    wantedKeys.add("alpha");
+    Set<String> keys = fixture.keySet();
+    assertTrue(wantedKeys.containsAll(keys));
+    assertTrue(keys.containsAll(wantedKeys));
+    // Removing through the key-set iterator must be reflected in the backing map.
+    for (Iterator<String> it = keys.iterator(); it.hasNext();) {
+      String key = it.next();
+      if (!"baz".equals(key)) {
+        it.remove();
+      }
+    }
+    assertTrue(fixture.containsKey("baz"));
+    assertFalse(fixture.containsKey("foo"));
+    assertFalse(fixture.containsKey("alpha"));
+  }
+
+  @Test
+  public void testValues() {
+    FastMap<String, String> fixture = buildTestFastMap();
+    Collection<String> wantedValues = Sets.newHashSetWithExpectedSize(3);
+    wantedValues.add("bar");
+    wantedValues.add("bang");
+    wantedValues.add("beta");
+    Collection<String> values = fixture.values();
+    assertTrue(wantedValues.containsAll(values));
+    assertTrue(values.containsAll(wantedValues));
+    // Removing through the values iterator must be reflected in the backing map.
+    for (Iterator<String> it = values.iterator(); it.hasNext();) {
+      String value = it.next();
+      if (!"bang".equals(value)) {
+        it.remove();
+      }
+    }
+    assertTrue(fixture.containsValue("bang"));
+    assertFalse(fixture.containsValue("bar"));
+    assertFalse(fixture.containsValue("beta"));
+  }
+
+  @Test
+  public void testEntrySet() { // the entry set has three entries covering all fixture keys and values
+    FastMap<String, String> fixture = buildTestFastMap();
+    Set<Map.Entry<String, String>> entries = fixture.entrySet();
+    Collection<String> unseenKeys = Sets.newHashSetWithExpectedSize(3);
+    unseenKeys.add("foo");
+    unseenKeys.add("baz");
+    unseenKeys.add("alpha");
+    Collection<String> unseenValues = Sets.newHashSetWithExpectedSize(3);
+    unseenValues.add("bar");
+    unseenValues.add("bang");
+    unseenValues.add("beta");
+    assertEquals(3, entries.size());
+    for (Map.Entry<String, String> each : entries) {
+      unseenKeys.remove(each.getKey());
+      unseenValues.remove(each.getValue());
+    }
+    assertEquals(0, unseenKeys.size());
+    assertEquals(0, unseenValues.size());
+  }
+
+  @Test
+  public void testVersusHashMap() { // randomized comparison of FastMap against a reference Map
+    Map<Integer, String> fastMap = new FastMap<Integer, String>(1, 1000000);
+    Map<Integer, String> reference = Maps.newHashMapWithExpectedSize(1000000);
+    Random random = RandomUtils.getRandom();
+    for (int op = 0; op < 1000000; op++) {
+      double choice = random.nextDouble();
+      Integer id = random.nextInt(100);
+      if (choice < 0.4) { // lookup: both maps must answer with the same value (or null)
+        assertEquals(reference.get(id), fastMap.get(id));
+        continue;
+      }
+      if (choice < 0.7) { // insert or overwrite: the previous values must agree
+        assertEquals(reference.put(id, "foo"), fastMap.put(id, "foo"));
+      } else { // removal: the removed values must agree
+        assertEquals(reference.remove(id), fastMap.remove(id));
+      }
+      assertEquals(reference.size(), fastMap.size());
+      assertEquals(reference.isEmpty(), fastMap.isEmpty());
+    }
+  }
+
+  @Test
+  public void testMaxSize() { // with max size 1 a second key displaces the first one
+    Map<String, String> capped = new FastMap<String, String>(1, 1);
+    capped.put("foo", "bar");
+    assertEquals(1, capped.size());
+    capped.put("baz", "bang");
+    assertEquals(1, capped.size());
+    assertNull(capped.get("foo")); // the older entry is expected to be gone
+    capped.put("baz", "buzz"); // overwriting the surviving key must not change the size
+    assertEquals(1, capped.size());
+    assertEquals("buzz", capped.get("baz"));
+  }
+
+  private static FastMap<String, String> buildTestFastMap() { // fixture: foo->bar, baz->bang, alpha->beta
+    FastMap<String, String> fixture = new FastMap<String, String>();
+    fixture.put("foo", "bar");
+    fixture.put("baz", "bang");
+    fixture.put("alpha", "beta");
+    return fixture;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java
new file mode 100644
index 0000000..1fcc800
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+public final class InvertedRunningAverageTest extends TasteTestCase { // covers both inverted wrappers
+
+  @Test
+  public void testAverage() { // data added to the wrapped average shows up negated in the view
+    RunningAverage wrapped = new FullRunningAverage();
+    RunningAverage negated = new InvertedRunningAverage(wrapped);
+    assertEquals(0, negated.getCount());
+    wrapped.addDatum(1.0);
+    assertEquals(1, negated.getCount());
+    assertEquals(-1.0, negated.getAverage(), EPSILON);
+    wrapped.addDatum(2.0);
+    assertEquals(2, negated.getCount());
+    assertEquals(-1.5, negated.getAverage(), EPSILON);
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testUnsupported1() {
+    RunningAverage negated = new InvertedRunningAverage(new FullRunningAverage());
+    negated.addDatum(1.0); // adding through the inverted view is expected to be rejected
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testUnsupported2() {
+    RunningAverage negated = new InvertedRunningAverage(new FullRunningAverage());
+    negated.changeDatum(1.0); // changing through the inverted view is expected to be rejected
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testUnsupported3() {
+    RunningAverage negated = new InvertedRunningAverage(new FullRunningAverage());
+    negated.removeDatum(1.0); // removing through the inverted view is expected to be rejected
+  }
+
+  @Test
+  public void testAverageAndStdDev() { // same as testAverage, plus the standard deviation
+    RunningAverageAndStdDev wrapped = new FullRunningAverageAndStdDev();
+    RunningAverageAndStdDev negated = new InvertedRunningAverageAndStdDev(wrapped);
+    assertEquals(0, negated.getCount());
+    wrapped.addDatum(1.0);
+    assertEquals(1, negated.getCount());
+    assertEquals(-1.0, negated.getAverage(), EPSILON);
+    wrapped.addDatum(2.0);
+    assertEquals(2, negated.getCount());
+    assertEquals(-1.5, negated.getAverage(), EPSILON);
+    assertEquals(Math.sqrt(2.0)/2.0, negated.getStandardDeviation(), EPSILON); // stays positive
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testAndStdDevUnsupported1() {
+    RunningAverage negated = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev());
+    negated.addDatum(1.0); // adding through the inverted view is expected to be rejected
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testAndStdDevUnsupported2() {
+    RunningAverage negated = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev());
+    negated.changeDatum(1.0); // changing through the inverted view is expected to be rejected
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testAndStdDevUnsupported3() {
+    RunningAverage negated = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev());
+    negated.removeDatum(1.0); // removing through the inverted view is expected to be rejected
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java
new file mode 100644
index 0000000..7458df3
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+import java.util.NoSuchElementException;
+
+public final class LongPrimitiveArrayIteratorTest extends TasteTestCase { // next/nextLong, peek and skip
+
+  @Test(expected = NoSuchElementException.class)
+  public void testEmpty() { // an iterator over an empty array has nothing to hand out
+    LongPrimitiveIterator empty = new LongPrimitiveArrayIterator(new long[0]);
+    assertFalse(empty.hasNext());
+    empty.next(); // expected to throw NoSuchElementException
+  }
+
+  @Test(expected = NoSuchElementException.class)
+  public void testNext() { // NOTE(review): the expected exception is not tied to the final call only
+    LongPrimitiveIterator values = new LongPrimitiveArrayIterator(new long[] {3,2,1});
+    assertTrue(values.hasNext());
+    assertEquals(3, (long) values.next());
+    assertTrue(values.hasNext());
+    assertEquals(2, values.nextLong());
+    assertTrue(values.hasNext());
+    assertEquals(1, (long) values.next());
+    assertFalse(values.hasNext());
+    values.nextLong(); // exhausted: expected to throw NoSuchElementException
+  }
+
+  @Test
+  public void testPeekSkip() {
+    LongPrimitiveIterator values = new LongPrimitiveArrayIterator(new long[] {3,2,1});
+    assertEquals(3, values.peek()); // peek() must not consume the head element
+    values.skip(2); // jumps over 3 and 2
+    assertEquals(1, values.nextLong());
+    assertFalse(values.hasNext());
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java
new file mode 100644
index 0000000..20233a7
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+
+import java.util.Collection;
+import java.util.concurrent.Callable;
+
+/** Test double that is both a {@link Refreshable} and a {@link Callable}; it counts its refreshes and calls. */
+final class MockRefreshable implements Refreshable, Callable<Object> {
+
+  private int invocations; // incremented by every call(), whether direct or via refresh()
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    this.call(); // the argument is ignored; a refresh simply counts as one call
+  }
+
+  @Override
+  public Object call() {
+    invocations += 1;
+    return null;
+  }
+
+  int getCallCount() { // number of call()/refresh() invocations so far
+    return invocations;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
new file mode 100644
index 0000000..54c97e3
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Sets;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+import java.util.Collection;
+
+/** Tests {@link RefreshHelper}: the wrapped callable, a null callable, dependencies, already-refreshed tracking. */
+public final class RefreshHelperTest extends TasteTestCase {
+
+  @Test
+  public void testCallable() { // refreshing the helper invokes the supplied callable once
+    MockRefreshable callable = new MockRefreshable();
+    Refreshable helper = new RefreshHelper(callable);
+    helper.refresh(null);
+    assertEquals(1, callable.getCallCount());
+  }
+
+  @Test
+  public void testNoCallable() { // passes as long as refresh() copes with a null callable
+    Refreshable helper = new RefreshHelper(null);
+    helper.refresh(null);
+  }
+
+  @Test
+  public void testDependencies() { // each registered dependency is refreshed exactly once
+    RefreshHelper helper = new RefreshHelper(null);
+    MockRefreshable first = new MockRefreshable();
+    MockRefreshable second = new MockRefreshable();
+    helper.addDependency(first);
+    helper.addDependency(second);
+    helper.refresh(null);
+    assertEquals(1, first.getCallCount());
+    assertEquals(1, second.getCallCount());
+  }
+
+  @Test
+  public void testAlreadyRefreshed() { // a dependency listed as already refreshed is skipped
+    RefreshHelper helper = new RefreshHelper(null);
+    MockRefreshable first = new MockRefreshable();
+    MockRefreshable second = new MockRefreshable();
+    helper.addDependency(first);
+    helper.addDependency(second);
+    Collection<Refreshable> alreadyRefreshed = Sets.newHashSetWithExpectedSize(1);
+    alreadyRefreshed.add(first);
+    helper.refresh(alreadyRefreshed);
+    assertEquals(0, first.getCallCount());
+    assertEquals(1, second.getCallCount());
+  }
+
+}