You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pa...@apache.org on 2015/04/01 20:07:43 UTC
[12/51] [partial] mahout git commit: MAHOUT-1655 Refactors mr-legacy
into mahout-hdfs and mahout-mr, closes apache/mahout#86
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
new file mode 100644
index 0000000..1326777
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
@@ -0,0 +1,928 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.item;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
+import org.apache.mahout.cf.taste.hadoop.MutableRecommendedItem;
+import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
+import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
+import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.recommender.GenericRecommendedItem;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.FileLineIterable;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.VarIntWritable;
+import org.apache.mahout.math.VarLongWritable;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.hadoop.MathHelper;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CooccurrenceCountSimilarity;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity;
+import org.apache.mahout.math.map.OpenIntLongHashMap;
+import org.easymock.IArgumentMatcher;
+import org.easymock.EasyMock;
+import org.junit.Test;
+
+public class RecommenderJobTest extends TasteTestCase {
+
+ /**
+ * tests {@link ItemIDIndexMapper}
+ */
+ @Test
+ public void testItemIDIndexMapper() throws Exception {
+ Mapper<LongWritable,Text, VarIntWritable, VarLongWritable>.Context context =
+ EasyMock.createMock(Mapper.Context.class);
+
+ context.write(new VarIntWritable(TasteHadoopUtils.idToIndex(789L)), new VarLongWritable(789L));
+ EasyMock.replay(context);
+
+ new ItemIDIndexMapper().map(new LongWritable(123L), new Text("456,789,5.0"), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * tests {@link ItemIDIndexReducer}
+ */
+ @Test
+ public void testItemIDIndexReducer() throws Exception {
+ Reducer<VarIntWritable, VarLongWritable, VarIntWritable,VarLongWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+
+ context.write(new VarIntWritable(123), new VarLongWritable(45L));
+ EasyMock.replay(context);
+
+ new ItemIDIndexReducer().reduce(new VarIntWritable(123), Arrays.asList(new VarLongWritable(67L),
+ new VarLongWritable(89L), new VarLongWritable(45L)), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * Checks {@link ToItemPrefsMapper} on two "a,b,value" lines: each line must lead to one write of
+ * the first field as key and an {@link EntityPrefWritable} built from the second field and the
+ * value.
+ */
+ @Test
+ public void testToItemPrefsMapper() throws Exception {
+ Mapper<LongWritable,Text, VarLongWritable,VarLongWritable>.Context ctx =
+ EasyMock.createMock(Mapper.Context.class);
+
+ // expected output for the two input lines fed in below
+ VarLongWritable firstKey = new VarLongWritable(12L);
+ VarLongWritable secondKey = new VarLongWritable(56L);
+ ctx.write(firstKey, new EntityPrefWritable(34L, 1.0f));
+ ctx.write(secondKey, new EntityPrefWritable(78L, 2.0f));
+ EasyMock.replay(ctx);
+
+ ToItemPrefsMapper itemPrefsMapper = new ToItemPrefsMapper();
+ Text firstLine = new Text("12,34,1");
+ Text secondLine = new Text("56,78,2");
+ itemPrefsMapper.map(new LongWritable(123L), firstLine, ctx);
+ itemPrefsMapper.map(new LongWritable(456L), secondLine, ctx);
+
+ EasyMock.verify(ctx);
+ }
+
+ /**
+ * Checks {@link ToItemPrefsMapper} with its "booleanData" field switched on: lines carry no
+ * preference value and each "a,b" line must be written as a plain pair of
+ * {@link VarLongWritable}s.
+ */
+ @Test
+ public void testToItemPrefsMapperBooleanData() throws Exception {
+ Mapper<LongWritable,Text, VarLongWritable,VarLongWritable>.Context ctx =
+ EasyMock.createMock(Mapper.Context.class);
+
+ VarLongWritable firstKey = new VarLongWritable(12L);
+ VarLongWritable secondKey = new VarLongWritable(56L);
+ ctx.write(firstKey, new VarLongWritable(34L));
+ ctx.write(secondKey, new VarLongWritable(78L));
+ EasyMock.replay(ctx);
+
+ ToItemPrefsMapper booleanMapper = new ToItemPrefsMapper();
+ setField(booleanMapper, "booleanData", true);
+
+ Text firstLine = new Text("12,34");
+ Text secondLine = new Text("56,78");
+ booleanMapper.map(new LongWritable(123L), firstLine, ctx);
+ booleanMapper.map(new LongWritable(456L), secondLine, ctx);
+
+ EasyMock.verify(ctx);
+ }
+
+ /**
+ * tests {@link ToUserVectorsReducer}
+ * <p>
+ * The reducer receives the preferences (34, 1.0) and (56, 2.0) for key 12. The test expects one
+ * written vector for key 12 that holds 1.0 and 2.0 at the positions computed by
+ * {@link TasteHadoopUtils#idToIndex} for 34 and 56, one lookup of the USERS counter and one
+ * increment of that counter by 1.
+ * <p>
+ * The argument matchers are created inline inside the recorded {@code context.write(...)} call;
+ * keep them there, because EasyMock associates matchers with the call that follows them.
+ */
+ @Test
+ public void testToUserVectorReducer() throws Exception {
+ Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+ Counter userCounters = EasyMock.createMock(Counter.class);
+
+ EasyMock.expect(context.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(userCounters);
+ userCounters.increment(1);
+ context.write(EasyMock.eq(new VarLongWritable(12L)), MathHelper.vectorMatches(
+ MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0), MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 2.0)));
+
+ EasyMock.replay(context, userCounters);
+
+ Collection<VarLongWritable> varLongWritables = Lists.newLinkedList();
+ varLongWritables.add(new EntityPrefWritable(34L, 1.0f));
+ varLongWritables.add(new EntityPrefWritable(56L, 2.0f));
+
+ new ToUserVectorsReducer().reduce(new VarLongWritable(12L), varLongWritables, context);
+
+ EasyMock.verify(context, userCounters);
+ }
+
+ /**
+ * tests {@link ToUserVectorsReducer} using boolean data
+ * <p>
+ * The values are plain {@link VarLongWritable}s (34 and 56) without a preference value. The
+ * expected vector for key 12 holds 1.0 at both positions computed by
+ * {@link TasteHadoopUtils#idToIndex}. As in the non-boolean test, the USERS counter must be
+ * looked up once and incremented by 1.
+ */
+ @Test
+ public void testToUserVectorReducerWithBooleanData() throws Exception {
+ Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+ Counter userCounters = EasyMock.createMock(Counter.class);
+
+ EasyMock.expect(context.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(userCounters);
+ userCounters.increment(1);
+ context.write(EasyMock.eq(new VarLongWritable(12L)), MathHelper.vectorMatches(
+ MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0), MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 1.0)));
+
+ EasyMock.replay(context, userCounters);
+
+ new ToUserVectorsReducer().reduce(new VarLongWritable(12L), Arrays.asList(new VarLongWritable(34L),
+ new VarLongWritable(56L)), context);
+
+ EasyMock.verify(context, userCounters);
+ }
+
+ /**
+ * tests {@link SimilarityMatrixRowWrapperMapper}
+ * <p>
+ * Row 12 of the similarity matrix is mapped; the test expects one write keyed by 12 whose
+ * {@link VectorOrPrefWritable} wraps a vector matching the elements (34, 0.5) and (56, 0.7).
+ * <p>
+ * NOTE(review): the input row also sets index 12 (the row's own index) to 1.0, but that entry is
+ * not part of the expected elements. Presumably the mapper neutralizes the self-similarity
+ * entry; confirm against SimilarityMatrixRowWrapperMapper and MathHelper.consistsOf.
+ */
+ @Test
+ public void testSimilarityMatrixRowWrapperMapper() throws Exception {
+ Mapper<IntWritable,VectorWritable,VarIntWritable,VectorOrPrefWritable>.Context context =
+ EasyMock.createMock(Mapper.Context.class);
+
+ context.write(EasyMock.eq(new VarIntWritable(12)), vectorOfVectorOrPrefWritableMatches(MathHelper.elem(34, 0.5),
+ MathHelper.elem(56, 0.7)));
+
+ EasyMock.replay(context);
+
+ RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ vector.set(12, 1.0);
+ vector.set(34, 0.5);
+ vector.set(56, 0.7);
+
+ new SimilarityMatrixRowWrapperMapper().map(new IntWritable(12), new VectorWritable(vector), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * Registers an EasyMock argument matcher that accepts a {@link VectorOrPrefWritable} whose
+ * wrapped {@link Vector} passes {@code MathHelper.consistsOf} for the given elements.
+ * <p>
+ * Call this inline as an argument of the recorded mock call; the return value is only a
+ * placeholder.
+ *
+ * @param elements the elements the wrapped vector is checked against
+ * @return always {@code null}
+ */
+ private static VectorOrPrefWritable vectorOfVectorOrPrefWritableMatches(final Vector.Element... elements) {
+ EasyMock.reportMatcher(new IArgumentMatcher() {
+ @Override
+ public boolean matches(Object argument) {
+ if (!(argument instanceof VectorOrPrefWritable)) {
+ return false;
+ }
+ Vector v = ((VectorOrPrefWritable) argument).getVector();
+ return MathHelper.consistsOf(v, elements);
+ }
+
+ @Override
+ public void appendTo(StringBuffer buffer) {
+ // describe the expectation so that a failed verification names what was expected
+ buffer.append("vectorOfVectorOrPrefWritableMatches(");
+ for (Vector.Element element : elements) {
+ buffer.append(' ').append(element.index()).append(':').append(element.get());
+ }
+ buffer.append(" )");
+ }
+ });
+ return null;
+ }
+
+ /**
+ * tests {@link UserVectorSplitterMapper}
+ * <p>
+ * The vector {34: 0.5, 56: 0.7} is mapped for key 123 with "maxPrefsPerUserConsidered" set to 10.
+ * One write per vector entry is expected: keyed by the entry's index (34, 56) and carrying a
+ * preference with ID 123 and the entry's value.
+ */
+ @Test
+ public void testUserVectorSplitterMapper() throws Exception {
+ Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context context =
+ EasyMock.createMock(Mapper.Context.class);
+
+ context.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
+ context.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));
+
+ EasyMock.replay(context);
+
+ UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
+ setField(mapper, "maxPrefsPerUserConsidered", 10);
+
+ RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ vector.set(34, 0.5);
+ vector.set(56, 0.7);
+
+ mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * Registers an EasyMock argument matcher that accepts a {@link VectorOrPrefWritable} whose user
+ * ID and preference value are exactly the given ones.
+ * <p>
+ * Call this inline as an argument of the recorded mock call; the return value is only a
+ * placeholder.
+ *
+ * @param userID the user ID the argument must carry
+ * @param prefValue the preference value the argument must carry (compared with {@code ==})
+ * @return always {@code null}
+ */
+ private static VectorOrPrefWritable prefOfVectorOrPrefWritableMatches(final long userID, final float prefValue) {
+ EasyMock.reportMatcher(new IArgumentMatcher() {
+ @Override
+ public boolean matches(Object argument) {
+ if (!(argument instanceof VectorOrPrefWritable)) {
+ return false;
+ }
+ VectorOrPrefWritable pref = (VectorOrPrefWritable) argument;
+ return pref.getUserID() == userID && pref.getValue() == prefValue;
+ }
+
+ @Override
+ public void appendTo(StringBuffer buffer) {
+ // describe the expectation so that a failed verification names what was expected
+ buffer.append("prefOfVectorOrPrefWritableMatches(userID=").append(userID)
+ .append(", value=").append(prefValue).append(')');
+ }
+ });
+ return null;
+ }
+
+ /**
+ * tests {@link UserVectorSplitterMapper} in the special case that some userIDs shall be excluded
+ * <p>
+ * The mapper's "usersToRecommendFor" set contains only 123. The same vector is mapped for the
+ * keys 123 and 456, but only the two writes for 123 are recorded as expectations, so any write
+ * caused by 456 would be an unexpected call on the mock.
+ */
+ @Test
+ public void testUserVectorSplitterMapperUserExclusion() throws Exception {
+ Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context context =
+ EasyMock.createMock(Mapper.Context.class);
+
+ context.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatches(123L, 0.5f));
+ context.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));
+
+ EasyMock.replay(context);
+
+ FastIDSet usersToRecommendFor = new FastIDSet();
+ usersToRecommendFor.add(123L);
+
+ UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
+ setField(mapper, "maxPrefsPerUserConsidered", 10);
+ setField(mapper, "usersToRecommendFor", usersToRecommendFor);
+
+
+ RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ vector.set(34, 0.5);
+ vector.set(56, 0.7);
+
+ mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);
+ mapper.map(new VarLongWritable(456L), new VectorWritable(vector), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * tests {@link UserVectorSplitterMapper} in the special case that the number of preferences to be considered
+ * is less than the number of available preferences
+ * <p>
+ * With "maxPrefsPerUserConsidered" set to 1 and the vector {34: 0.5, 56: 0.7}, both entries must
+ * still be written: the entry with the lower value (34) with a NaN preference value, the entry
+ * with the higher value (56) with its original value 0.7.
+ */
+ @Test
+ public void testUserVectorSplitterMapperOnlySomePrefsConsidered() throws Exception {
+ Mapper<VarLongWritable,VectorWritable, VarIntWritable,VectorOrPrefWritable>.Context context =
+ EasyMock.createMock(Mapper.Context.class);
+
+ context.write(EasyMock.eq(new VarIntWritable(34)), prefOfVectorOrPrefWritableMatchesNaN(123L));
+ context.write(EasyMock.eq(new VarIntWritable(56)), prefOfVectorOrPrefWritableMatches(123L, 0.7f));
+
+ EasyMock.replay(context);
+
+ UserVectorSplitterMapper mapper = new UserVectorSplitterMapper();
+ setField(mapper, "maxPrefsPerUserConsidered", 1);
+
+ RandomAccessSparseVector vector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ vector.set(34, 0.5);
+ vector.set(56, 0.7);
+
+ mapper.map(new VarLongWritable(123L), new VectorWritable(vector), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * Registers an EasyMock argument matcher that accepts a {@link VectorOrPrefWritable} carrying
+ * the given user ID and a NaN preference value.
+ * <p>
+ * Call this inline as an argument of the recorded mock call; the return value is only a
+ * placeholder.
+ *
+ * @param userID the user ID the argument must carry
+ * @return always {@code null}
+ */
+ private static VectorOrPrefWritable prefOfVectorOrPrefWritableMatchesNaN(final long userID) {
+ EasyMock.reportMatcher(new IArgumentMatcher() {
+ @Override
+ public boolean matches(Object argument) {
+ if (!(argument instanceof VectorOrPrefWritable)) {
+ return false;
+ }
+ VectorOrPrefWritable pref = (VectorOrPrefWritable) argument;
+ return pref.getUserID() == userID && Float.isNaN(pref.getValue());
+ }
+
+ @Override
+ public void appendTo(StringBuffer buffer) {
+ // describe the expectation so that a failed verification names what was expected
+ buffer.append("prefOfVectorOrPrefWritableMatchesNaN(userID=").append(userID).append(')');
+ }
+ });
+ return null;
+ }
+
+ /**
+ * tests {@link ToVectorAndPrefReducer}
+ * <p>
+ * For key 1 the reducer receives two preferences, (123, 1.0) and (456, 2.0), plus one similarity
+ * column {3: 0.5, 7: 0.8}. It must write a single {@link VectorAndPrefsWritable} for key 1 that
+ * combines the user IDs [123, 456], the values [1.0, 2.0] and the similarity column.
+ */
+ @Test
+ public void testToVectorAndPrefReducer() throws Exception {
+ Reducer<VarIntWritable,VectorOrPrefWritable,VarIntWritable,VectorAndPrefsWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+
+ context.write(EasyMock.eq(new VarIntWritable(1)), vectorAndPrefsWritableMatches(Arrays.asList(123L, 456L),
+ Arrays.asList(1.0f, 2.0f), MathHelper.elem(3, 0.5), MathHelper.elem(7, 0.8)));
+
+ EasyMock.replay(context);
+
+ Vector similarityColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumn.set(3, 0.5);
+ similarityColumn.set(7, 0.8);
+
+ VectorOrPrefWritable itemPref1 = new VectorOrPrefWritable(123L, 1.0f);
+ VectorOrPrefWritable itemPref2 = new VectorOrPrefWritable(456L, 2.0f);
+ VectorOrPrefWritable similarities = new VectorOrPrefWritable(similarityColumn);
+
+ new ToVectorAndPrefReducer().reduce(new VarIntWritable(1), Arrays.asList(itemPref1, itemPref2, similarities),
+ context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * Registers an EasyMock argument matcher that accepts a {@link VectorAndPrefsWritable} whose
+ * user IDs and preference values equal the given lists and whose vector passes
+ * {@code MathHelper.consistsOf} for the given elements.
+ * <p>
+ * Call this inline as an argument of the recorded mock call; the return value is only a
+ * placeholder.
+ *
+ * @param userIDs the expected user IDs, compared with {@code equals}
+ * @param prefValues the expected preference values, compared with {@code equals}
+ * @param elements the elements the vector is checked against
+ * @return always {@code null}
+ */
+ private static VectorAndPrefsWritable vectorAndPrefsWritableMatches(final List<Long> userIDs,
+ final List<Float> prefValues, final Vector.Element... elements) {
+ EasyMock.reportMatcher(new IArgumentMatcher() {
+ @Override
+ public boolean matches(Object argument) {
+ if (!(argument instanceof VectorAndPrefsWritable)) {
+ return false;
+ }
+ VectorAndPrefsWritable vectorAndPrefs = (VectorAndPrefsWritable) argument;
+
+ if (!vectorAndPrefs.getUserIDs().equals(userIDs)) {
+ return false;
+ }
+ if (!vectorAndPrefs.getValues().equals(prefValues)) {
+ return false;
+ }
+ return MathHelper.consistsOf(vectorAndPrefs.getVector(), elements);
+ }
+
+ @Override
+ public void appendTo(StringBuffer buffer) {
+ // describe the expectation so that a failed verification names what was expected
+ buffer.append("vectorAndPrefsWritableMatches(userIDs=").append(userIDs)
+ .append(", prefValues=").append(prefValues).append(", vector=");
+ for (Vector.Element element : elements) {
+ buffer.append(' ').append(element.index()).append(':').append(element.get());
+ }
+ buffer.append(" )");
+ }
+ });
+ return null;
+ }
+
+ /**
+ * tests {@link ToVectorAndPrefReducer} in the error case that two similarity column vectors are supplied for
+ * the same item (which should never happen): the call must end in an {@link IllegalStateException}
+ */
+ @Test
+ public void testToVectorAndPrefReducerExceptionOn2Vectors() throws Exception {
+ Reducer<VarIntWritable,VectorOrPrefWritable,VarIntWritable,VectorAndPrefsWritable>.Context ctx =
+ EasyMock.createMock(Reducer.Context.class);
+
+ // no expectations are recorded before replay
+ EasyMock.replay(ctx);
+
+ Vector firstColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ Vector secondColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ List<VectorOrPrefWritable> twoColumns = Arrays.asList(
+ new VectorOrPrefWritable(firstColumn), new VectorOrPrefWritable(secondColumn));
+
+ try {
+ new ToVectorAndPrefReducer().reduce(new VarIntWritable(1), twoColumns, ctx);
+ fail();
+ } catch (IllegalStateException expected) {
+ // good
+ }
+
+ EasyMock.verify(ctx);
+ }
+
+ /**
+ * tests {@link org.apache.mahout.cf.taste.hadoop.item.ItemFilterMapper}
+ */
+ @Test
+ public void testItemFilterMapper() throws Exception {
+
+ Mapper<LongWritable,Text,VarLongWritable,VarLongWritable>.Context context =
+ EasyMock.createMock(Mapper.Context.class);
+
+ context.write(new VarLongWritable(34L), new VarLongWritable(12L));
+ context.write(new VarLongWritable(78L), new VarLongWritable(56L));
+
+ EasyMock.replay(context);
+
+ ItemFilterMapper mapper = new ItemFilterMapper();
+ mapper.map(null, new Text("12,34"), context);
+ mapper.map(null, new Text("56,78"), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * tests {@link org.apache.mahout.cf.taste.hadoop.item.ItemFilterAsVectorAndPrefsReducer}
+ * <p>
+ * For key 123 and the values 456 and 789 the reducer must write one entry keyed by the index
+ * that {@link TasteHadoopUtils#idToIndex} computes for 123. The written value is checked by
+ * {@code vectorAndPrefsForFilteringMatches(123, 456, 789)}: a vector with a single NaN entry at
+ * that index, together with exactly the user IDs 456 and 789.
+ */
+ @Test
+ public void testItemFilterAsVectorAndPrefsReducer() throws Exception {
+ Reducer<VarLongWritable,VarLongWritable,VarIntWritable,VectorAndPrefsWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+
+ int itemIDIndex = TasteHadoopUtils.idToIndex(123L);
+ context.write(EasyMock.eq(new VarIntWritable(itemIDIndex)), vectorAndPrefsForFilteringMatches(123L, 456L, 789L));
+
+ EasyMock.replay(context);
+
+ new ItemFilterAsVectorAndPrefsReducer().reduce(new VarLongWritable(123L), Arrays.asList(new VarLongWritable(456L),
+ new VarLongWritable(789L)), context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * Registers an EasyMock argument matcher that accepts a {@link VectorAndPrefsWritable} which
+ * <ul>
+ * <li>holds a vector with exactly one non-default element,</li>
+ * <li>has NaN at the index that {@link TasteHadoopUtils#idToIndex} computes for the item ID, and</li>
+ * <li>lists as many user IDs as given, containing each of the given ones.</li>
+ * </ul>
+ * Call this inline as an argument of the recorded mock call; the return value is only a
+ * placeholder.
+ *
+ * @param itemID the item ID whose index must carry the NaN entry
+ * @param userIDs the user IDs the argument must list (order is not checked)
+ * @return always {@code null}
+ */
+ static VectorAndPrefsWritable vectorAndPrefsForFilteringMatches(final long itemID, final long... userIDs) {
+ EasyMock.reportMatcher(new IArgumentMatcher() {
+ @Override
+ public boolean matches(Object argument) {
+ if (!(argument instanceof VectorAndPrefsWritable)) {
+ return false;
+ }
+ VectorAndPrefsWritable vectorAndPrefs = (VectorAndPrefsWritable) argument;
+ Vector vector = vectorAndPrefs.getVector();
+ if (vector.getNumNondefaultElements() != 1) {
+ return false;
+ }
+ if (!Double.isNaN(vector.get(TasteHadoopUtils.idToIndex(itemID)))) {
+ return false;
+ }
+ if (userIDs.length != vectorAndPrefs.getUserIDs().size()) {
+ return false;
+ }
+ for (long userID : userIDs) {
+ if (!vectorAndPrefs.getUserIDs().contains(userID)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public void appendTo(StringBuffer buffer) {
+ // describe the expectation so that a failed verification names what was expected
+ buffer.append("vectorAndPrefsForFilteringMatches(itemID=").append(itemID)
+ .append(", userIDs=").append(Arrays.toString(userIDs)).append(')');
+ }
+ });
+ return null;
+ }
+
+ /**
+ * tests {@link PartialMultiplyMapper}
+ * <p>
+ * A {@link VectorAndPrefsWritable} holding the similarity column {3: 0.5, 7: 0.8}, the user IDs
+ * [123, 456] and the values [1.0, 3.0] is mapped. One write per user is expected: keyed by the
+ * user ID and carrying a {@link PrefAndSimilarityColumnWritable} made of that user's value and
+ * the similarity column.
+ */
+ @Test
+ public void testPartialMultiplyMapper() throws Exception {
+
+ Vector similarityColumn = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumn.set(3, 0.5);
+ similarityColumn.set(7, 0.8);
+
+ Mapper<VarIntWritable,VectorAndPrefsWritable,VarLongWritable,PrefAndSimilarityColumnWritable>.Context context =
+ EasyMock.createMock(Mapper.Context.class);
+
+ PrefAndSimilarityColumnWritable one = new PrefAndSimilarityColumnWritable();
+ PrefAndSimilarityColumnWritable two = new PrefAndSimilarityColumnWritable();
+ one.set(1.0f, similarityColumn);
+ two.set(3.0f, similarityColumn);
+
+ context.write(EasyMock.eq(new VarLongWritable(123L)), EasyMock.eq(one));
+ context.write(EasyMock.eq(new VarLongWritable(456L)), EasyMock.eq(two));
+
+ EasyMock.replay(context);
+
+ VectorAndPrefsWritable vectorAndPrefs = new VectorAndPrefsWritable(similarityColumn, Arrays.asList(123L, 456L),
+ Arrays.asList(1.0f, 3.0f));
+
+ new PartialMultiplyMapper().map(new VarIntWritable(1), vectorAndPrefs, context);
+
+ EasyMock.verify(context);
+ }
+
+
+ /**
+ * tests {@link AggregateAndRecommendReducer}
+ * <p>
+ * User 123 contributes two (preference, similarity column) pairs: (1.0, {1: 0.1, 2: 0.5}) and
+ * (3.0, {1: 0.9, 2: 0.5}). The expected recommendations are item 1 with 2.8 and item 2 with 2.0,
+ * in that order. These values agree with a similarity-weighted average of the preferences:
+ * (1.0 * 0.1 + 3.0 * 0.9) / (0.1 + 0.9) = 2.8 and (1.0 * 0.5 + 3.0 * 0.5) / (0.5 + 0.5) = 2.0.
+ * "recommendationsPerUser" is 3, so the limit does not cut anything off here.
+ */
+ @Test
+ public void testAggregateAndRecommendReducer() throws Exception {
+ Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+
+ context.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f),
+ new MutableRecommendedItem(2L, 2.0f)));
+
+ EasyMock.replay(context);
+
+ RandomAccessSparseVector similarityColumnOne = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumnOne.set(1, 0.1);
+ similarityColumnOne.set(2, 0.5);
+
+ RandomAccessSparseVector similarityColumnTwo = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumnTwo.set(1, 0.9);
+ similarityColumnTwo.set(2, 0.5);
+
+ List<PrefAndSimilarityColumnWritable> values = Arrays.asList(
+ new PrefAndSimilarityColumnWritable(1.0f, similarityColumnOne),
+ new PrefAndSimilarityColumnWritable(3.0f, similarityColumnTwo));
+
+ OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
+ indexItemIDMap.put(1, 1L);
+ indexItemIDMap.put(2, 2L);
+
+ AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();
+
+ setField(reducer, "indexItemIDMap", indexItemIDMap);
+ setField(reducer, "recommendationsPerUser", 3);
+
+ reducer.reduce(new VarLongWritable(123L), values, context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * tests {@link AggregateAndRecommendReducer} in the case that a candidate item is backed by only
+ * one of the user's preferences
+ * <p>
+ * Index 2 appears only in the second similarity column, index 1 appears in both. Only item 1
+ * (value 2.8) is expected in the output, although "recommendationsPerUser" (3) would leave room
+ * for item 2 as well.
+ */
+ @Test
+ public void testAggregateAndRecommendReducerExcludeRecommendationsBasedOnOneItem() throws Exception {
+ Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+
+ context.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f)));
+
+ EasyMock.replay(context);
+
+ RandomAccessSparseVector similarityColumnOne = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumnOne.set(1, 0.1);
+
+ RandomAccessSparseVector similarityColumnTwo = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumnTwo.set(1, 0.9);
+ similarityColumnTwo.set(2, 0.5);
+
+ List<PrefAndSimilarityColumnWritable> values = Arrays.asList(
+ new PrefAndSimilarityColumnWritable(1.0f, similarityColumnOne),
+ new PrefAndSimilarityColumnWritable(3.0f, similarityColumnTwo));
+
+ OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
+ indexItemIDMap.put(1, 1L);
+ indexItemIDMap.put(2, 2L);
+
+ AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();
+
+ setField(reducer, "indexItemIDMap", indexItemIDMap);
+ setField(reducer, "recommendationsPerUser", 3);
+
+ reducer.reduce(new VarLongWritable(123L), values, context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * tests {@link AggregateAndRecommendReducer} with a limit on the recommendations per user
+ * <p>
+ * The input is the same as in {@code testAggregateAndRecommendReducer}, which yields item 1
+ * (2.8) and item 2 (2.0). With "recommendationsPerUser" set to 1 only the higher-valued item 1
+ * is expected in the output.
+ */
+ @Test
+ public void testAggregateAndRecommendReducerLimitNumberOfRecommendations() throws Exception {
+ Reducer<VarLongWritable,PrefAndSimilarityColumnWritable,VarLongWritable,RecommendedItemsWritable>.Context context =
+ EasyMock.createMock(Reducer.Context.class);
+
+ context.write(EasyMock.eq(new VarLongWritable(123L)), recommendationsMatch(new MutableRecommendedItem(1L, 2.8f)));
+
+ EasyMock.replay(context);
+
+ RandomAccessSparseVector similarityColumnOne = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumnOne.set(1, 0.1);
+ similarityColumnOne.set(2, 0.5);
+
+ RandomAccessSparseVector similarityColumnTwo = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
+ similarityColumnTwo.set(1, 0.9);
+ similarityColumnTwo.set(2, 0.5);
+
+ List<PrefAndSimilarityColumnWritable> values = Arrays.asList(
+ new PrefAndSimilarityColumnWritable(1.0f, similarityColumnOne),
+ new PrefAndSimilarityColumnWritable(3.0f, similarityColumnTwo));
+
+ OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
+ indexItemIDMap.put(1, 1L);
+ indexItemIDMap.put(2, 2L);
+
+ AggregateAndRecommendReducer reducer = new AggregateAndRecommendReducer();
+
+ setField(reducer, "indexItemIDMap", indexItemIDMap);
+ setField(reducer, "recommendationsPerUser", 1);
+
+ reducer.reduce(new VarLongWritable(123L), values, context);
+
+ EasyMock.verify(context);
+ }
+
+ /**
+ * Registers an EasyMock argument matcher that accepts a {@link RecommendedItemsWritable} whose
+ * recommended items equal the given items, in the given order.
+ * <p>
+ * Call this inline as an argument of the recorded mock call; the return value is only a
+ * placeholder.
+ *
+ * @param items the expected recommendations, compared as a list with {@code equals}
+ * @return always {@code null}
+ */
+ static RecommendedItemsWritable recommendationsMatch(final RecommendedItem... items) {
+ EasyMock.reportMatcher(new IArgumentMatcher() {
+ @Override
+ public boolean matches(Object argument) {
+ if (!(argument instanceof RecommendedItemsWritable)) {
+ return false;
+ }
+ RecommendedItemsWritable recommendedItemsWritable = (RecommendedItemsWritable) argument;
+ List<RecommendedItem> expectedItems = Arrays.asList(items);
+ return expectedItems.equals(recommendedItemsWritable.getRecommendedItems());
+ }
+
+ @Override
+ public void appendTo(StringBuffer buffer) {
+ // describe the expectation so that a failed verification names what was expected
+ buffer.append("recommendationsMatch(").append(Arrays.asList(items)).append(')');
+ }
+ });
+ return null;
+ }
+
+ /**
+ * small integration test that runs the full job
+ *
+ * As a tribute to http://www.slideshare.net/srowen/collaborative-filtering-at-scale,
+ * we recommend people food to animals in this test :)
+ *
+ * In the input file the users 1, 2, 3, 4 stand for dog, rabbit, cow, donkey and the items
+ * 1, 2, 3, 4 stand for burger, hotdog, berries, icecream.
+ *
+ * <pre>
+ *
+ * user-item-matrix
+ *
+ * burger hotdog berries icecream
+ * dog 5 5 2 -
+ * rabbit 2 - 3 5
+ * cow - 5 - 3
+ * donkey 3 - - 5
+ *
+ *
+ * item-item-similarity-matrix (tanimoto-coefficient of the item-vectors of the user-item-matrix)
+ *
+ * burger hotdog berries icecream
+ * burger - 0.25 0.66 0.5
+ * hotdog 0.25 - 0.33 0.25
+ * berries 0.66 0.33 - 0.25
+ * icecream 0.5 0.25 0.25 -
+ *
+ *
+ * Prediction(dog, icecream) = (0.5 * 5 + 0.25 * 5 + 0.25 * 2 ) / (0.5 + 0.25 + 0.25) ~ 4.3
+ * Prediction(rabbit, hotdog) = (0.25 * 2 + 0.33 * 3 + 0.25 * 5) / (0.25 + 0.33 + 0.25) ~ 3.3
+ * Prediction(cow, burger) = (0.25 * 5 + 0.5 * 3) / (0.25 + 0.5) ~ 3.7
+ * Prediction(cow, berries) = (0.33 * 5 + 0.25 * 3) / (0.33 + 0.25) ~ 4.1
+ * Prediction(donkey, hotdog) = (0.25 * 3 + 0.25 * 5) / (0.25 + 0.25) ~ 4
+ * Prediction(donkey, berries) = (0.66 * 3 + 0.25 * 5) / (0.66 + 0.25) ~ 3.5
+ *
+ * </pre>
+ *
+ * The test checks the recommendations read from the job output (ordered by descending predicted
+ * value per user) against these predictions with a tolerance of 0.05, and the six item pairs of
+ * the similarity matrix written to the directory passed as "--outputPathForSimilarityMatrix".
+ */
+ @Test
+ public void testCompleteJob() throws Exception {
+
+ File inputFile = getTestTempFile("prefs.txt");
+ File outputDir = getTestTempDir("output");
+ outputDir.delete();
+ File similaritiesOutputDir = getTestTempDir("outputSimilarities");
+ similaritiesOutputDir.delete();
+ File tmpDir = getTestTempDir("tmp");
+
+ writeLines(inputFile,
+ "1,1,5",
+ "1,2,5",
+ "1,3,2",
+ "2,1,2",
+ "2,3,3",
+ "2,4,5",
+ "3,2,5",
+ "3,4,3",
+ "4,1,3",
+ "4,4,5");
+
+ RecommenderJob recommenderJob = new RecommenderJob();
+
+ Configuration conf = getConfiguration();
+ conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+ conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+ conf.setBoolean("mapred.output.compress", false);
+
+ recommenderJob.setConf(conf);
+
+ recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+ TanimotoCoefficientSimilarity.class.getName(), "--numRecommendations", "4",
+ "--outputPathForSimilarityMatrix", similaritiesOutputDir.getAbsolutePath() });
+
+ Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));
+ assertEquals(4, recommendations.size());
+
+ for (Entry<Long,List<RecommendedItem>> entry : recommendations.entrySet()) {
+ long userID = entry.getKey();
+ List<RecommendedItem> items = entry.getValue();
+ assertNotNull(items);
+ RecommendedItem item1 = items.get(0);
+
+ if (userID == 1L) {
+ assertEquals(1, items.size());
+ assertEquals(4L, item1.getItemID());
+ assertEquals(4.3, item1.getValue(), 0.05);
+ }
+ if (userID == 2L) {
+ assertEquals(1, items.size());
+ assertEquals(2L, item1.getItemID());
+ assertEquals(3.3, item1.getValue(), 0.05);
+ }
+ if (userID == 3L) {
+ assertEquals(2, items.size());
+ assertEquals(3L, item1.getItemID());
+ assertEquals(4.1, item1.getValue(), 0.05);
+ RecommendedItem item2 = items.get(1);
+ assertEquals(1L, item2.getItemID());
+ assertEquals(3.7, item2.getValue(), 0.05);
+ }
+ if (userID == 4L) {
+ assertEquals(2, items.size());
+ assertEquals(2L, item1.getItemID());
+ assertEquals(4.0, item1.getValue(), 0.05);
+ RecommendedItem item2 = items.get(1);
+ assertEquals(3L, item2.getItemID());
+ assertEquals(3.5, item2.getValue(), 0.05);
+ }
+ }
+
+ Map<Pair<Long, Long>, Double> similarities = readSimilarities(new File(similaritiesOutputDir, "part-r-00000"));
+ assertEquals(6, similarities.size());
+
+ assertEquals(0.25, similarities.get(new Pair<Long, Long>(1L, 2L)), EPSILON);
+ assertEquals(0.6666666666666666, similarities.get(new Pair<Long, Long>(1L, 3L)), EPSILON);
+ assertEquals(0.5, similarities.get(new Pair<Long, Long>(1L, 4L)), EPSILON);
+ assertEquals(0.3333333333333333, similarities.get(new Pair<Long, Long>(2L, 3L)), EPSILON);
+ assertEquals(0.25, similarities.get(new Pair<Long, Long>(2L, 4L)), EPSILON);
+ assertEquals(0.25, similarities.get(new Pair<Long, Long>(3L, 4L)), EPSILON);
+ }
+
+ /**
+ * small integration test for boolean data
+ * <p>
+ * Uses the same user-item pairs as {@code testCompleteJob}, but without preference values, and
+ * runs the job with {@link CooccurrenceCountSimilarity} and "--booleanData true". The users file
+ * contains only user 3 (the cow), and only that user's recommendations are checked: item 1
+ * (burger) with value 3 first, then item 3 (berries) with value 2.
+ */
+ @Test
+ public void testCompleteJobBoolean() throws Exception {
+
+ File inputFile = getTestTempFile("prefs.txt");
+ File outputDir = getTestTempDir("output");
+ outputDir.delete();
+ File tmpDir = getTestTempDir("tmp");
+ File usersFile = getTestTempFile("users.txt");
+ writeLines(usersFile, "3");
+
+ writeLines(inputFile,
+ "1,1",
+ "1,2",
+ "1,3",
+ "2,1",
+ "2,3",
+ "2,4",
+ "3,2",
+ "3,4",
+ "4,1",
+ "4,4");
+
+ RecommenderJob recommenderJob = new RecommenderJob();
+
+ Configuration conf = getConfiguration();
+ conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+ conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+ conf.setBoolean("mapred.output.compress", false);
+
+ recommenderJob.setConf(conf);
+
+ recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+ CooccurrenceCountSimilarity.class.getName(), "--booleanData", "true",
+ "--usersFile", usersFile.getAbsolutePath() });
+
+ Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));
+
+ List<RecommendedItem> recommendedToCow = recommendations.get(3L);
+ assertEquals(2, recommendedToCow.size());
+
+ RecommendedItem item1 = recommendedToCow.get(0);
+ RecommendedItem item2 = recommendedToCow.get(1);
+
+ assertEquals(1L, item1.getItemID());
+ assertEquals(3L, item2.getItemID());
+
+ /* predicted pref must be the sum of similarities:
+ * item1: coocc(burger, hotdog) + coocc(burger, icecream) = 3
+ * item2: coocc(berries, hotdog) + coocc(berries, icecream) = 2 */
+ assertEquals(3, item1.getValue(), 0.05);
+ assertEquals(2, item2.getValue(), 0.05);
+ }
+
+ /**
+ * check whether the explicit user/item filter works
+ * <p>
+ * Uses the same preferences as {@code testCompleteJob}. The users file restricts the output to
+ * user 4 (the donkey) and the filter file holds the user/item pair "4,2" (donkey, hotdog). With
+ * "--numRecommendations 1" the donkey's single recommendation must therefore be item 3
+ * (berries, ~3.5) instead of the otherwise higher-rated hotdog (~4).
+ */
+ @Test
+ public void testCompleteJobWithFiltering() throws Exception {
+
+ File inputFile = getTestTempFile("prefs.txt");
+ File userFile = getTestTempFile("users.txt");
+ File filterFile = getTestTempFile("filter.txt");
+ File outputDir = getTestTempDir("output");
+ outputDir.delete();
+ File tmpDir = getTestTempDir("tmp");
+
+ writeLines(inputFile,
+ "1,1,5",
+ "1,2,5",
+ "1,3,2",
+ "2,1,2",
+ "2,3,3",
+ "2,4,5",
+ "3,2,5",
+ "3,4,3",
+ "4,1,3",
+ "4,4,5");
+
+ /* only compute recommendations for the donkey */
+ writeLines(userFile, "4");
+ /* do not recommend the hotdog for the donkey */
+ writeLines(filterFile, "4,2");
+
+ RecommenderJob recommenderJob = new RecommenderJob();
+
+ Configuration conf = getConfiguration();
+ conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+ conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+ conf.setBoolean("mapred.output.compress", false);
+
+ recommenderJob.setConf(conf);
+
+ recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+ TanimotoCoefficientSimilarity.class.getName(), "--numRecommendations", "1",
+ "--usersFile", userFile.getAbsolutePath(), "--filterFile", filterFile.getAbsolutePath() });
+
+ Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new File(outputDir, "part-r-00000"));
+
+ assertEquals(1, recommendations.size());
+ assertTrue(recommendations.containsKey(4L));
+ assertEquals(1, recommendations.get(4L).size());
+
+ /* berries should have been recommended to the donkey */
+ RecommendedItem recommendedItem = recommendations.get(4L).get(0);
+ assertEquals(3L, recommendedItem.getItemID());
+ assertEquals(3.5, recommendedItem.getValue(), 0.05);
+ }
+
+ /**
+ * Parses a tab-separated file in which every line holds two IDs followed by a similarity value.
+ *
+ * @param file the file to parse
+ * @return the similarity value of every (first ID, second ID) pair found in the file
+ * @throws IOException declared for failures while reading the file
+ */
+ static Map<Pair<Long,Long>, Double> readSimilarities(File file) throws IOException {
+ Map<Pair<Long,Long>, Double> result = Maps.newHashMap();
+ for (String row : new FileLineIterable(file)) {
+ String[] columns = row.split("\t");
+ long firstID = Long.parseLong(columns[0]);
+ long secondID = Long.parseLong(columns[1]);
+ double similarity = Double.parseDouble(columns[2]);
+ result.put(new Pair<Long,Long>(firstID, secondID), similarity);
+ }
+ return result;
+ }
+
+ /**
+ * Parses a recommendation file whose lines have the form
+ * {@code userID<TAB>[itemID:value,itemID:value,...]}.
+ *
+ * @param file the file to parse
+ * @return for every user ID in the file, its recommended items in file order
+ * @throws IOException declared for failures while reading the file
+ */
+ static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws IOException {
+ Map<Long,List<RecommendedItem>> byUser = Maps.newHashMap();
+ for (String row : new FileLineIterable(file)) {
+ String[] userAndItems = row.split("\t");
+ long userID = Long.parseLong(userAndItems[0]);
+
+ // drop the enclosing brackets, then walk over the "itemID:value" pairs
+ String itemList = userAndItems[1].replace("[", "").replace("]", "");
+ List<RecommendedItem> parsedItems = Lists.newLinkedList();
+ for (String pair : itemList.split(",")) {
+ String[] idAndValue = pair.split(":");
+ long itemID = Long.parseLong(idAndValue[0]);
+ float value = Float.parseFloat(idAndValue[1]);
+ parsedItems.add(new GenericRecommendedItem(itemID, value));
+ }
+ byUser.put(userID, parsedItems);
+ }
+ return byUser;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java
new file mode 100644
index 0000000..bb22b71
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducerTest.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.item;
+
+import org.apache.hadoop.mapreduce.Counter;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.math.VarLongWritable;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.hadoop.MathHelper;
+import org.easymock.EasyMock;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+/**
+ * Tests {@link ToUserVectorsReducer} with a configured minimum number of preferences:
+ * one test feeds a user with fewer preferences than the minimum, the other a user that meets it.
+ */
+public class ToUserVectorsReducerTest extends TasteTestCase {
+
+  /** Value injected into the reducer's {@code minPreferences} field by both tests. */
+  private static final int MIN_PREFERENCES = 2;
+
+  /** A user with a single preference must not trigger any call on the context. */
+  @Test
+  public void testToUsersReducerMinPreferencesUserIgnored() throws Exception {
+    ToUserVectorsReducer reducer = new ToUserVectorsReducer();
+    setField(reducer, "minPreferences", MIN_PREFERENCES);
+
+    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context ctx =
+        EasyMock.createMock(Reducer.Context.class);
+    // no expectations are recorded, so verify() only passes if the mock was left untouched
+    EasyMock.replay(ctx);
+
+    VarLongWritable userID = new VarLongWritable(123);
+    reducer.reduce(userID, Collections.singletonList(new VarLongWritable(456)), ctx);
+
+    EasyMock.verify(ctx);
+  }
+
+  /** A user with two preferences is counted once and written as a vector with both items set to 1.0. */
+  @Test
+  public void testToUsersReducerMinPreferencesUserPasses() throws Exception {
+    ToUserVectorsReducer reducer = new ToUserVectorsReducer();
+    setField(reducer, "minPreferences", MIN_PREFERENCES);
+
+    Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context ctx =
+        EasyMock.createMock(Reducer.Context.class);
+    Counter usersCounter = EasyMock.createMock(Counter.class);
+
+    // expected interactions: look up the USERS counter, bump it, emit the user vector
+    EasyMock.expect(ctx.getCounter(ToUserVectorsReducer.Counters.USERS)).andReturn(usersCounter);
+    usersCounter.increment(1);
+    ctx.write(EasyMock.eq(new VarLongWritable(123)), MathHelper.vectorMatches(
+        MathHelper.elem(TasteHadoopUtils.idToIndex(456L), 1.0),
+        MathHelper.elem(TasteHadoopUtils.idToIndex(789L), 1.0)));
+
+    EasyMock.replay(ctx, usersCounter);
+
+    VarLongWritable userID = new VarLongWritable(123);
+    reducer.reduce(userID, Arrays.asList(new VarLongWritable(456), new VarLongWritable(789)), ctx);
+
+    EasyMock.verify(ctx, usersCounter);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
new file mode 100644
index 0000000..f61b5e6
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
@@ -0,0 +1,269 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.similarity.item;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FilenameFilter;
+import java.util.Arrays;
+import java.util.regex.Pattern;
+
+import com.google.common.base.Charsets;
+import com.google.common.io.Files;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.CosineSimilarity;
+import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.TanimotoCoefficientSimilarity;
+import org.apache.mahout.math.map.OpenIntLongHashMap;
+import org.easymock.EasyMock;
+import org.junit.Test;
+
+/**
+ * Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity.item,
+ * followed by some integration tests with tiny data sets.
+ */
+public final class ItemSimilarityJobTest extends TasteTestCase {
+
+  private static final Pattern TAB = Pattern.compile("\t");
+
+  /**
+   * Tests {@link ItemSimilarityJob.MostSimilarItemPairsMapper}
+   */
+  @Test
+  public void testMostSimilarItemsPairsMapper() throws Exception {
+
+    OpenIntLongHashMap indexItemIDMap = new OpenIntLongHashMap();
+    indexItemIDMap.put(12, 12L);
+    indexItemIDMap.put(34, 34L);
+    indexItemIDMap.put(56, 56L);
+
+    Mapper<IntWritable,VectorWritable,EntityEntityWritable,DoubleWritable>.Context context =
+        EasyMock.createMock(Mapper.Context.class);
+
+    // with maxSimilarItemsPerItem = 1 only the strongest pair (34, 56) is expected to be written
+    context.write(new EntityEntityWritable(34L, 56L), new DoubleWritable(0.9));
+
+    EasyMock.replay(context);
+
+    Vector vector = new RandomAccessSparseVector(Integer.MAX_VALUE);
+    vector.set(12, 0.2);
+    vector.set(56, 0.9);
+
+    ItemSimilarityJob.MostSimilarItemPairsMapper mapper = new ItemSimilarityJob.MostSimilarItemPairsMapper();
+    setField(mapper, "indexItemIDMap", indexItemIDMap);
+    setField(mapper, "maxSimilarItemsPerItem", 1);
+
+    mapper.map(new IntWritable(34), new VectorWritable(vector), context);
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * Tests {@link ItemSimilarityJob.MostSimilarItemPairsReducer}
+   */
+  @Test
+  public void testMostSimilarItemPairsReducer() throws Exception {
+    Reducer<EntityEntityWritable,DoubleWritable,EntityEntityWritable,DoubleWritable>.Context context =
+        EasyMock.createMock(Reducer.Context.class);
+
+    // two identical values for the same pair are expected to yield a single write
+    context.write(new EntityEntityWritable(123L, 456L), new DoubleWritable(0.5));
+
+    EasyMock.replay(context);
+
+    new ItemSimilarityJob.MostSimilarItemPairsReducer().reduce(new EntityEntityWritable(123L, 456L),
+        Arrays.asList(new DoubleWritable(0.5), new DoubleWritable(0.5)), context);
+
+    EasyMock.verify(context);
+  }
+
+  /**
+   * Integration test with a tiny data set
+   *
+   * <pre>
+   * user-item-matrix
+   *
+   *        Game   Mouse   PC    Disk
+   * Jane    -       1      2      -
+   * Paul    1       -      1      -
+   * Fred    -       -      -      1
+   * </pre>
+   */
+  @Test
+  public void testCompleteJob() throws Exception {
+
+    File inputFile = getTestTempFile("prefs.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+
+    writeLines(inputFile,
+        "2,1,1",
+        "1,2,1",
+        "3,4,1",
+        "1,3,2",
+        "2,3,1");
+
+    ItemSimilarityJob similarityJob = new ItemSimilarityJob();
+
+    Configuration conf = getConfiguration();
+    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.setBoolean("mapred.output.compress", false);
+
+    similarityJob.setConf(conf);
+    similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+        CosineSimilarity.class.getName() });
+
+    File outPart = firstPartFile(outputDir);
+    BufferedReader reader = Files.newReader(outPart, Charsets.UTF_8);
+
+    int currentLine = 1;
+    try {
+      String line;
+      while ((line = reader.readLine()) != null) {
+
+        String[] tokens = TAB.split(line);
+
+        long itemAID = Long.parseLong(tokens[0]);
+        long itemBID = Long.parseLong(tokens[1]);
+        double similarity = Double.parseDouble(tokens[2]);
+
+        if (currentLine == 1) {
+          assertEquals(1L, itemAID);
+          assertEquals(3L, itemBID);
+          assertEquals(0.45, similarity, 0.01);
+        }
+
+        if (currentLine == 2) {
+          assertEquals(2L, itemAID);
+          assertEquals(3L, itemBID);
+          assertEquals(0.89, similarity, 0.01);
+        }
+
+        currentLine++;
+      }
+    } finally {
+      // release the file handle even when one of the assertions above fails
+      reader.close();
+    }
+
+    int linesWritten = currentLine - 1;
+    assertEquals(2, linesWritten);
+  }
+
+  /**
+   * integration test for the limitation of the number of computed similarities
+   *
+   * <pre>
+   * user-item-matrix
+   *
+   *        i1  i2  i3
+   *    u1   1   0   1
+   *    u2   0   1   1
+   *    u3   1   1   0
+   *    u4   1   1   1
+   *    u5   0   1   0
+   *    u6   1   1   0
+   *
+   *    tanimoto(i1,i2) = 0.5
+   *    tanimoto(i2,i3) = 0.333
+   *    tanimoto(i3,i1) = 0.4
+   *
+   *    When we set maxSimilaritiesPerItem to 1 the following pairs should be found:
+   *
+   *    i1 --> i2
+   *    i2 --> i1
+   *    i3 --> i1
+   * </pre>
+   */
+  @Test
+  public void testMaxSimilaritiesPerItem() throws Exception {
+
+    File inputFile = getTestTempFile("prefsForMaxSimilarities.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+
+    writeLines(inputFile,
+        "1,1,1",
+        "1,3,1",
+        "2,2,1",
+        "2,3,1",
+        "3,1,1",
+        "3,2,1",
+        "4,1,1",
+        "4,2,1",
+        "4,3,1",
+        "5,2,1",
+        "6,1,1",
+        "6,2,1");
+
+    ItemSimilarityJob similarityJob = new ItemSimilarityJob();
+
+    Configuration conf = getConfiguration();
+    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.setBoolean("mapred.output.compress", false);
+
+    similarityJob.setConf(conf);
+    similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), "--similarityClassname",
+        TanimotoCoefficientSimilarity.class.getName(), "--maxSimilaritiesPerItem", "1" });
+
+    File outPart = firstPartFile(outputDir);
+    BufferedReader reader = Files.newReader(outPart, Charsets.UTF_8);
+
+    int currentLine = 1;
+    try {
+      String line;
+      while ((line = reader.readLine()) != null) {
+
+        String[] tokens = TAB.split(line);
+
+        long itemAID = Long.parseLong(tokens[0]);
+        long itemBID = Long.parseLong(tokens[1]);
+        double similarity = Double.parseDouble(tokens[2]);
+
+        if (currentLine == 1) {
+          assertEquals(1L, itemAID);
+          assertEquals(2L, itemBID);
+          assertEquals(0.5, similarity, 0.0001);
+        }
+
+        if (currentLine == 2) {
+          assertEquals(1L, itemAID);
+          assertEquals(3L, itemBID);
+          assertEquals(0.4, similarity, 0.0001);
+        }
+
+        currentLine++;
+      }
+    } finally {
+      // release the file handle even when one of the assertions above fails
+      reader.close();
+    }
+
+    int linesWritten = currentLine - 1;
+    assertEquals(2, linesWritten);
+  }
+
+  /**
+   * Returns the first file in {@code outputDir} whose name starts with {@code part-},
+   * failing the test with a readable message when the directory holds no such file.
+   */
+  private static File firstPartFile(File outputDir) {
+    File[] partFiles = outputDir.listFiles(new FilenameFilter() {
+      @Override
+      public boolean accept(File dir, String name) {
+        return name.startsWith("part-");
+      }
+    });
+    // listFiles() returns null when the path is not a readable directory
+    assertTrue("no part-* output file found in " + outputDir, partFiles != null && partFiles.length > 0);
+    return partFiles[0];
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
new file mode 100644
index 0000000..2f8ca95
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl;
+
+import com.google.common.collect.Lists;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+
+import java.util.List;
+
+public abstract class TasteTestCase extends MahoutTestCase {
+
+ public static DataModel getDataModel(long[] userIDs, Double[][] prefValues) {
+ FastByIDMap<PreferenceArray> result = new FastByIDMap<PreferenceArray>();
+ for (int i = 0; i < userIDs.length; i++) {
+ List<Preference> prefsList = Lists.newArrayList();
+ for (int j = 0; j < prefValues[i].length; j++) {
+ if (prefValues[i][j] != null) {
+ prefsList.add(new GenericPreference(userIDs[i], j, prefValues[i][j].floatValue()));
+ }
+ }
+ if (!prefsList.isEmpty()) {
+ result.put(userIDs[i], new GenericUserPreferenceArray(prefsList));
+ }
+ }
+ return new GenericDataModel(result);
+ }
+
+ public static DataModel getBooleanDataModel(long[] userIDs, boolean[][] prefs) {
+ FastByIDMap<FastIDSet> result = new FastByIDMap<FastIDSet>();
+ for (int i = 0; i < userIDs.length; i++) {
+ FastIDSet prefsSet = new FastIDSet();
+ for (int j = 0; j < prefs[i].length; j++) {
+ if (prefs[i][j]) {
+ prefsSet.add(j);
+ }
+ }
+ if (!prefsSet.isEmpty()) {
+ result.put(userIDs[i], prefsSet);
+ }
+ }
+ return new GenericBooleanPrefDataModel(result);
+ }
+
+ protected static DataModel getDataModel() {
+ return getDataModel(
+ new long[] {1, 2, 3, 4},
+ new Double[][] {
+ {0.1, 0.3},
+ {0.2, 0.3, 0.3},
+ {0.4, 0.3, 0.5},
+ {0.7, 0.3, 0.8},
+ });
+ }
+
+ protected static DataModel getBooleanDataModel() {
+ return getBooleanDataModel(new long[] {1, 2, 3, 4},
+ new boolean[][] {
+ {false, true, false},
+ {false, true, true, false},
+ {true, false, false, true},
+ {true, false, true, true},
+ });
+ }
+
+ protected static boolean arrayContains(long[] array, long value) {
+ for (long l : array) {
+ if (l == value) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java
new file mode 100644
index 0000000..1f7c76b
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/BitSetTest.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+public final class BitSetTest extends TasteTestCase {
+
+ private static final int NUM_BITS = 100;
+
+ @Test
+ public void testGetSet() {
+ BitSet bitSet = new BitSet(NUM_BITS);
+ for (int i = 0; i < NUM_BITS; i++) {
+ assertFalse(bitSet.get(i));
+ }
+ bitSet.set(0);
+ bitSet.set(NUM_BITS-1);
+ assertTrue(bitSet.get(0));
+ assertTrue(bitSet.get(NUM_BITS-1));
+ }
+
+ @Test(expected = ArrayIndexOutOfBoundsException.class)
+ public void testBounds1() {
+ BitSet bitSet = new BitSet(NUM_BITS);
+ bitSet.set(1000);
+ }
+
+ @Test(expected = ArrayIndexOutOfBoundsException.class)
+ public void testBounds2() {
+ BitSet bitSet = new BitSet(NUM_BITS);
+ bitSet.set(-1);
+ }
+
+ @Test
+ public void testClear() {
+ BitSet bitSet = new BitSet(NUM_BITS);
+ for (int i = 0; i < NUM_BITS; i++) {
+ bitSet.set(i);
+ }
+ for (int i = 0; i < NUM_BITS; i++) {
+ assertTrue(bitSet.get(i));
+ }
+ bitSet.clear();
+ for (int i = 0; i < NUM_BITS; i++) {
+ assertFalse(bitSet.get(i));
+ }
+ }
+
+ @Test
+ public void testClone() {
+ BitSet bitSet = new BitSet(NUM_BITS);
+ bitSet.set(NUM_BITS-1);
+ bitSet = bitSet.clone();
+ assertTrue(bitSet.get(NUM_BITS-1));
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java
new file mode 100644
index 0000000..cab1984
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/CacheTest.java
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Random;
+
+/**
+ * Exercises {@link Cache} with a retriever that returns the key itself, so every
+ * {@code get(key)} is expected to yield {@code key} whether or not the entry is cached.
+ */
+public final class CacheTest extends TasteTestCase {
+
+  /** Requests far more distinct keys than the size passed to the cache constructor. */
+  @Test
+  public void testLotsOfGets() throws TasteException {
+    Retriever<Object,Object> retriever = new IdentityRetriever();
+    Cache<Object,Object> cache = new Cache<Object,Object>(retriever, 1000);
+    for (int i = 0; i < 1000000; i++) {
+      assertEquals(i, cache.get(i));
+    }
+  }
+
+  /** Randomly interleaves {@code clear()}, {@code remove()} and {@code get()} calls. */
+  @Test
+  public void testMixedUsage() throws TasteException {
+    Random random = RandomUtils.getRandom();
+    Retriever<Object,Object> retriever = new IdentityRetriever();
+    Cache<Object,Object> cache = new Cache<Object,Object>(retriever, 1000);
+    for (int i = 0; i < 1000000; i++) {
+      double r = random.nextDouble();
+      if (r < 0.01) {
+        cache.clear();
+      } else if (r < 0.1) {
+        // Remove an Integer key of the kind get(i) inserts. Using the random draw (r - 100)
+        // would pass a Double that can never equal a cached key, leaving remove() untested
+        // on present entries.
+        cache.remove(i - 100);
+      } else {
+        assertEquals(i, cache.get(i));
+      }
+    }
+  }
+
+  /** Retriever that hands back the requested key as its value. */
+  private static class IdentityRetriever implements Retriever<Object,Object> {
+    @Override
+    public Object get(Object key) throws TasteException {
+      return key;
+    }
+  }
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java
new file mode 100644
index 0000000..9263ce7
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastByIDMapTest.java
@@ -0,0 +1,147 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Maps;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Map;
+import java.util.Random;
+
+/** <p>Tests {@link FastByIDMap}.</p> */
+public final class FastByIDMapTest extends TasteTestCase {
+
+  /** A missing key reads as {@code null}; after a put the stored value is returned. */
+  @Test
+  public void testPutAndGet() {
+    FastByIDMap<Long> idMap = new FastByIDMap<Long>();
+    assertNull(idMap.get(500000L));
+    idMap.put(500000L, 2L);
+    assertEquals(2L, (long) idMap.get(500000L));
+  }
+
+  /** Removing the only entry leaves an empty map. */
+  @Test
+  public void testRemove() {
+    FastByIDMap<Long> idMap = new FastByIDMap<Long>();
+    idMap.put(500000L, 2L);
+    idMap.remove(500000L);
+    assertEquals(0, idMap.size());
+    assertTrue(idMap.isEmpty());
+    assertNull(idMap.get(500000L));
+  }
+
+  /** Clearing a map with one entry leaves an empty map. */
+  @Test
+  public void testClear() {
+    FastByIDMap<Long> idMap = new FastByIDMap<Long>();
+    idMap.put(500000L, 2L);
+    idMap.clear();
+    assertEquals(0, idMap.size());
+    assertTrue(idMap.isEmpty());
+    assertNull(idMap.get(500000L));
+  }
+
+  /** {@code size()} and {@code isEmpty()} follow a put and a subsequent remove. */
+  @Test
+  public void testSizeEmpty() {
+    FastByIDMap<Long> idMap = new FastByIDMap<Long>();
+    assertEquals(0, idMap.size());
+    assertTrue(idMap.isEmpty());
+
+    idMap.put(500000L, 2L);
+    assertEquals(1, idMap.size());
+    assertFalse(idMap.isEmpty());
+
+    idMap.remove(500000L);
+    assertEquals(0, idMap.size());
+    assertTrue(idMap.isEmpty());
+  }
+
+  /** Key and value lookups succeed for the three fixture entries and fail for unknown ones. */
+  @Test
+  public void testContains() {
+    FastByIDMap<String> fixture = buildTestFastMap();
+
+    assertTrue(fixture.containsKey(500000L));
+    assertTrue(fixture.containsKey(47L));
+    assertTrue(fixture.containsKey(2L));
+
+    assertTrue(fixture.containsValue("alpha"));
+    assertTrue(fixture.containsValue("bang"));
+    assertTrue(fixture.containsValue("beta"));
+
+    assertFalse(fixture.containsKey(999));
+    assertFalse(fixture.containsValue("something"));
+  }
+
+  /** After a remove and a rehash the removed key stays absent and another entry stays readable. */
+  @Test
+  public void testRehash() {
+    FastByIDMap<String> fixture = buildTestFastMap();
+    fixture.remove(500000L);
+    fixture.rehash();
+    assertNull(fixture.get(500000L));
+    assertEquals("bang", fixture.get(47L));
+  }
+
+  /**
+   * The map is created with both constructor arguments set to 1; the test expects the first
+   * entry to be gone once a second one has been added.
+   */
+  @Test
+  public void testGrow() {
+    FastByIDMap<String> tiny = new FastByIDMap<String>(1,1);
+    tiny.put(500000L, "alpha");
+    tiny.put(47L, "bang");
+    assertNull(tiny.get(500000L));
+    assertEquals("bang", tiny.get(47L));
+  }
+
+  /** Replays one million random get/put/remove operations against a {@link java.util.HashMap}. */
+  @Test
+  public void testVersusHashMap() {
+    FastByIDMap<String> actual = new FastByIDMap<String>();
+    Map<Long, String> expected = Maps.newHashMapWithExpectedSize(1000000);
+    Random random = RandomUtils.getRandom();
+    for (int step = 0; step < 1000000; step++) {
+      // the draw order (double first, then int) is kept so the operation sequence is unchanged
+      double choice = random.nextDouble();
+      Long key = (long) random.nextInt(100);
+      if (choice < 0.4) {
+        assertEquals(expected.get(key), actual.get(key));
+        continue;
+      }
+      if (choice < 0.7) {
+        assertEquals(expected.put(key, "bang"), actual.put(key, "bang"));
+      } else {
+        assertEquals(expected.remove(key), actual.remove(key));
+      }
+      assertEquals(expected.size(), actual.size());
+      assertEquals(expected.isEmpty(), actual.isEmpty());
+    }
+  }
+
+  /** Two distinct keys give size 2; overwriting one of them replaces the value without growing the map. */
+  @Test
+  public void testMaxSize() {
+    FastByIDMap<String> idMap = new FastByIDMap<String>();
+    idMap.put(4, "bang");
+    assertEquals(1, idMap.size());
+
+    idMap.put(47L, "bang");
+    assertEquals(2, idMap.size());
+    assertNull(idMap.get(500000L));
+
+    idMap.put(47L, "buzz");
+    assertEquals(2, idMap.size());
+    assertEquals("buzz", idMap.get(47L));
+  }
+
+
+  /** Fixture with the entries 500000 -> "alpha", 47 -> "bang" and 2 -> "beta". */
+  private static FastByIDMap<String> buildTestFastMap() {
+    FastByIDMap<String> fixture = new FastByIDMap<String>();
+    fixture.put(500000L, "alpha");
+    fixture.put(47L, "bang");
+    fixture.put(2L, "beta");
+    return fixture;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java
new file mode 100644
index 0000000..aec1738
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastIDSetTest.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Sets;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Collection;
+import java.util.Random;
+
+/** <p>Tests {@link FastIDSet}.</p> */
+public final class FastIDSetTest extends TasteTestCase {
+
+  /** An ID is reported as contained only after it has been added. */
+  @Test
+  public void testContainsAndAdd() {
+    FastIDSet ids = new FastIDSet();
+    assertFalse(ids.contains(1));
+    ids.add(1);
+    assertTrue(ids.contains(1));
+  }
+
+  /** Removing the only ID leaves an empty set. */
+  @Test
+  public void testRemove() {
+    FastIDSet ids = new FastIDSet();
+    ids.add(1);
+    ids.remove(1);
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+    assertFalse(ids.contains(1));
+  }
+
+  /** Clearing a set with one ID leaves an empty set. */
+  @Test
+  public void testClear() {
+    FastIDSet ids = new FastIDSet();
+    ids.add(1);
+    ids.clear();
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+    assertFalse(ids.contains(1));
+  }
+
+  /** {@code size()} and {@code isEmpty()} follow an add and a subsequent remove. */
+  @Test
+  public void testSizeEmpty() {
+    FastIDSet ids = new FastIDSet();
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+
+    ids.add(1);
+    assertEquals(1, ids.size());
+    assertFalse(ids.isEmpty());
+
+    ids.remove(1);
+    assertEquals(0, ids.size());
+    assertTrue(ids.isEmpty());
+  }
+
+  /** The fixture contains 1, 2 and 3 but not 4. */
+  @Test
+  public void testContains() {
+    FastIDSet fixture = buildTestFastSet();
+    for (long present = 1; present <= 3; present++) {
+      assertTrue(fixture.contains(present));
+    }
+    assertFalse(fixture.contains(4));
+  }
+
+  /** {@code Long.MIN_VALUE} and {@code Long.MAX_VALUE} must be rejected by {@code add()}. */
+  @Test
+  public void testReservedValues() {
+    FastIDSet ids = new FastIDSet();
+    assertAddRejected(ids, Long.MIN_VALUE);
+    assertAddRejected(ids, Long.MAX_VALUE);
+  }
+
+  /** After a remove and a rehash the removed ID stays absent. */
+  @Test
+  public void testRehash() {
+    FastIDSet fixture = buildTestFastSet();
+    fixture.remove(1);
+    fixture.rehash();
+    assertFalse(fixture.contains(1));
+  }
+
+  /** A set created with constructor argument 1 still holds two IDs after both are added. */
+  @Test
+  public void testGrow() {
+    FastIDSet tiny = new FastIDSet(1);
+    tiny.add(1);
+    tiny.add(2);
+    assertTrue(tiny.contains(1));
+    assertTrue(tiny.contains(2));
+  }
+
+  /** Iterating the fixture visits each of 1, 2 and 3. */
+  @Test
+  public void testIterator() {
+    FastIDSet fixture = buildTestFastSet();
+    Collection<Long> notYetSeen = Sets.newHashSetWithExpectedSize(3);
+    notYetSeen.add(1L);
+    notYetSeen.add(2L);
+    notYetSeen.add(3L);
+    for (LongPrimitiveIterator it = fixture.iterator(); it.hasNext();) {
+      notYetSeen.remove(it.nextLong());
+    }
+    assertTrue(notYetSeen.isEmpty());
+  }
+
+  /** Replays one million random contains/add/remove operations against a {@link java.util.HashSet}. */
+  @Test
+  public void testVersusHashSet() {
+    FastIDSet actual = new FastIDSet(1);
+    Collection<Integer> expected = Sets.newHashSetWithExpectedSize(1000000);
+    Random random = RandomUtils.getRandom();
+    for (int step = 0; step < 1000000; step++) {
+      // the draw order (double first, then int) is kept so the operation sequence is unchanged
+      double choice = random.nextDouble();
+      Integer key = random.nextInt(100);
+      if (choice < 0.4) {
+        assertEquals(expected.contains(key), actual.contains(key));
+        continue;
+      }
+      if (choice < 0.7) {
+        assertEquals(expected.add(key), actual.add(key));
+      } else {
+        assertEquals(expected.remove(key), actual.remove(key));
+      }
+      assertEquals(expected.size(), actual.size());
+      assertEquals(expected.isEmpty(), actual.isEmpty());
+    }
+  }
+
+  /** Asserts that adding {@code reserved} throws and that the set does not contain it afterwards. */
+  private static void assertAddRejected(FastIDSet ids, long reserved) {
+    try {
+      ids.add(reserved);
+      fail("Should have thrown IllegalArgumentException");
+    } catch (IllegalArgumentException iae) {
+      // good
+    }
+    assertFalse(ids.contains(reserved));
+  }
+
+  /** Fixture holding the IDs 1, 2 and 3. */
+  private static FastIDSet buildTestFastSet() {
+    FastIDSet fixture = new FastIDSet();
+    fixture.add(1);
+    fixture.add(2);
+    fixture.add(3);
+    return fixture;
+  }
+
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java
new file mode 100644
index 0000000..2f27483
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/FastMapTest.java
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Maps;
+import com.google.common.collect.Sets;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.junit.Test;
+
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * <p>Tests {@link FastMap}: put/get/remove/clear, null arguments, rehashing, growth, the key,
+ * value and entry views, a randomized comparison against a reference map, and the
+ * maximum-size bound.</p>
+ */
+public final class FastMapTest extends TasteTestCase {
+
+ /** A key reads as null before it is put and returns the stored value afterwards. */
+ @Test
+ public void testPutAndGet() {
+ Map<String, String> fastMap = new FastMap<String, String>();
+ assertNull(fastMap.get("foo"));
+ fastMap.put("foo", "bar");
+ assertEquals("bar", fastMap.get("foo"));
+ }
+
+ /** Removing the only entry leaves an empty map in which the key no longer resolves. */
+ @Test
+ public void testRemove() {
+ Map<String, String> fastMap = new FastMap<String, String>();
+ fastMap.put("foo", "bar");
+ fastMap.remove("foo");
+ assertHasNoEntries(fastMap);
+ assertNull(fastMap.get("foo"));
+ }
+
+ /** Clearing a one-entry map leaves an empty map in which the key no longer resolves. */
+ @Test
+ public void testClear() {
+ Map<String, String> fastMap = new FastMap<String, String>();
+ fastMap.put("foo", "bar");
+ fastMap.clear();
+ assertHasNoEntries(fastMap);
+ assertNull(fastMap.get("foo"));
+ }
+
+ /** Tracks size() and isEmpty() through an empty, one-entry and empty-again map. */
+ @Test
+ public void testSizeEmpty() {
+ Map<String, String> fastMap = new FastMap<String, String>();
+ assertHasNoEntries(fastMap);
+ fastMap.put("foo", "bar");
+ assertEquals(1, fastMap.size());
+ assertFalse(fastMap.isEmpty());
+ fastMap.remove("foo");
+ assertHasNoEntries(fastMap);
+ }
+
+ /** Every fixture key and value is reported as contained; an unknown string is not. */
+ @Test
+ public void testContains() {
+ FastMap<String, String> fastMap = buildTestFastMap();
+ for (String key : new String[] {"foo", "baz", "alpha"}) {
+ assertTrue(fastMap.containsKey(key));
+ }
+ for (String value : new String[] {"bar", "bang", "beta"}) {
+ assertTrue(fastMap.containsValue(value));
+ }
+ assertFalse(fastMap.containsKey("something"));
+ assertFalse(fastMap.containsValue("something"));
+ }
+
+ /**
+ * Expects a {@link NullPointerException} from the sequence below; get(null) is asserted to
+ * return null before a null key is put.
+ */
+ @Test(expected = NullPointerException.class)
+ public void testNull1() {
+ Map<String, String> fastMap = new FastMap<String, String>();
+ assertNull(fastMap.get(null));
+ fastMap.put(null, "bar");
+ }
+
+ /** Expects a {@link NullPointerException} when a null value is put. */
+ @Test(expected = NullPointerException.class)
+ public void testNull2() {
+ Map<String, String> fastMap = new FastMap<String, String>();
+ fastMap.put("foo", null);
+ }
+
+ /** After a removal followed by rehash(), the removed key is gone and another key survives. */
+ @Test
+ public void testRehash() {
+ FastMap<String, String> fastMap = buildTestFastMap();
+ fastMap.remove("foo");
+ fastMap.rehash();
+ assertNull(fastMap.get("foo"));
+ assertEquals("bang", fastMap.get("baz"));
+ }
+
+ /** Puts two entries into a map constructed with (1, NO_MAX_SIZE) and reads both back. */
+ @Test
+ public void testGrow() {
+ Map<String, String> fastMap = new FastMap<String, String>(1, FastMap.NO_MAX_SIZE);
+ fastMap.put("foo", "bar");
+ fastMap.put("baz", "bang");
+ assertEquals("bar", fastMap.get("foo"));
+ assertEquals("bang", fastMap.get("baz"));
+ }
+
+ /** The key view matches the fixture keys, and removals via its iterator hit the map. */
+ @Test
+ public void testKeySet() {
+ FastMap<String, String> fastMap = buildTestFastMap();
+ Collection<String> wanted = expectedSet("foo", "baz", "alpha");
+ Set<String> keys = fastMap.keySet();
+ assertTrue(wanted.containsAll(keys));
+ assertTrue(keys.containsAll(wanted));
+ // Drop every key except "baz" through the view's iterator.
+ for (Iterator<String> cursor = keys.iterator(); cursor.hasNext();) {
+ if (!"baz".equals(cursor.next())) {
+ cursor.remove();
+ }
+ }
+ assertTrue(fastMap.containsKey("baz"));
+ assertFalse(fastMap.containsKey("foo"));
+ assertFalse(fastMap.containsKey("alpha"));
+ }
+
+ /** The value view matches the fixture values, and removals via its iterator hit the map. */
+ @Test
+ public void testValues() {
+ FastMap<String, String> fastMap = buildTestFastMap();
+ Collection<String> wanted = expectedSet("bar", "bang", "beta");
+ Collection<String> values = fastMap.values();
+ assertTrue(wanted.containsAll(values));
+ assertTrue(values.containsAll(wanted));
+ // Drop every value except "bang" through the view's iterator.
+ for (Iterator<String> cursor = values.iterator(); cursor.hasNext();) {
+ if (!"bang".equals(cursor.next())) {
+ cursor.remove();
+ }
+ }
+ assertTrue(fastMap.containsValue("bang"));
+ assertFalse(fastMap.containsValue("bar"));
+ assertFalse(fastMap.containsValue("beta"));
+ }
+
+ /** The entry view has three entries that together cover all fixture keys and values. */
+ @Test
+ public void testEntrySet() {
+ FastMap<String, String> fastMap = buildTestFastMap();
+ Set<Map.Entry<String, String>> entries = fastMap.entrySet();
+ Collection<String> unseenKeys = expectedSet("foo", "baz", "alpha");
+ Collection<String> unseenValues = expectedSet("bar", "bang", "beta");
+ assertEquals(3, entries.size());
+ for (Map.Entry<String, String> each : entries) {
+ unseenKeys.remove(each.getKey());
+ unseenValues.remove(each.getValue());
+ }
+ assertEquals(0, unseenKeys.size());
+ assertEquals(0, unseenValues.size());
+ }
+
+ /**
+ * Replays one million random get/put/remove operations, with keys drawn from [0, 100),
+ * against both a {@link FastMap} and a reference map and checks that the two agree.
+ */
+ @Test
+ public void testVersusHashMap() {
+ int operations = 1000000;
+ Map<Integer, String> fastMap = new FastMap<Integer, String>(1, operations);
+ Map<Integer, String> reference = Maps.newHashMapWithExpectedSize(operations);
+ Random random = RandomUtils.getRandom();
+ for (int step = 0; step < operations; step++) {
+ double choice = random.nextDouble();
+ Integer key = random.nextInt(100);
+ if (choice < 0.4) {
+ // Read-only probe: only the looked-up value is compared.
+ assertEquals(reference.get(key), fastMap.get(key));
+ continue;
+ }
+ if (choice < 0.7) {
+ assertEquals(reference.put(key, "foo"), fastMap.put(key, "foo"));
+ } else {
+ assertEquals(reference.remove(key), fastMap.remove(key));
+ }
+ // After every mutation attempt the size bookkeeping must match as well.
+ assertEquals(reference.size(), fastMap.size());
+ assertEquals(reference.isEmpty(), fastMap.isEmpty());
+ }
+ }
+
+ /**
+ * With a map constructed as (1, 1), putting a second key keeps the size at 1 and the first
+ * key no longer resolves; overwriting the surviving key keeps the size at 1.
+ */
+ @Test
+ public void testMaxSize() {
+ Map<String, String> bounded = new FastMap<String, String>(1, 1);
+ bounded.put("foo", "bar");
+ assertEquals(1, bounded.size());
+ bounded.put("baz", "bang");
+ assertEquals(1, bounded.size());
+ assertNull(bounded.get("foo"));
+ bounded.put("baz", "buzz");
+ assertEquals(1, bounded.size());
+ assertEquals("buzz", bounded.get("baz"));
+ }
+
+ /** Creates a new map holding foo=bar, baz=bang and alpha=beta. */
+ private static FastMap<String, String> buildTestFastMap() {
+ FastMap<String, String> fixture = new FastMap<String, String>();
+ fixture.put("foo", "bar");
+ fixture.put("baz", "bang");
+ fixture.put("alpha", "beta");
+ return fixture;
+ }
+
+ /** Builds a mutable set, sized for the given items, that contains exactly those items. */
+ private static Collection<String> expectedSet(String... items) {
+ Collection<String> result = Sets.newHashSetWithExpectedSize(items.length);
+ for (String item : items) {
+ result.add(item);
+ }
+ return result;
+ }
+
+ /** Asserts that the map reports a size of zero and claims to be empty. */
+ private static void assertHasNoEntries(Map<?, ?> map) {
+ assertEquals(0, map.size());
+ assertTrue(map.isEmpty());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java
new file mode 100644
index 0000000..1fcc800
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageTest.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+public final class InvertedRunningAverageTest extends TasteTestCase {
+
+ @Test
+ public void testAverage() {
+ RunningAverage avg = new FullRunningAverage();
+ RunningAverage inverted = new InvertedRunningAverage(avg);
+ assertEquals(0, inverted.getCount());
+ avg.addDatum(1.0);
+ assertEquals(1, inverted.getCount());
+ assertEquals(-1.0, inverted.getAverage(), EPSILON);
+ avg.addDatum(2.0);
+ assertEquals(2, inverted.getCount());
+ assertEquals(-1.5, inverted.getAverage(), EPSILON);
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testUnsupported1() {
+ RunningAverage inverted = new InvertedRunningAverage(new FullRunningAverage());
+ inverted.addDatum(1.0);
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testUnsupported2() {
+ RunningAverage inverted = new InvertedRunningAverage(new FullRunningAverage());
+ inverted.changeDatum(1.0);
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testUnsupported3() {
+ RunningAverage inverted = new InvertedRunningAverage(new FullRunningAverage());
+ inverted.removeDatum(1.0);
+ }
+
+ @Test
+ public void testAverageAndStdDev() {
+ RunningAverageAndStdDev avg = new FullRunningAverageAndStdDev();
+ RunningAverageAndStdDev inverted = new InvertedRunningAverageAndStdDev(avg);
+ assertEquals(0, inverted.getCount());
+ avg.addDatum(1.0);
+ assertEquals(1, inverted.getCount());
+ assertEquals(-1.0, inverted.getAverage(), EPSILON);
+ avg.addDatum(2.0);
+ assertEquals(2, inverted.getCount());
+ assertEquals(-1.5, inverted.getAverage(), EPSILON);
+ assertEquals(Math.sqrt(2.0)/2.0, inverted.getStandardDeviation(), EPSILON);
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testAndStdDevUnsupported1() {
+ RunningAverage inverted = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev());
+ inverted.addDatum(1.0);
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testAndStdDevUnsupported2() {
+ RunningAverage inverted = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev());
+ inverted.changeDatum(1.0);
+ }
+
+ @Test(expected = UnsupportedOperationException.class)
+ public void testAndStdDevUnsupported3() {
+ RunningAverage inverted = new InvertedRunningAverageAndStdDev(new FullRunningAverageAndStdDev());
+ inverted.removeDatum(1.0);
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java
new file mode 100644
index 0000000..7458df3
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/LongPrimitiveArrayIteratorTest.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+import java.util.NoSuchElementException;
+
+public final class LongPrimitiveArrayIteratorTest extends TasteTestCase {
+
+ @Test(expected = NoSuchElementException.class)
+ public void testEmpty() {
+ LongPrimitiveIterator it = new LongPrimitiveArrayIterator(new long[0]);
+ assertFalse(it.hasNext());
+ it.next();
+ }
+
+ @Test(expected = NoSuchElementException.class)
+ public void testNext() {
+ LongPrimitiveIterator it = new LongPrimitiveArrayIterator(new long[] {3,2,1});
+ assertTrue(it.hasNext());
+ assertEquals(3, (long) it.next());
+ assertTrue(it.hasNext());
+ assertEquals(2, it.nextLong());
+ assertTrue(it.hasNext());
+ assertEquals(1, (long) it.next());
+ assertFalse(it.hasNext());
+ it.nextLong();
+ }
+
+ @Test
+ public void testPeekSkip() {
+ LongPrimitiveIterator it = new LongPrimitiveArrayIterator(new long[] {3,2,1});
+ assertEquals(3, it.peek());
+ it.skip(2);
+ assertEquals(1, it.nextLong());
+ assertFalse(it.hasNext());
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java
new file mode 100644
index 0000000..20233a7
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/MockRefreshable.java
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+
+import java.util.Collection;
+import java.util.concurrent.Callable;
+
+/**
+ * Test double that is both a {@link Refreshable} and a {@link Callable}; it records how many
+ * times it was refreshed or called so tests can assert on the count.
+ */
+final class MockRefreshable implements Refreshable, Callable<Object> {
+
+ /** Number of refresh()/call() invocations seen so far. */
+ private int invocations;
+
+ /** Counts the refresh by delegating to {@link #call()}; the argument is not inspected. */
+ @Override
+ public void refresh(Collection<Refreshable> alreadyRefreshed) {
+ call();
+ }
+
+ /** Increments the invocation counter and returns null. */
+ @Override
+ public Object call() {
+ invocations += 1;
+ return null;
+ }
+
+ /** Returns how many times this mock has been refreshed or called. */
+ int getCallCount() {
+ return invocations;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
----------------------------------------------------------------------
diff --git a/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
new file mode 100644
index 0000000..54c97e3
--- /dev/null
+++ b/mr/src/test/java/org/apache/mahout/cf/taste/impl/common/RefreshHelperTest.java
@@ -0,0 +1,70 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import com.google.common.collect.Sets;
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.impl.TasteTestCase;
+import org.junit.Test;
+
+import java.util.Collection;
+
+/** Tests {@link RefreshHelper} using {@link MockRefreshable} call counters. */
+public final class RefreshHelperTest extends TasteTestCase {
+
+ /** The callable given to the constructor is invoked exactly once by one refresh(null). */
+ @Test
+ public void testCallable() {
+ MockRefreshable counter = new MockRefreshable();
+ Refreshable underTest = new RefreshHelper(counter);
+ underTest.refresh(null);
+ assertEquals(1, counter.getCallCount());
+ }
+
+ /** A helper built with a null callable must tolerate refresh(null) without throwing. */
+ @Test
+ public void testNoCallable() {
+ Refreshable underTest = new RefreshHelper(null);
+ underTest.refresh(null);
+ }
+
+ /** Every registered dependency is refreshed exactly once by one refresh(null). */
+ @Test
+ public void testDependencies() {
+ RefreshHelper underTest = new RefreshHelper(null);
+ MockRefreshable first = new MockRefreshable();
+ MockRefreshable second = new MockRefreshable();
+ underTest.addDependency(first);
+ underTest.addDependency(second);
+ underTest.refresh(null);
+ assertEquals(1, first.getCallCount());
+ assertEquals(1, second.getCallCount());
+ }
+
+ /** A dependency listed in the already-refreshed collection is skipped; the other is not. */
+ @Test
+ public void testAlreadyRefreshed() {
+ RefreshHelper underTest = new RefreshHelper(null);
+ MockRefreshable first = new MockRefreshable();
+ MockRefreshable second = new MockRefreshable();
+ underTest.addDependency(first);
+ underTest.addDependency(second);
+ Collection<Refreshable> done = Sets.newHashSetWithExpectedSize(1);
+ done.add(first);
+ underTest.refresh(done);
+ assertEquals(0, first.getCallCount());
+ assertEquals(1, second.getCallCount());
+ }
+
+}