You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/09 23:35:17 UTC
svn commit: r654943 [2/9] - in /lucene/mahout/trunk/core: ./ lib/
src/main/examples/org/ src/main/examples/org/apache/
src/main/examples/org/apache/mahout/ src/main/examples/org/apache/mahout/cf/
src/main/examples/org/apache/mahout/cf/taste/ src/main/e...
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/ejb-jar.xml Fri May 9 14:35:12 2008
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<ejb-jar xmlns="http://java.sun.com/xml/ns/j2ee"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/ejb-jar_2_1.xsd"
+ version="2.1">
+ <enterprise-beans>
+ <session>
+ <ejb-name>RecommenderEJB</ejb-name>
+ <home>org.apache.mahout.cf.taste.ejb.RecommenderEJBHome</home>
+ <remote>org.apache.mahout.cf.taste.ejb.RecommenderEJB</remote>
+ <local-home>org.apache.mahout.cf.taste.ejb.RecommenderEJBLocalHome</local-home>
+ <local>org.apache.mahout.cf.taste.ejb.RecommenderEJBLocal</local>
+ <ejb-class>org.apache.mahout.cf.taste.ejb.RecommenderEJBBean</ejb-class>
+ <session-type>Stateless</session-type>
+ <transaction-type>Container</transaction-type>
+ <env-entry>
+ <env-entry-name>recommender-class</env-entry-name>
+ <env-entry-type>java.lang.String</env-entry-type>
+ <env-entry-value>@RECOMMENDER_CLASS@</env-entry-value>
+ </env-entry>
+ <!-- ...or give the JNDI name where an implementation can be found,
+ relative to java:comp/env -->
+ <!--
+ <env-entry>
+ <env-entry-name>recommender-jndi-name</env-entry-name>
+ <env-entry-type>java.lang.String</env-entry-type>
+ <env-entry-value>foo/YourRecommender</env-entry-value>
+ </env-entry>
+ -->
+ </session>
+ </enterprise-beans>
+ <assembly-descriptor>
+ <container-transaction>
+ <method>
+ <ejb-name>RecommenderEJB</ejb-name>
+ <method-name>*</method-name>
+ </method>
+ <trans-attribute>Supports</trans-attribute>
+ </container-transaction>
+ </assembly-descriptor>
+</ejb-jar>
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/mapping.xml Fri May 9 14:35:12 2008
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<java-wsdl-mapping xmlns="http://java.sun.com/xml/ns/j2ee"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://www.ibm.com/webservices/xsd/j2ee_jaxrpc_mapping_1_1.xsd"
+ version="1.1">
+ <package-mapping>
+ <package-type>org.apache.mahout.cf.taste.ejb</package-type>
+ <namespaceURI>urn:org.apache.mahout.cf.taste.ejb.RecommenderWS</namespaceURI>
+ </package-mapping>
+</java-wsdl-mapping>
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/ejb/webservices.xml Fri May 9 14:35:12 2008
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<webservices xmlns="http://java.sun.com/xml/ns/j2ee"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://www.ibm.com/webservices/xsd/j2ee_web_services_1_1.xsd"
+ version="1.1">
+ <webservice-description>
+ <webservice-description-name>RecommenderWS</webservice-description-name>
+ <wsdl-file>META-INF/RecommenderWS.wsdl</wsdl-file>
+ <jaxrpc-mapping-file>META-INF/mapping.xml</jaxrpc-mapping-file>
+ <port-component>
+ <port-component-name>RecommenderServicePort</port-component-name>
+ <wsdl-port>
+ <namespaceURI>urn:org.apache.mahout.cf.taste.ejb.RecommenderWS</namespaceURI>
+ <localpart>RecommenderServicePort</localpart>
+ </wsdl-port>
+ <service-endpoint-interface>org.apache.mahout.cf.taste.ejb.RecommenderWS</service-endpoint-interface>
+ <service-impl-bean>
+ <ejb-link>RecommenderEJB</ejb-link>
+ </service-impl-bean>
+ </port-component>
+ </webservice-description>
+</webservices>
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/IRStatistics.java Fri May 9 14:35:12 2008
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+/**
+ * <p>Implementations encapsulate information retrieval-related statistics about
+ * a {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.</p>
+ *
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval">Information retrieval</a>.</p>
+ */
+public interface IRStatistics {
+
+ /**
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Precision">Precision</a>.</p>
+ */
+ double getPrecision();
+
+ /**
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#Recall">Recall</a>.</p>
+ */
+ double getRecall();
+
+ /**
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure">F-measure</a>.</p>
+ */
+ double getF1Measure();
+
+ /**
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval#F-measure">F-measure</a>.</p>
+ */
+ double getFNMeasure(double n);
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderBuilder.java Fri May 9 14:35:12 2008
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+/**
+ * <p>Implementations of this inner interface are simple helper classes which create a
+ * {@link Recommender} to be evaluated based on the given {@link DataModel}.</p>
+ */
+public interface RecommenderBuilder {
+
+ /**
+ * <p>Builds a {@link Recommender} implementation to be evaluated, using the given {@link DataModel}.</p>
+ *
+ * @param dataModel {@link DataModel} to build the {@link Recommender} on
+ * @return {@link Recommender} based upon the given {@link DataModel}
+ * @throws TasteException if an error occurs while accessing the {@link DataModel}
+ */
+ Recommender buildRecommender(DataModel dataModel) throws TasteException;
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderEvaluator.java Fri May 9 14:35:12 2008
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>Implementations of this interface evaluate the quality of a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.</p>
+ */
+public interface RecommenderEvaluator {
+
+ /**
+ * <p>Evaluates the quality of a {@link org.apache.mahout.cf.taste.recommender.Recommender}'s recommendations.
+ * The range of values that may be returned
+ * depends on the implementation, but <em>lower</em> values must mean better recommendations, with 0 being the
+ * lowest / best possible evaluation, meaning a perfect match. This method does not
+ * accept a {@link org.apache.mahout.cf.taste.recommender.Recommender} directly, but rather a
+ * {@link RecommenderBuilder} which can build the
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender} to test on top of a given {@link DataModel}.</p>
+ *
+ * <p>Implementations will take a certain percentage of the preferences supplied by the given {@link DataModel}
+ * as "training data". This is typically most of the data, like 90%. This data is used to produce recommendations,
+ * and the rest of the data is compared against estimated preference values to see how much the
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s predicted preferences match the user's real
+ * preferences. Specifically,
+ * for each user, this percentage of the user's ratings are used to produce recommendatinos, and for each user,
+ * the remaining preferences are compared against the user's real preferences.</p>
+ *
+ * <p>For large datasets, it may be desirable to only evaluate based on a small percentage of the data.
+ * <code>evaluationPercentage</code> controls how many of the {@link DataModel}'s users are used in
+ * evaluation.</p>
+ *
+ * <p>To be clear, <code>trainingPercentage</code> and <code>evaluationPercentage</code> are not related.
+ * They do not need to add up to 1.0, for example.</p>
+ *
+ * @param recommenderBuilder object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
+ * @param dataModel dataset to test on
+ * @param trainingPercentage percentage of each user's preferences to use to produce recommendations; the rest
+ * are compared to estimated preference values to evaluate {@link org.apache.mahout.cf.taste.recommender.Recommender}
+ * performance
+ * @param evaluationPercentage percentage of users to use in evaluation
+ * @return a "score" representing how well the {@link org.apache.mahout.cf.taste.recommender.Recommender}'s
+ * estimated preferences match real values; <em>lower</em> scores mean a better match and 0 is a perfect match
+ * @throws TasteException if an error occurs while accessing the {@link DataModel}
+ */
+ double evaluate(RecommenderBuilder recommenderBuilder,
+ DataModel dataModel,
+ double trainingPercentage,
+ double evaluationPercentage) throws TasteException;
+
+}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/eval/RecommenderIRStatsEvaluator.java Fri May 9 14:35:12 2008
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.eval;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+/**
+ * <p>Implementations collect information retrieval-related statistics on a
+ * {@link org.apache.mahout.cf.taste.recommender.Recommender}'s performance, including precision,
+ * recall and f-measure.</p>
+ *
+ * <p>See <a href="http://en.wikipedia.org/wiki/Information_retrieval">Information retrieval</a>.
+ */
+public interface RecommenderIRStatsEvaluator {
+
+ /**
+ * @param recommenderBuilder object that can build a {@link org.apache.mahout.cf.taste.recommender.Recommender} to test
+ * @param dataModel dataset to test on
+ * @param at as in, "precision at 5". The number of recommendations to consider when evaluating
+ * precision, etc.
+ * @param relevanceThreshold {@link org.apache.mahout.cf.taste.model.Item}s whose preference value is at least
+ * this value are considered "relevant" for the purposes of computations
+ * @return {@link IRStatistics} with resulting precision, recall, etc.
+ * @throws TasteException if an error occurs while accessing the {@link DataModel}
+ */
+ IRStatistics evaluate(RecommenderBuilder recommenderBuilder,
+ DataModel dataModel,
+ int at,
+ double relevanceThreshold,
+ double evaluationPercentage) throws TasteException;
+
+}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java Fri May 9 14:35:12 2008
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * <p>Simple, fast {@link Iterator} for an array.</p>
+ */
+public final class ArrayIterator<T> implements Iterator<T>, Iterable<T> {
+
+ private final T[] array;
+ private int position;
+ private final int max;
+
+ /**
+ * <p>Creates an {@link ArrayIterator} over an entire array.</p>
+ *
+ * @param array array to iterate over
+ */
+ public ArrayIterator(T[] array) {
+ if (array == null) {
+ throw new IllegalArgumentException("array is null");
+ }
+ this.array = array; // yeah, not going to copy the array here, for performance
+ this.position = 0;
+ this.max = array.length;
+ }
+
+ public boolean hasNext() {
+ return position < max;
+ }
+
+ public T next() {
+ if (position >= array.length) {
+ throw new NoSuchElementException();
+ }
+ return array[position++];
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ public Iterator<T> iterator() {
+ return this;
+ }
+
+ @Override
+ public String toString() {
+ return "ArrayIterator[" + Arrays.toString(array) + '@' + position + ']';
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverage.java Fri May 9 14:35:12 2008
@@ -0,0 +1,80 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+/**
+ * <p>Like {@link org.apache.mahout.cf.taste.impl.common.FullRunningAverage} but uses smaller values (short, float)
+ * to conserve memory. This is used in {@link org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender}
+ * since it may allocate tens of millions of such objects.</p>
+ */
+public class CompactRunningAverage implements RunningAverage, Serializable {
+
+ private char count;
+ private float average;
+
+ public CompactRunningAverage() {
+ count = (char) 0;
+ average = Float.NaN;
+ }
+
+ public void addDatum(double datum) {
+ if ((int) count < 65535) { // = 65535 = 2^16 - 1
+ if ((int) ++count == 1) {
+ average = (float) datum;
+ } else {
+ average = (float)
+ ((double) average * ((double) ((int) count - 1) / (double) count) + datum / (double) count);
+ }
+ }
+ }
+
+ public void removeDatum(double datum) {
+ if ((int) count == 0) {
+ throw new IllegalStateException();
+ }
+ if ((int) --count == 0) {
+ average = Float.NaN;
+ } else {
+ average = (float)
+ ((double) average * ((double) ((int) count + 1) / (double) count) - datum / (double) count);
+ }
+ }
+
+ public void changeDatum(double delta) {
+ if ((int) count == 0) {
+ throw new IllegalStateException();
+ }
+ average += (float) (delta / (double) count);
+ }
+
+ public int getCount() {
+ return (int) count;
+ }
+
+ public double getAverage() {
+ return (double) average;
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(average);
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/CompactRunningAverageAndStdDev.java Fri May 9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Extends {@link CompactRunningAverage} to add a running standard deviation computation.</p>
+ */
+public final class CompactRunningAverageAndStdDev extends CompactRunningAverage implements RunningAverageAndStdDev {
+
+ private float stdDev;
+ private float sumX2;
+
+ public CompactRunningAverageAndStdDev() {
+ stdDev = Float.NaN;
+ }
+
+ public double getStandardDeviation() {
+ return (double) stdDev;
+ }
+
+ @Override
+ public void addDatum(double datum) {
+ super.addDatum(datum);
+ sumX2 += (float) (datum * datum);
+ recomputeStdDev();
+ }
+
+ @Override
+ public void removeDatum(double datum) {
+ super.removeDatum(datum);
+ sumX2 -= (float) (datum * datum);
+ recomputeStdDev();
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public void changeDatum(double delta) {
+ throw new UnsupportedOperationException();
+ }
+
+ private void recomputeStdDev() {
+ int count = getCount();
+ if (count > 1) {
+ double average = getAverage();
+ stdDev = (float) Math.sqrt(((double) sumX2 - average * average * (double) count) / (double) (count - 1));
+ } else {
+ stdDev = Float.NaN;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(String.valueOf(getAverage()) + ',' + stdDev);
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterable.java Fri May 9 14:35:12 2008
@@ -0,0 +1,43 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * <p>An {@link Iterable} over no elements: always produces an {@link Iterator} which iterates
+ * over nothing.</p>
+ */
+public final class EmptyIterable<T> implements Iterable<T> {
+
+ private final Iterator<T> iterator;
+
+ public EmptyIterable() {
+ iterator = new EmptyIterator<T>();
+ }
+
+ public Iterator<T> iterator() {
+ return iterator;
+ }
+
+ @Override
+ public String toString() {
+ return "EmptyIterable[iterator:" + iterator + ']';
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/EmptyIterator.java Fri May 9 14:35:12 2008
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * <p>An empty {@link Iterator}, which iterates over nothing.</p>
+ */
+final class EmptyIterator<T> implements Iterator<T> {
+
+ /**
+ * @return false
+ */
+ public boolean hasNext() {
+ return false;
+ }
+
+ /**
+ * @return never returns anything
+ * @throws NoSuchElementException
+ */
+ public T next() {
+ throw new NoSuchElementException();
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public String toString() {
+ return "EmptyIterator";
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FastMap.java Fri May 9 14:35:12 2008
@@ -0,0 +1,666 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.AbstractCollection;
+import java.util.AbstractSet;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Set;
+
+/**
+ * <p>This is an optimized {@link Map} implementation, based on algorithms described in Knuth's
+ * "Art of Computer Programming", Vol. 3, p. 529.</p>
+ *
+ * <p>It should be faster than {@link java.util.HashMap} in some cases, but not all. Its main feature is
+ * a "max size" and the ability to transparently, efficiently and semi-intelligently evict old entries
+ * when max size is exceeded.</p>
+ *
+ * <p>This class is not a bit thread-safe.</p>
+ *
+ * <p>This implementation does not allow <code>null</code> as a key or value.</p>
+ */
+public final class FastMap<K, V> implements Map<K, V> {
+
+ /**
+ * The largest prime less than 2<sup>31</sup>-1 that is the smaller of a twin prime pair.
+ */
+ private static final int MAX_INT_SMALLER_TWIN_PRIME = 2147482949;
+
+ public static final int NO_MAX_SIZE = Integer.MAX_VALUE;
+
+ /**
+ * Dummy object used to represent a key that has been removed. Package-private to allow direct access
+ * by inner classes. No harm in exposing it.
+ */
+ private static final Object REMOVED = new Object();
+
+ private K[] keys;
+ private V[] values;
+ private int numEntries;
+ private int numSlotsUsed;
+ private Set<Entry<K, V>> entrySet;
+ private Set<K> keySet;
+ private Collection<V> valueCollection;
+ private int maxSize;
+ private final BitSet recentlyAccessed;
+
+ /**
+ * Creates a new {@link FastMap} with default capacity.
+ */
+ public FastMap() {
+ this(11, NO_MAX_SIZE);
+ }
+
+ public FastMap(int size) {
+ this(size, NO_MAX_SIZE);
+ }
+
+ /**
+ * Creates a new {@link FastMap} whose capacity can accommodate the given number of entries without rehash.</p>
+ *
+ * @param size desired capacity
+ * @param maxSize max capacity
+ * @throws IllegalArgumentException if size is less than 1 or at least half of {@link #MAX_INT_SMALLER_TWIN_PRIME}
+ */
+ public FastMap(int size, int maxSize) throws IllegalArgumentException {
+ if (size < 1) {
+ throw new IllegalArgumentException("size must be at least 1");
+ }
+ if (size >= MAX_INT_SMALLER_TWIN_PRIME >> 1) {
+ throw new IllegalArgumentException("size must be less than " + (MAX_INT_SMALLER_TWIN_PRIME >> 1));
+ }
+ if (maxSize < 1) {
+ throw new IllegalArgumentException("maxSize must be at least 1");
+ }
+ int hashSize = nextTwinPrime(2 * size);
+ keys = (K[]) new Object[hashSize];
+ values = (V[]) new Object[hashSize];
+ this.maxSize = maxSize;
+ this.recentlyAccessed = maxSize == Integer.MAX_VALUE ? null : new BitSet(maxSize);
+ }
+
+ /**
+ * This is for the benefit of inner classes. Without it the compiler would just generate a similar synthetic
+ * accessor. Might as well make it explicit.
+ */
+ K[] getKeys() {
+ return keys;
+ }
+
+ /**
+ * This is for the benefit of inner classes. Without it the compiler would just generate a similar synthetic
+ * accessor. Might as well make it explicit.
+ */
+ V[] getValues() {
+ return values;
+ }
+
+ private int find(Object key) {
+ int theHashCode = key.hashCode() & 0x7FFFFFFF; // make sure it's positive
+ K[] keys = this.keys;
+ int hashSize = keys.length;
+ int jump = 1 + theHashCode % (hashSize - 2);
+ int index = theHashCode % hashSize;
+ K currentKey = keys[index];
+ while (currentKey != null && (currentKey == REMOVED || !key.equals(currentKey))) {
+ if (index < jump) {
+ index += hashSize - jump;
+ } else {
+ index -= jump;
+ }
+ currentKey = keys[index];
+ }
+ return index;
+ }
+
+ public V get(Object key) {
+ if (key == null) {
+ return null;
+ }
+ int index = find(key);
+ if (recentlyAccessed != null) {
+ recentlyAccessed.set(index);
+ }
+ return values[index];
+ }
+
+ public int size() {
+ return numEntries;
+ }
+
+ public boolean isEmpty() {
+ return numEntries == 0;
+ }
+
+ public boolean containsKey(Object key) {
+ return key != null && keys[find(key)] != null;
+ }
+
+ public boolean containsValue(Object value) {
+ if (value == null) {
+ return false;
+ }
+ for (V theValue : values) {
+ if (theValue != null && value.equals(theValue)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * @throws NullPointerException if key or value is null
+ */
+ public V put(K key, V value) {
+ if (key == null || value == null) {
+ throw new NullPointerException();
+ }
+ int hashSize = keys.length;
+ if (numSlotsUsed >= hashSize >> 1) {
+ growAndRehash();
+ }
+ // Here we may later consider implementing Brent's variation described on page 532
+ int index = find(key);
+ if (keys[index] == null) {
+ // If size is limited,
+ if (recentlyAccessed != null && numEntries >= maxSize) {
+ // and we're too large, clear some old-ish entry
+ clearStaleEntry(index);
+ }
+ keys[index] = key;
+ values[index] = value;
+ numEntries++;
+ numSlotsUsed++;
+ return null;
+ } else {
+ V oldValue = values[index];
+ values[index] = value;
+ return oldValue;
+ }
+ }
+
+ private void clearStaleEntry(int index) {
+ while (true) {
+ int hashSize = keys.length;
+ K currentKey;
+ do {
+ if (index == 0) {
+ index = hashSize - 1;
+ } else {
+ index--;
+ }
+ currentKey = keys[index];
+ } while (currentKey == null || currentKey == REMOVED);
+ if (recentlyAccessed.get(index)) {
+ recentlyAccessed.clear(index);
+ } else {
+ break;
+ }
+ }
+ // Delete the entry
+ ((Object[]) keys)[index] = REMOVED;
+ numEntries--;
+ values[index] = null;
+ }
+
+ public void putAll(Map<? extends K, ? extends V> map) {
+ if (map == null) {
+ throw new NullPointerException();
+ }
+ for (Entry<? extends K, ? extends V> entry : map.entrySet()) {
+ put(entry.getKey(), entry.getValue());
+ }
+ }
+
+ public V remove(Object key) {
+ if (key == null) {
+ return null;
+ }
+ int index = find(key);
+ if (keys[index] == null) {
+ return null;
+ } else {
+ ((Object[]) keys)[index] = REMOVED;
+ numEntries--;
+ V oldValue = values[index];
+ values[index] = null;
+ // don't decrement numSlotsUsed
+ return oldValue;
+ }
+ // Could un-set recentlyAccessed's bit but doesn't matter
+ }
+
+ public void clear() {
+ numEntries = 0;
+ numSlotsUsed = 0;
+ Arrays.fill(keys, null);
+ Arrays.fill(values, null);
+ if (recentlyAccessed != null) {
+ recentlyAccessed.clear();
+ }
+ }
+
+ public Set<K> keySet() {
+ if (keySet == null) {
+ keySet = new KeySet();
+ }
+ return keySet;
+ }
+
+ public Collection<V> values() {
+ if (valueCollection == null) {
+ valueCollection = new ValueCollection();
+ }
+ return valueCollection;
+ }
+
+ public Set<Entry<K, V>> entrySet() {
+ if (entrySet == null) {
+ entrySet = new EntrySet();
+ }
+ return entrySet;
+ }
+
+ public void rehash() {
+ rehash(keys.length);
+ }
+
+ private void growAndRehash() {
+ int hashSize = keys.length;
+ if (hashSize >= MAX_INT_SMALLER_TWIN_PRIME >> 1) {
+ throw new IllegalStateException("Can't grow any more");
+ }
+ rehash(nextTwinPrime(2 * hashSize));
+ }
+
+ private void rehash(int newHashSize) {
+ K[] oldKeys = keys;
+ V[] oldValues = values;
+ numEntries = 0;
+ numSlotsUsed = 0;
+ if (recentlyAccessed != null) {
+ recentlyAccessed.clear();
+ }
+ keys = (K[]) new Object[newHashSize];
+ values = (V[]) new Object[newHashSize];
+ int length = oldKeys.length;
+ for (int i = 0; i < length; i++) {
+ K key = oldKeys[i];
+ if (key != null && key != REMOVED) {
+ put(key, oldValues[i]);
+ }
+ }
+ }
+
+ // Simple methods for finding a next larger prime
+
+
+ /**
+ * <p>Finds next-largest "twin primes": numbers p and p+2 such that both are prime. Finds the smallest such p such
+ * that the smaller twin, p, is greater than or equal to n. Returns p+2, the larger of the two twins.</p>
+ */
+ private static int nextTwinPrime(int n) {
+ if (n > MAX_INT_SMALLER_TWIN_PRIME) {
+ throw new IllegalArgumentException();
+ }
+ int next = nextPrime(n);
+ while (isNotPrime(next + 2)) {
+ next = nextPrime(next + 4);
+ }
+ return next + 2;
+ }
+
+ /**
+ * <p>Finds smallest prime p such that p is greater than or equal to n.</p>
+ */
+ private static int nextPrime(int n) {
+ // Make sure the number is odd. Is this too clever?
+ n |= 0x1;
+ // There is no problem with overflow since Integer.MAX_INT is prime, as it happens
+ while (isNotPrime(n)) {
+ n += 2;
+ }
+ return n;
+ }
+
+ /**
+ * @param n
+ * @return <code>true</code> iff n is not a prime
+ */
+ private static boolean isNotPrime(int n) {
+ if (n < 2) {
+ throw new IllegalArgumentException();
+ }
+ if ((n & 0x1) == 0) { // even
+ return true;
+ }
+ int max = 1 + (int) Math.sqrt((double) n);
+ for (int d = 3; d <= max; d += 2) {
+ if (n % d == 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private void iteratorRemove(int lastNext) {
+ if (lastNext >= values.length) {
+ throw new NoSuchElementException();
+ }
+ if (lastNext < 0) {
+ throw new IllegalStateException();
+ }
+ values[lastNext] = null;
+ ((Object[]) keys)[lastNext] = REMOVED;
+ numEntries--;
+ }
+
+ private final class EntrySet extends AbstractSet<Entry<K, V>> {
+
+ @Override
+ public int size() {
+ return FastMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return FastMap.this.containsKey(o);
+ }
+
+ @Override
+ public Iterator<Entry<K, V>> iterator() {
+ return new EntryIterator();
+ }
+
+ @Override
+ public boolean add(Entry<K, V> t) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends Entry<K, V>> ts) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastMap.this.clear();
+ }
+
+ final class MapEntry implements Entry<K, V> {
+
+ private final int index;
+
+ private MapEntry(int index) {
+ this.index = index;
+ }
+
+ public K getKey() {
+ return getKeys()[index];
+ }
+
+ public V getValue() {
+ return getValues()[index];
+ }
+
+ public V setValue(V value) {
+ if (value == null) {
+ throw new IllegalArgumentException();
+ }
+ V[] values = getValues();
+ V oldValue = values[index];
+ getValues()[index] = value;
+ return oldValue;
+ }
+ }
+
+ final class EntryIterator implements Iterator<Entry<K, V>> {
+
+ private int position;
+ private int lastNext = -1;
+
+ public boolean hasNext() {
+ goToNext();
+ return position < getKeys().length;
+ }
+
+ public Entry<K, V> next() {
+ goToNext();
+ lastNext = position;
+ K[] keys = getKeys();
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return new MapEntry(position++);
+ }
+
+ private void goToNext() {
+ V[] values = getValues();
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+ }
+
+ }
+
+ private final class KeySet extends AbstractSet<K> {
+
+ @Override
+ public int size() {
+ return FastMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return FastMap.this.containsKey(o);
+ }
+
+ @Override
+ public Iterator<K> iterator() {
+ return new KeyIterator();
+ }
+
+ @Override
+ public boolean add(K t) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends K> ts) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastMap.this.clear();
+ }
+
+ final class KeyIterator implements Iterator<K> {
+
+ private int position;
+ private int lastNext = -1;
+
+ public boolean hasNext() {
+ goToNext();
+ return position < getKeys().length;
+ }
+
+ public K next() {
+ goToNext();
+ lastNext = position;
+ K[] keys = getKeys();
+ if (position >= keys.length) {
+ throw new NoSuchElementException();
+ }
+ return keys[position++];
+ }
+
+ private void goToNext() {
+ V[] values = getValues();
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+ }
+
+ }
+
+ private final class ValueCollection extends AbstractCollection<V> {
+
+ @Override
+ public int size() {
+ return FastMap.this.size();
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return FastMap.this.isEmpty();
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return FastMap.this.containsValue(o);
+ }
+
+ @Override
+ public Iterator<V> iterator() {
+ return new ValueIterator();
+ }
+
+ @Override
+ public boolean add(V v) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean remove(Object o) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean addAll(Collection<? extends V> vs) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean removeAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean retainAll(Collection<?> objects) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void clear() {
+ FastMap.this.clear();
+ }
+
+ final class ValueIterator implements Iterator<V> {
+
+ private int position;
+ private int lastNext = -1;
+
+ public boolean hasNext() {
+ goToNext();
+ return position < getValues().length;
+ }
+
+ public V next() {
+ goToNext();
+ lastNext = position;
+ V[] values = getValues();
+ if (position >= values.length) {
+ throw new NoSuchElementException();
+ }
+ return values[position++];
+ }
+
+ private void goToNext() {
+ V[] values = getValues();
+ int length = values.length;
+ while (position < length && values[position] == null) {
+ position++;
+ }
+ }
+
+ public void remove() {
+ iteratorRemove(lastNext);
+ }
+
+ }
+
+ }
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java Fri May 9 14:35:12 2008
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+/**
+ * <p>A simple class that can keep track of a running avearage of a series of numbers.
+ * One can add to or remove from the series, as well as update a datum in the series.
+ * The class does not actually keep track of the series of values, just its running average,
+ * so it doesn't even matter if you remove/change a value that wasn't added.</p>
+ */
+public class FullRunningAverage implements RunningAverage, Serializable {
+
+ private int count;
+ private double average;
+
+ public FullRunningAverage() {
+ count = 0;
+ average = Double.NaN;
+ }
+
+ /**
+ * @param datum new item to add to the running average
+ */
+ public void addDatum(double datum) {
+ if (++count == 1) {
+ average = datum;
+ } else {
+ average = average * ((double) (count - 1) / (double) count) + datum / (double) count;
+ }
+ }
+
+ /**
+ * @param datum item to remove to the running average
+ * @throws IllegalStateException if count is 0
+ */
+ public void removeDatum(double datum) {
+ if (count == 0) {
+ throw new IllegalStateException();
+ }
+ if (--count == 0) {
+ average = Double.NaN;
+ } else {
+ average = average * ((double) (count + 1) / (double) count) - datum / (double) count;
+ }
+ }
+
+ /**
+ * @param delta amount by which to change a datum in the running average
+ * @throws IllegalStateException if count is 0
+ */
+ public void changeDatum(double delta) {
+ if (count == 0) {
+ throw new IllegalStateException();
+ }
+ average += delta / (double) count;
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public double getAverage() {
+ return average;
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(average);
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java Fri May 9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Extends {@link FullRunningAverage} to add a running standard deviation computation.</p>
+ */
+public final class FullRunningAverageAndStdDev extends FullRunningAverage implements RunningAverageAndStdDev {
+
+ private double stdDev;
+ private double sumX2;
+
+ public FullRunningAverageAndStdDev() {
+ stdDev = Double.NaN;
+ }
+
+ public double getStandardDeviation() {
+ return stdDev;
+ }
+
+ @Override
+ public void addDatum(double datum) {
+ super.addDatum(datum);
+ sumX2 += datum * datum;
+ recomputeStdDev();
+ }
+
+ @Override
+ public void removeDatum(double datum) {
+ super.removeDatum(datum);
+ sumX2 -= datum * datum;
+ recomputeStdDev();
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ @Override
+ public void changeDatum(double delta) {
+ throw new UnsupportedOperationException();
+ }
+
+ private void recomputeStdDev() {
+ int count = getCount();
+ if (count > 1) {
+ double average = getAverage();
+ stdDev = Math.sqrt((sumX2 - average * average * (double) count) / (double) (count - 1));
+ } else {
+ stdDev = Double.NaN;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(String.valueOf(getAverage()) + ',' + stdDev);
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IOUtils.java Fri May 9 14:35:12 2008
@@ -0,0 +1,96 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.sql.Connection;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>I/O-related utility methods that don't have a better home.</p>
+ */
+public final class IOUtils {
+
+ private static final Logger log = Logger.getLogger(IOUtils.class.getName());
+
+ private IOUtils() {
+ }
+
+ public static void quietClose(Closeable closeable) {
+ if (closeable != null) {
+ try {
+ closeable.close();
+ } catch (IOException ioe) {
+ log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", ioe);
+ }
+ }
+ }
+
+ // Sheez, why can't ResultSet, Statement and Connection implement Closeable?
+
+ public static void quietClose(ResultSet closeable) {
+ if (closeable != null) {
+ try {
+ closeable.close();
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", sqle);
+ }
+ }
+ }
+
+ public static void quietClose(Statement closeable) {
+ if (closeable != null) {
+ try {
+ closeable.close();
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", sqle);
+ }
+ }
+ }
+
+ public static void quietClose(Connection closeable) {
+ if (closeable != null) {
+ try {
+ closeable.close();
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Unexpected exception while closing " + closeable + "; continuing", sqle);
+ }
+ }
+ }
+
+ /**
+ * Closes a {@link ResultSet}, {@link Statement} and {@link Connection} (if not null) and
+ * logs (but does not rethrow) any resulting {@link SQLException}. This is useful for cleaning
+ * up after a database query.
+ *
+ * @param resultSet {@link ResultSet} to close
+ * @param statement {@link Statement} to close
+ * @param connection {@link Connection} to close
+ */
+ public static void safeClose(ResultSet resultSet, Statement statement, Connection connection) {
+ quietClose(resultSet);
+ quietClose(statement);
+ quietClose(connection);
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorIterable.java Fri May 9 14:35:12 2008
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * <p>Simple utility class that makes an {@link Iterator} {@link Iterable}
+ * by returning the {@link Iterator} itself.</p>
+ */
+public final class IteratorIterable<T> implements Iterable<T> {
+
+ private final Iterator<T> iterator;
+
+ /**
+ * <p>Constructs an {@link IteratorIterable} for an {@link Iterator}.</p>
+ *
+ * @param iterator {@link Iterator} on which to base this {@link IteratorIterable}
+ */
+ public IteratorIterable(Iterator<T> iterator) {
+ if (iterator == null) {
+ throw new IllegalArgumentException("iterator is null");
+ }
+ this.iterator = iterator;
+ }
+
+ public Iterator<T> iterator() {
+ return iterator;
+ }
+
+ @Override
+ public String toString() {
+ return "IteratorIterable[iterator:" + iterator + ']';
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/IteratorUtils.java Fri May 9 14:35:12 2008
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * <p>{@link java.util.Iterator}-related methods without a better home.</p>
+ */
+public final class IteratorUtils {
+
+ private IteratorUtils() {
+ }
+
+ /**
+ * @param iterable {@link Iterable} whose contents are to be put into a {@link List}
+ * @return a {@link List} with the objects one gets by iterating over the given {@link Iterable}
+ */
+ public static <K> List<K> iterableToList(Iterable<K> iterable) {
+ return iterableToList(iterable, null);
+ }
+
+ /**
+ * @param iterable {@link Iterable} whose contents are to be put into a {@link List}
+ * @param comparator {@link Comparator} defining the sort order of the returned {@link List}
+ * @return a {@link List} with the objects one gets by iterating over the given {@link Iterable},
+ * sorted according to the given {@link Comparator}
+ */
+ public static <K> List<K> iterableToList(Iterable<K> iterable, Comparator<K> comparator) {
+ if (iterable == null) {
+ throw new IllegalArgumentException("iterable is null");
+ }
+ List<K> list;
+ if (iterable instanceof Collection) {
+ if (iterable instanceof List) {
+ list = (List<K>) iterable;
+ } else {
+ Collection<K> collection = (Collection<K>) iterable;
+ list = new ArrayList<K>(collection.size());
+ list.addAll(collection);
+ }
+ } else {
+ list = new ArrayList<K>();
+ for (K item : iterable) {
+ list.add(item);
+ }
+ }
+ if (comparator != null) {
+ Collections.sort(list, comparator);
+ }
+ return list;
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/Pair.java Fri May 9 14:35:12 2008
@@ -0,0 +1,72 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * A simple (ordered) pair of two objects. Elements may be null.
+ */
+public class Pair<A, B> {
+
+ private final A first;
+ private final B second;
+
+ public Pair(A first, B second) {
+ this.first = first;
+ this.second = second;
+ }
+
+ public A getFirst() {
+ return first;
+ }
+
+ public B getSecond() {
+ return second;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (!(obj instanceof Pair)) {
+ return false;
+ }
+ Pair<?, ?> otherPair = (Pair<?, ?>) obj;
+ return isEqualOrNulls(first, otherPair.first) &&
+ isEqualOrNulls(second, otherPair.second);
+ }
+
+ static boolean isEqualOrNulls(Object obj1, Object obj2) {
+ return obj1 == null ? obj2 == null : obj1.equals(obj2);
+ }
+
+ @Override
+ public int hashCode() {
+ int firstHash = hashCodeNull(first);
+ // Flip top and bottom 16 bits; this makes the hash function probably different
+ // for (a,b) versus (b,a)
+ return (firstHash >>> 16 | firstHash << 16) ^ hashCodeNull(second);
+ }
+
+ static int hashCodeNull(Object obj) {
+ return obj == null ? 0 : obj.hashCode();
+ }
+
+ @Override
+ public String toString() {
+ return '(' + String.valueOf(first) + ',' + String.valueOf(second) + ')';
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java Fri May 9 14:35:12 2008
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Random;
+
+/**
+ * <p>The source of random stuff for the whole project. This lets us make all randomness in
+ * the project predictable, if desired, for when we run unit tests, which should be repeatable.</p>
+ */
+public final class RandomUtils {
+
+ private static final long STANDARD_SEED = 0xCAFEBABEL;
+ private static boolean testSeed;
+
+ private RandomUtils() {
+ }
+
+ public static void useTestSeed() {
+ testSeed = true;
+ }
+
+ public static Random getRandom() {
+ return testSeed ? new Random(STANDARD_SEED) : new Random();
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java Fri May 9 14:35:12 2008
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Interface for classes that can keep track of a running average of a series of numbers.
+ * One can add to or remove from the series, as well as update a datum in the series.
+ * The class does not actually keep track of the series of values, just its running average,
+ * so it doesn't even matter if you remove/change a value that wasn't added.</p>
+ */
+public interface RunningAverage {
+
+ /**
+ * @param datum new item to add to the running average
+ * @throws IllegalArgumentException if datum is {@link Double#NaN}
+ */
+ void addDatum(double datum);
+
+ /**
+ * @param datum item to remove to the running average
+ * @throws IllegalArgumentException if datum is {@link Double#NaN}
+ * @throws IllegalStateException if count is 0
+ */
+ void removeDatum(double datum);
+
+ /**
+ * @param delta amount by which to change a datum in the running average
+ * @throws IllegalArgumentException if delta is {@link Double#NaN}
+ * @throws IllegalStateException if count is 0
+ */
+ void changeDatum(double delta);
+
+ int getCount();
+
+ double getAverage();
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java Fri May 9 14:35:12 2008
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+/**
+ * <p>Extends {@link RunningAverage} by adding standard deviation too.</p>
+ */
+public interface RunningAverageAndStdDev extends RunningAverage {
+
+ /**
+ * @return standard deviation of data
+ */
+ double getStandardDeviation();
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java Fri May 9 14:35:12 2008
@@ -0,0 +1,130 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>An efficient Map-like class which caches values for keys. Values are not "put" into a {@link SoftCache};
+ * instead the caller supplies the instance with an implementation of {@link Retriever} which can load the
+ * value for a given key.</p>
+ *
+ * <p>This class is a bit misnamed at this point since it no longer uses <code>SoftReference</code> internally,
+ * but hey.</p>
+ *
+ * <p>The cache does not support <code>null</code> values or keys.</p>
+ *
+ * <p>Thanks to Amila Jayasooriya for helping evaluate performance of the rewrite of this class, as part of a
+ * Google Summer of Code 2007 project.</p>
+ */
+public final class SoftCache<K, V> {
+
+ private final FastMap<K, V> cache;
+ private final Retriever<? super K, ? extends V> retriever;
+
+ /**
+ * <p>Creates a new cache based on the given {@link Retriever}.</p>
+ *
+ * @param retriever object which can retrieve values for keys
+ */
+ public SoftCache(Retriever<? super K, ? extends V> retriever) {
+ this(retriever, FastMap.NO_MAX_SIZE);
+ }
+
+ /**
+ * <p>Creates a new cache based on the given {@link Retriever} and with given maximum size.</p>
+ *
+ * @param retriever object which can retrieve values for keys
+ * @param maxEntries maximum number of entries the cache will store before evicting some
+ */
+ public SoftCache(Retriever<? super K, ? extends V> retriever, int maxEntries) {
+ if (retriever == null) {
+ throw new IllegalArgumentException("retriever is null");
+ }
+ if (maxEntries < 1) {
+ throw new IllegalArgumentException("maxEntries must be at least 1");
+ }
+ cache = new FastMap<K, V>(11, maxEntries);
+ this.retriever = retriever;
+ }
+
+ /**
+ * <p>Returns cached value for a key. If it does not exist, it is loaded using a {@link Retriever}.</p>
+ *
+ * @param key cache key
+ * @return value for that key
+ * @throws TasteException if an exception occurs while retrieving a new cached value
+ */
+ public V get(K key) throws TasteException {
+ V value;
+ synchronized (cache) {
+ value = cache.get(key);
+ }
+ if (value == null) {
+ return getAndCacheValue(key);
+ }
+ return value;
+ }
+
+ /**
+ * <p>Uncaches any existing value for a given key.</p>
+ *
+ * @param key cache key
+ */
+ public void remove(K key) {
+ synchronized (cache) {
+ cache.remove(key);
+ }
+ }
+
+ /**
+ * <p>Clears the cache.</p>
+ */
+ public void clear() {
+ synchronized (cache) {
+ cache.clear();
+ }
+ }
+
+ private V getAndCacheValue(K key) throws TasteException {
+ V value = retriever.getValue(key);
+ synchronized (cache) {
+ cache.put(key, value);
+ }
+ return value;
+ }
+
+ @Override
+ public String toString() {
+ return "SoftCache[retriever:" + retriever + ']';
+ }
+
+ /**
+ * <p>Implementations can retrieve a value for a given key.</p>
+ */
+ public static interface Retriever<KK, VV> {
+
+ /**
+ * @param key key for which a value should be retrieved
+ * @return value for key
+ * @throws TasteException if an error occurs while retrieving the value
+ */
+ VV getValue(KK key) throws TasteException;
+ }
+
+}
Propchange: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SoftCache.java
------------------------------------------------------------------------------
svn:executable = *
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java Fri May 9 14:35:12 2008
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.io.Serializable;
+
+public final class WeightedRunningAverage implements RunningAverage, Serializable {
+
+ private double totalWeight;
+ private double average;
+
+ public WeightedRunningAverage() {
+ totalWeight = 0.0;
+ average = Double.NaN;
+ }
+
+ public void addDatum(double datum) {
+ addDatum(datum, 1.0);
+ }
+
+ public void addDatum(double datum, double weight) {
+ double oldTotalWeight = totalWeight;
+ totalWeight += weight;
+ if (oldTotalWeight <= 0.0) {
+ average = datum * weight;
+ } else {
+ average = average * (oldTotalWeight / totalWeight) + datum / totalWeight;
+ }
+ }
+
+ public void removeDatum(double datum) {
+ removeDatum(datum, 1.0);
+ }
+
+ public void removeDatum(double datum, double weight) {
+ double oldTotalWeight = totalWeight;
+ totalWeight -= weight;
+ if (totalWeight <= 0.0) {
+ average = Double.NaN;
+ totalWeight = 0.0;
+ } else {
+ average = average * (oldTotalWeight / totalWeight) - datum / totalWeight;
+ }
+ }
+
+ public void changeDatum(double delta) {
+ changeDatum(delta, 1.0);
+ }
+
+ public void changeDatum(double delta, double weight) {
+ if (weight > totalWeight) {
+ throw new IllegalArgumentException();
+ }
+ average += (delta * weight) / totalWeight;
+ }
+
+ public double getTotalWeight() {
+ return totalWeight;
+ }
+
+ /**
+ * @return {@link #getTotalWeight()}
+ */
+ public int getCount() {
+ return (int) totalWeight;
+ }
+
+ public double getAverage() {
+ return average;
+ }
+
+ @Override
+ public String toString() {
+ return String.valueOf(average);
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/correlation/AveragingPreferenceInferrer.java Fri May 9 14:35:12 2008
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.correlation;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.PreferenceInferrer;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.RunningAverage;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+
+/**
+ * <p>Implementations of this interface compute an inferred preference for a {@link User} and an {@link Item}
+ * that the user has not expressed any preference for. This might be an average of other preferences scores
+ * from that user, for example. This technique is sometimes called "default voting".</p>
+ */
+public final class AveragingPreferenceInferrer implements PreferenceInferrer {
+
+ private static final SoftCache.Retriever<User, Double> RETRIEVER = new PrefRetriever();
+
+ private final SoftCache<User, Double> averagePreferenceValue;
+
+ public AveragingPreferenceInferrer(DataModel dataModel) throws TasteException {
+ averagePreferenceValue = new SoftCache<User, Double>(RETRIEVER, dataModel.getNumUsers());
+ refresh();
+ }
+
+ public double inferPreference(User user, Item item) throws TasteException {
+ if (user == null || item == null) {
+ throw new IllegalArgumentException("user or item is null");
+ }
+ return averagePreferenceValue.get(user);
+ }
+
+ public void refresh() {
+ averagePreferenceValue.clear();
+ }
+
+ private static final class PrefRetriever implements SoftCache.Retriever<User, Double> {
+
+ public Double getValue(User key) {
+ RunningAverage average = new FullRunningAverage();
+ Preference[] prefs = key.getPreferencesAsArray();
+ if (prefs.length == 0) {
+ return 0.0;
+ }
+ for (int i = 0; i < prefs.length; i++) {
+ average.addDatum(prefs[i].getValue());
+ }
+ return average.getAverage();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "AveragingPreferenceInferrer";
+ }
+
+}