You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/09 23:35:17 UTC
svn commit: r654943 [4/9] - in /lucene/mahout/trunk/core: ./ lib/
src/main/examples/org/ src/main/examples/org/apache/
src/main/examples/org/apache/mahout/ src/main/examples/org/apache/mahout/cf/
src/main/examples/org/apache/mahout/cf/taste/ src/main/e...
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Fri May 9 14:35:12 2008
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.file;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.IOUtils;
+import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A {@link DataModel} backed by a comma-delimited file. This class assumes that each line of the
+ * file contains a user ID, followed by item ID, followed by preferences value, separated by commas.
+ * The preference value is assumed to be parseable as a <code>double</code>. The user and item IDs
+ * are read literally as Strings and treated as such in the API. Note that this means that whitespace
+ * matters in the data file; they will be treated as part of the ID values.</p>
+ *
+ * <p>This class is not intended for use with very large amounts of data (over, say, a million rows). For
+ * that, a JDBC-backed {@link DataModel} and a database are more appropriate.
+ * The file will be periodically reloaded if a change is detected.</p>
+ */
+public class FileDataModel implements DataModel {
+
+ private static final Logger log = Logger.getLogger(FileDataModel.class.getName());
+
+ private static final Timer timer = new Timer(true);
+ private static final long RELOAD_CHECK_INTERVAL_MS = 60L * 1000L;
+
+  /** File holding the comma-delimited preference data. */
+  private final File dataFile;
+  /** Modification time of the data file as last recorded by this instance. */
+  private long lastModified;
+  /**
+   * True once a load has completed. Volatile because it is written while holding the reload lock
+   * but read without any lock by the accessor methods and by the timer thread.
+   */
+  private volatile boolean loaded;
+  /** Current in-memory model, replaced wholesale by each reload. Volatile for the same reason. */
+  private volatile DataModel delegate;
+  /** Held for the duration of an explicit refresh. */
+  private final ReentrantLock refreshLock;
+  /** Serializes reloads of the data file. */
+  private final ReentrantLock reloadLock;
+
+  /**
+   * Creates a model over the given file and schedules a recurring check for changes to it.
+   * The file's contents are not read by the constructor.
+   *
+   * @param dataFile file containing preferences data
+   * @throws IllegalArgumentException if dataFile is null
+   * @throws FileNotFoundException if dataFile does not exist or is a directory
+   */
+  public FileDataModel(File dataFile) throws FileNotFoundException {
+    if (dataFile == null) {
+      throw new IllegalArgumentException("dataFile is null");
+    }
+    boolean existsAsFile = dataFile.exists() && !dataFile.isDirectory();
+    if (!existsAsFile) {
+      throw new FileNotFoundException(dataFile.toString());
+    }
+
+    log.info("Creating FileDataModel for file " + dataFile);
+
+    this.refreshLock = new ReentrantLock();
+    this.reloadLock = new ReentrantLock();
+    this.dataFile = dataFile;
+    this.lastModified = dataFile.lastModified();
+
+    // First check happens one interval from now, then once per interval
+    timer.schedule(new RefreshTimerTask(), RELOAD_CHECK_INTERVAL_MS, RELOAD_CHECK_INTERVAL_MS);
+  }
+
+  /**
+   * Re-reads the whole data file and swaps in a newly built {@link GenericDataModel}.
+   * Concurrent callers are serialized on the reload lock. If reading or parsing fails, the
+   * previously loaded model (if any) stays in place because the new one is assigned only at the end.
+   *
+   * @throws IOException if the data file cannot be read
+   */
+  protected void reload() throws IOException {
+    // Acquire outside the try block: if lock() itself failed inside it, the finally clause
+    // would call unlock() on a lock this thread does not hold and mask the real error.
+    reloadLock.lock();
+    try {
+      Map<String, List<Preference>> prefsByUserID = new HashMap<String, List<Preference>>(1003);
+      processFile(prefsByUserID);
+
+      List<User> users = new ArrayList<User>(prefsByUserID.size());
+      for (Map.Entry<String, List<Preference>> entry : prefsByUserID.entrySet()) {
+        users.add(buildUser(entry.getKey(), entry.getValue()));
+      }
+
+      // Publish only once the replacement model is completely built
+      delegate = new GenericDataModel(users);
+      loaded = true;
+    } finally {
+      reloadLock.unlock();
+    }
+  }
+
+  /**
+   * Reads the data file line by line and passes every non-empty line to
+   * {@link #processLine(String, Map)}. Empty lines are skipped; reading stops only at end of file.
+   *
+   * @param data map from user ID to that user's preferences, filled in by this method
+   * @throws IOException if the file cannot be opened or read
+   */
+  private void processFile(Map<String, List<Preference>> data) throws IOException {
+    log.info("Reading file info...");
+    BufferedReader reader = null;
+    try {
+      reader = new BufferedReader(new FileReader(dataFile));
+      String line;
+      while ((line = reader.readLine()) != null) {
+        if (line.length() == 0) {
+          // A blank line is not end-of-data; treating it as such silently dropped every
+          // row that followed it.
+          continue;
+        }
+        if (log.isLoggable(Level.FINE)) {
+          log.fine("Read line: " + line);
+        }
+        processLine(line, data);
+      }
+    } finally {
+      IOUtils.quietClose(reader);
+    }
+  }
+
+  /**
+   * Parses one <code>userID,itemID,value</code> line and appends the resulting preference to the
+   * list kept for that user, creating the list on first sight of the user ID.
+   *
+   * @param line non-empty line from the data file
+   * @param data map from user ID to that user's preferences
+   * @throws IllegalArgumentException if the line has fewer than two commas, or if the text after
+   *  the second comma is not parseable as a double
+   */
+  private void processLine(String line, Map<String, List<Preference>> data) {
+    int firstComma = line.indexOf(',');
+    if (firstComma < 0) {
+      throw new IllegalArgumentException("Bad line: " + line);
+    }
+    int secondComma = line.indexOf(',', firstComma + 1);
+    if (secondComma < 0) {
+      throw new IllegalArgumentException("Bad line: " + line);
+    }
+
+    String userID = line.substring(0, firstComma);
+    String itemID = line.substring(firstComma + 1, secondComma);
+    // Parse before touching the map so a bad value leaves no empty list behind
+    double preferenceValue = Double.parseDouble(line.substring(secondComma + 1));
+
+    List<Preference> userPrefs = data.get(userID);
+    if (userPrefs == null) {
+      userPrefs = new ArrayList<Preference>();
+      data.put(userID, userPrefs);
+    }
+
+    Item item = buildItem(itemID);
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Read item " + item + " for user ID " + userID);
+    }
+    // No User object exists yet at this point, so the preference is built with a null user
+    userPrefs.add(buildPreference(null, item, preferenceValue));
+  }
+
+  /**
+   * Loads the data file if that has not happened yet. The flag is re-checked while holding the
+   * reload lock so that threads racing on first access do not each re-parse the whole file.
+   *
+   * @throws TasteException if the data file cannot be read
+   */
+  private void checkLoaded() throws TasteException {
+    if (loaded) {
+      return;
+    }
+    // reloadLock is a ReentrantLock, so reload() may acquire it again on this thread
+    reloadLock.lock();
+    try {
+      if (!loaded) {
+        reload();
+      }
+    } catch (IOException ioe) {
+      throw new TasteException(ioe);
+    } finally {
+      reloadLock.unlock();
+    }
+  }
+
+ /**
+ * Loads the data file on first use, then returns all users from the in-memory model.
+ *
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public Iterable<? extends User> getUsers() throws TasteException {
+ checkLoaded();
+ return delegate.getUsers();
+ }
+
+ /**
+ * Loads the data file on first use, then looks up one user in the in-memory model.
+ *
+ * @throws NoSuchElementException if there is no such user
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public User getUser(Object id) throws TasteException {
+ checkLoaded();
+ return delegate.getUser(id);
+ }
+
+ /**
+ * Loads the data file on first use, then returns all items from the in-memory model.
+ *
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public Iterable<? extends Item> getItems() throws TasteException {
+ checkLoaded();
+ return delegate.getItems();
+ }
+
+ /**
+ * Loads the data file on first use, then looks up one item in the in-memory model.
+ *
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public Item getItem(Object id) throws TasteException {
+ checkLoaded();
+ return delegate.getItem(id);
+ }
+
+ /**
+ * Loads the data file on first use, then returns the preferences recorded for the given item.
+ *
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public Iterable<? extends Preference> getPreferencesForItem(Object itemID) throws TasteException {
+ checkLoaded();
+ return delegate.getPreferencesForItem(itemID);
+ }
+
+ /**
+ * Array-returning variant of {@link #getPreferencesForItem(Object)}.
+ *
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public Preference[] getPreferencesForItemAsArray(Object itemID) throws TasteException {
+ checkLoaded();
+ return delegate.getPreferencesForItemAsArray(itemID);
+ }
+
+ /**
+ * Loads the data file on first use, then returns the item count of the in-memory model.
+ *
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public int getNumItems() throws TasteException {
+ checkLoaded();
+ return delegate.getNumItems();
+ }
+
+ /**
+ * Loads the data file on first use, then returns the user count of the in-memory model.
+ *
+ * @throws TasteException if loading the data file fails or the in-memory model reports an error
+ */
+ public int getNumUsers() throws TasteException {
+ checkLoaded();
+ return delegate.getNumUsers();
+ }
+
+ /**
+ * Not supported: this model never writes to its data file.
+ *
+ * @throws UnsupportedOperationException always
+ */
+ public void setPreference(Object userID, Object itemID, double value) {
+ throw new UnsupportedOperationException();
+ }
+
+ /**
+ * Not supported: this model never writes to its data file.
+ *
+ * @throws UnsupportedOperationException always
+ */
+ public void removePreference(Object userID, Object itemID) {
+ throw new UnsupportedOperationException();
+ }
+
+  /**
+   * Forces a reload of the data file, unless another thread is already refreshing, in which case
+   * this call returns immediately without reloading. I/O failures are logged rather than thrown.
+   */
+  public void refresh() {
+    // tryLock() tests and acquires in one atomic step. Checking isLocked() and then calling
+    // lock() separately lets a second caller pass the check and then block for a redundant reload.
+    if (!refreshLock.tryLock()) {
+      return;
+    }
+    try {
+      reload();
+    } catch (IOException ioe) {
+      log.log(Level.WARNING, "Unexpected exception while refreshing", ioe);
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+ /**
+ * Subclasses may override to return a different {@link User} implementation.
+ *
+ * @param id user ID
+ * @param prefs user preferences
+ * @return {@link GenericUser} by default
+ */
+ protected User buildUser(String id, List<Preference> prefs) {
+ return new GenericUser<String>(id, prefs);
+ }
+
+ /**
+ * Subclasses may override to return a different {@link Item} implementation.
+ *
+ * @param id item ID
+ * @return {@link GenericItem} by default
+ */
+ protected Item buildItem(String id) {
+ return new GenericItem<String>(id);
+ }
+
+ /**
+ * Subclasses may override to return a different {@link Preference} implementation.
+ *
+ * @param user {@link User} who expresses the preference
+ * @param item preferred {@link Item}
+ * @param value preference value
+ * @return {@link GenericPreference} by default
+ */
+ protected Preference buildPreference(User user, Item item, double value) {
+ return new GenericPreference(user, item, value);
+ }
+
+ @Override
+ public String toString() {
+ return "FileDataModel[dataFile:" + dataFile + ']';
+ }
+
+  /**
+   * Periodic task that reloads the data file when its modification time has advanced.
+   * It does nothing until the model has been loaded once. No exception is allowed to escape
+   * {@link #run()}, because the task runs on a static {@link Timer} shared by every instance.
+   */
+  private final class RefreshTimerTask extends TimerTask {
+
+    @Override
+    public void run() {
+      if (!loaded) {
+        return;
+      }
+      long newModified = dataFile.lastModified();
+      if (newModified <= lastModified) {
+        return;
+      }
+      log.fine("File has changed; reloading...");
+      try {
+        reload();
+        // Record the timestamp only after success, so a failed reload is retried on the next run
+        lastModified = newModified;
+      } catch (IOException ioe) {
+        log.log(Level.WARNING, "Error while reloading file", ioe);
+      } catch (RuntimeException re) {
+        // For example a malformed line. An unchecked exception leaving run() would terminate
+        // the shared timer thread and stop refreshes for all models.
+        log.log(Level.WARNING, "Error while reloading file", re);
+      }
+    }
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Fri May 9 14:35:12 2008
@@ -0,0 +1,644 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.IOUtils;
+import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.JDBCDataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+
+import javax.naming.Context;
+import javax.naming.InitialContext;
+import javax.naming.NamingException;
+import javax.sql.DataSource;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>An abstract superclass for JDBC-related {@link DataModel} implementations, providing most of the common
+ * functionality that any such implementation would need.</p>
+ *
+ * <p>Performance will be a concern with any JDBC-based {@link DataModel}. There are going to be lots of
+ * simultaneous reads and some writes to one table. Make sure the table is set up optimally -- for example,
+ * you'll want to establish indexes.</p>
+ *
+ * <p>You'll also want to use connection pooling of some kind. Most J2EE containers like Tomcat
+ * provide connection pooling, so make sure the {@link DataSource} it exposes is using pooling. Outside a
+ * J2EE container, you can use packages like Jakarta's
+ * <a href="http://jakarta.apache.org/commons/dbcp/">DBCP</a> to create a {@link DataSource} on top of your
+ * database whose {@link Connection}s are pooled.</p>
+ *
+ * <p>Also note: this default implementation assumes that the user and item ID keys are {@link String}s, for
+ * maximum flexibility. You can override this behavior by subclassing an implementation and overriding
+ * {@link #buildItem(String)} and {@link #buildUser(String, List)}. If you don't, just make sure you use
+ * {@link String}s as IDs throughout your code. If your IDs are really numeric, and you use, say, {@link Long}
+ * for IDs in the rest of your code, you will run into subtle problems because the {@link Long} values won't
+ * be equal to or compare correctly to the underlying {@link String} key values.</p>
+ */
+public abstract class AbstractJDBCDataModel implements JDBCDataModel {
+
+ private static final Logger log = Logger.getLogger(AbstractJDBCDataModel.class.getName());
+
+ public static final String DEFAULT_DATASOURCE_NAME = "jdbc/taste";
+ public static final String DEFAULT_PREFERENCE_TABLE = "taste_preferences";
+ public static final String DEFAULT_USER_ID_COLUMN = "user_id";
+ public static final String DEFAULT_ITEM_ID_COLUMN = "item_id";
+ public static final String DEFAULT_PREFERENCE_COLUMN = "preference";
+
+ private final DataSource dataSource;
+ private final String getUserSQL;
+ private final String getNumItemsSQL;
+ private final String getNumUsersSQL;
+ private final String setPreferenceSQL;
+ private final String removePreferenceSQL;
+ private final String getUsersSQL;
+ private final String getItemsSQL;
+ private final String getItemSQL;
+ private final String getPrefsForItemSQL;
+
+ /**
+ * Validates the {@link DataSource} and the SQL statements used by this model, then stores them.
+ * Every argument must be non-null with a non-empty string form; each one is logged at FINE level.
+ * A warning is logged if the data source is not a {@link ConnectionPoolDataSource}.
+ *
+ * @param dataSource source of database connections
+ * @param getUserSQL query taking a user ID as parameter 1; rows are read as item ID (column 1)
+ *  and preference value (column 2)
+ * @param getNumItemsSQL query whose first row, column 1, is read as the number of items
+ * @param getNumUsersSQL query whose first row, column 1, is read as the number of users
+ * @param setPreferenceSQL update taking user ID (1), item ID (2) and the value (3 and 4)
+ * @param removePreferenceSQL update taking user ID (1) and item ID (2)
+ * @param getUsersSQL query returning all preferences; rows are read as item ID (column 1),
+ *  preference value (column 2) and user ID (column 3), and consecutive rows with the same
+ *  user ID are grouped into one user
+ * @param getItemsSQL query returning all items; column 1 is read as the item ID
+ * @param getItemSQL query taking an item ID as parameter 1; any returned row means it exists
+ * @param getPrefsForItemSQL query taking an item ID as parameter 1; rows are read as
+ *  preference value (column 1) and user ID (column 2)
+ * @param getUsersPreferringItemSQL validated and logged here, but not stored by this class
+ * @throws IllegalArgumentException if any argument is null or empty
+ */
+ protected AbstractJDBCDataModel(DataSource dataSource,
+ String getUserSQL,
+ String getNumItemsSQL,
+ String getNumUsersSQL,
+ String setPreferenceSQL,
+ String removePreferenceSQL,
+ String getUsersSQL,
+ String getItemsSQL,
+ String getItemSQL,
+ String getPrefsForItemSQL,
+ String getUsersPreferringItemSQL) {
+
+ log.fine("Creating AbstractJDBCModel...");
+ checkNotNullAndLog("dataSource", dataSource);
+ checkNotNullAndLog("getUserSQL", getUserSQL);
+ checkNotNullAndLog("getNumItemsSQL", getNumItemsSQL);
+ checkNotNullAndLog("getNumUsersSQL", getNumUsersSQL);
+ checkNotNullAndLog("setPreferenceSQL", setPreferenceSQL);
+ checkNotNullAndLog("removePreferenceSQL", removePreferenceSQL);
+ checkNotNullAndLog("getUsersSQL", getUsersSQL);
+ checkNotNullAndLog("getItemsSQL", getItemsSQL);
+ checkNotNullAndLog("getItemSQL", getItemSQL);
+ checkNotNullAndLog("getPrefsForItemSQL", getPrefsForItemSQL);
+ checkNotNullAndLog("getUsersPreferringItemSQL", getUsersPreferringItemSQL);
+
+ // Only a warning: a DataSource of another type may still pool connections itself
+ if (!(dataSource instanceof ConnectionPoolDataSource)) {
+ log.warning("You are not using ConnectionPoolDataSource. Make sure your DataSource pools connections " +
+ "to the database itself, or database performance will be severely reduced.");
+ }
+
+ this.dataSource = dataSource;
+ this.getUserSQL = getUserSQL;
+ this.getNumItemsSQL = getNumItemsSQL;
+ this.getNumUsersSQL = getNumUsersSQL;
+ this.setPreferenceSQL = setPreferenceSQL;
+ this.removePreferenceSQL = removePreferenceSQL;
+ this.getUsersSQL = getUsersSQL;
+ this.getItemsSQL = getItemsSQL;
+ this.getItemSQL = getItemSQL;
+ this.getPrefsForItemSQL = getPrefsForItemSQL;
+ }
+
+  /**
+   * Rejects an argument that is null or whose string form is empty; otherwise logs it at FINE level.
+   *
+   * @param argName name of the argument, used in the exception and log messages
+   * @param value argument value to check
+   * @throws IllegalArgumentException if value is null or its string form is empty
+   */
+  private static void checkNotNullAndLog(String argName, Object value) {
+    boolean missing = value == null || value.toString().length() == 0;
+    if (missing) {
+      throw new IllegalArgumentException(argName + " is null or empty");
+    }
+    if (!log.isLoggable(Level.FINE)) {
+      return;
+    }
+    log.fine(argName + ": " + value);
+  }
+
+ /**
+ * <p>Looks up a {@link DataSource} by name from JNDI. "java:comp/env/" is prepended to the argument
+ * before looking up the name in JNDI.</p>
+ *
+ * @param dataSourceName JNDI name where a {@link DataSource} is bound (e.g. "jdbc/taste")
+ * @return {@link DataSource} under that JNDI name
+ * @throws TasteException if a JNDI error occurs
+ */
+ public static DataSource lookupDataSource(String dataSourceName) throws TasteException {
+ Context context = null;
+ try {
+ context = new InitialContext();
+ return (DataSource) context.lookup("java:comp/env/" + dataSourceName);
+ } catch (NamingException ne) {
+ throw new TasteException(ne);
+ } finally {
+ if (context != null) {
+ try {
+ context.close();
+ } catch (NamingException ne) {
+ log.log(Level.WARNING, "Error while closing Context; continuing...", ne);
+ }
+ }
+ }
+ }
+
+ /**
+ * @return the {@link DataSource} that this instance is using
+ */
+ public DataSource getDataSource() {
+ return dataSource;
+ }
+
+ public final Iterable<? extends User> getUsers() throws TasteException {
+ log.fine("Retrieving all users...");
+ return new IteratorIterable<User>(new ResultSetUserIterator(dataSource, getUsersSQL));
+ }
+
+ /**
+ * @throws NoSuchElementException if there is no such user
+ */
+ public final User getUser(Object id) throws TasteException {
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Retrieving user ID '" + id + "'...");
+ }
+
+ Connection conn = null;
+ PreparedStatement stmt = null;
+ ResultSet rs = null;
+
+ String idString = id.toString();
+
+ try {
+ conn = dataSource.getConnection();
+ stmt = conn.prepareStatement(getUserSQL);
+ stmt.setObject(1, id);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Executing SQL query: " + getUserSQL);
+ }
+ rs = stmt.executeQuery();
+
+ List<Preference> prefs = new ArrayList<Preference>();
+ while (rs.next()) {
+ addPreference(rs, prefs);
+ }
+
+ if (prefs.isEmpty()) {
+ throw new NoSuchElementException();
+ }
+
+ return buildUser(idString, prefs);
+
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Exception while retrieving user", sqle);
+ throw new TasteException(sqle);
+ } finally {
+ IOUtils.safeClose(rs, stmt, conn);
+ }
+
+ }
+
+ public final Iterable<? extends Item> getItems() throws TasteException {
+ log.fine("Retrieving all items...");
+ return new IteratorIterable<Item>(new ResultSetItemIterator(dataSource, getItemsSQL));
+ }
+
+ public final Item getItem(Object id) throws TasteException {
+ return getItem(id, false);
+ }
+
+  /**
+   * Returns the item with the given ID, optionally skipping the database existence check.
+   *
+   * @param id item ID; its string form is what is passed to {@link #buildItem(String)}
+   * @param assumeExists if true, no query is made and an item is built for the ID directly
+   * @return item built by {@link #buildItem(String)}
+   * @throws NoSuchElementException if assumeExists is false and the query returns no row
+   * @throws TasteException if a database error occurs
+   */
+  public final Item getItem(Object id, boolean assumeExists) throws TasteException {
+
+    // Convert rather than cast: getUser() accepts any ID type through toString(), whereas a
+    // plain (String) cast here failed with ClassCastException for non-String IDs.
+    String idString = id == null ? null : id.toString();
+
+    if (assumeExists) {
+      return buildItem(idString);
+    }
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Retrieving item ID '" + id + "'...");
+    }
+
+    Connection conn = null;
+    PreparedStatement stmt = null;
+    ResultSet rs = null;
+
+    try {
+      conn = dataSource.getConnection();
+      stmt = conn.prepareStatement(getItemSQL);
+      stmt.setObject(1, id);
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Executing SQL query: " + getItemSQL);
+      }
+      rs = stmt.executeQuery();
+      // Any returned row is taken as proof that the item exists
+      if (rs.next()) {
+        return buildItem(idString);
+      } else {
+        throw new NoSuchElementException();
+      }
+    } catch (SQLException sqle) {
+      log.log(Level.WARNING, "Exception while retrieving item", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.safeClose(rs, stmt, conn);
+    }
+  }
+
+ public final Iterable<? extends Preference> getPreferencesForItem(Object itemID) throws TasteException {
+ return doGetPreferencesForItem(itemID);
+ }
+
+ public final Preference[] getPreferencesForItemAsArray(Object itemID) throws TasteException {
+ List<? extends Preference> list = doGetPreferencesForItem(itemID);
+ return list.toArray(new Preference[list.size()]);
+ }
+
+ private List<? extends Preference> doGetPreferencesForItem(Object itemID) throws TasteException {
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Retrieving preferences for item ID '" + itemID + "'...");
+ }
+ Item item = getItem(itemID);
+ Connection conn = null;
+ PreparedStatement stmt = null;
+ ResultSet rs = null;
+ try {
+ conn = dataSource.getConnection();
+ stmt = conn.prepareStatement(getPrefsForItemSQL);
+ stmt.setObject(1, itemID);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Executing SQL query: " + getPrefsForItemSQL);
+ }
+ rs = stmt.executeQuery();
+ List<Preference> prefs = new ArrayList<Preference>();
+ while (rs.next()) {
+ double preference = rs.getDouble(1);
+ String userID = rs.getString(2);
+ Preference pref = buildPreference(buildUser(userID, null), item, preference);
+ prefs.add(pref);
+ }
+ return prefs;
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Exception while retrieving prefs for item", sqle);
+ throw new TasteException(sqle);
+ } finally {
+ IOUtils.safeClose(rs, stmt, conn);
+ }
+ }
+
+ public final int getNumItems() throws TasteException {
+ return getNumThings("items", getNumItemsSQL);
+ }
+
+ public final int getNumUsers() throws TasteException {
+ return getNumThings("users", getNumUsersSQL);
+ }
+
+ private int getNumThings(String name, String sql) throws TasteException {
+ log.fine("Retrieving number of " + name + " in model...");
+ Connection conn = null;
+ Statement stmt = null;
+ ResultSet rs = null;
+ try {
+ conn = dataSource.getConnection();
+ stmt = conn.createStatement();
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Executing SQL query: " + sql);
+ }
+ rs = stmt.executeQuery(sql);
+ rs.next();
+ return rs.getInt(1);
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Exception while retrieving number of " + name, sqle);
+ throw new TasteException(sqle);
+ } finally {
+ IOUtils.safeClose(rs, stmt, conn);
+ }
+ }
+
+ public final void setPreference(Object userID, Object itemID, double value)
+ throws TasteException {
+ if (userID == null || itemID == null) {
+ throw new IllegalArgumentException("userID or itemID is null");
+ }
+ if (Double.isNaN(value)) {
+ throw new IllegalArgumentException("Invalid value: " + value);
+ }
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Setting preference for user '" + userID + "', item '" + itemID + "', value " + value);
+ }
+
+ Connection conn = null;
+ PreparedStatement stmt = null;
+
+ try {
+ conn = dataSource.getConnection();
+
+ stmt = conn.prepareStatement(setPreferenceSQL);
+ stmt.setObject(1, userID);
+ stmt.setObject(2, itemID);
+ stmt.setDouble(3, value);
+ stmt.setDouble(4, value);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Executing SQL update: " + setPreferenceSQL);
+ }
+ stmt.executeUpdate();
+
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Exception while setting preference", sqle);
+ throw new TasteException(sqle);
+ } finally {
+ IOUtils.safeClose(null, stmt, conn);
+ }
+ }
+
+ public final void removePreference(Object userID, Object itemID)
+ throws TasteException {
+ if (userID == null || itemID == null) {
+ throw new IllegalArgumentException("userID or itemID is null");
+ }
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Removing preference for user '" + userID + "', item '" + itemID + '\'');
+ }
+
+ Connection conn = null;
+ PreparedStatement stmt = null;
+
+ try {
+ conn = dataSource.getConnection();
+
+ stmt = conn.prepareStatement(removePreferenceSQL);
+ stmt.setObject(1, userID);
+ stmt.setObject(2, itemID);
+
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Executing SQL update: " + removePreferenceSQL);
+ }
+ stmt.executeUpdate();
+
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Exception while removing preference", sqle);
+ throw new TasteException(sqle);
+ } finally {
+ IOUtils.safeClose(null, stmt, conn);
+ }
+ }
+
+ public final void refresh() {
+ // do nothing
+ }
+
+
+ private void addPreference(ResultSet rs, Collection<Preference> prefs)
+ throws SQLException {
+ Item item = buildItem(rs.getString(1));
+ double preferenceValue = rs.getDouble(2);
+ prefs.add(buildPreference(null, item, preferenceValue));
+ }
+
+ /**
+ * <p>Default implementation which returns a new {@link GenericUser} with {@link String} IDs.
+ * Subclasses may override to return a different {@link User} implementation.</p>
+ *
+ * @param id user ID
+ * @param prefs user preferences
+ * @return {@link GenericUser} by default
+ */
+ protected User buildUser(String id, List<Preference> prefs) {
+ return new GenericUser<String>(id, prefs);
+ }
+
+ /**
+ * <p>Default implementation which returns a new {@link GenericItem} with {@link String} IDs.
+ * Subclasses may override to return a different {@link Item} implementation.</p>
+ *
+ * @param id item ID
+ * @return {@link GenericItem} by default
+ */
+ protected Item buildItem(String id) {
+ return new GenericItem<String>(id);
+ }
+
+ /**
+ * Subclasses may override to return a different {@link Preference} implementation.
+ *
+ * @param user {@link User} who expresses the preference; this class passes null when it builds
+ *  preferences from a user's own rows, before the {@link User} object exists
+ * @param item preferred {@link Item}
+ * @param value preference value
+ * @return {@link GenericPreference} by default
+ */
+ protected Preference buildPreference(User user, Item item, double value) {
+ return new GenericPreference(user, item, value);
+ }
+
+  /**
+   * <p>An {@link java.util.Iterator} which returns {@link org.apache.mahout.cf.taste.model.User}s from a
+   * {@link java.sql.ResultSet}. This is a useful way to iterate over all user data since it does not
+   * require all data to be read into memory at once. It does however require that the DB connection
+   * be held open.</p>
+   *
+   * <p>Rows are read as item ID (column 1), preference value (column 2) and user ID (column 3).
+   * Consecutive rows with the same user ID are grouped into one user, so the query must return each
+   * user's rows next to each other.</p>
+   *
+   * <p>The cursor only ever moves forward: it always rests on the first row not yet returned.
+   * Database resources are released as soon as the last row has been consumed, or on an SQL error;
+   * callers should make sure to "drain" the entire set of data to avoid tying up database resources.</p>
+   */
+  private final class ResultSetUserIterator implements Iterator<User> {
+
+    private final Connection connection;
+    private final Statement statement;
+    private final ResultSet resultSet;
+    private boolean closed;
+
+    private ResultSetUserIterator(DataSource dataSource, String getUsersSQL) throws TasteException {
+      try {
+        connection = dataSource.getConnection();
+        statement = connection.createStatement();
+        if (log.isLoggable(Level.FINE)) {
+          log.fine("Executing SQL query: " + getUsersSQL);
+        }
+        resultSet = statement.executeQuery(getUsersSQL);
+        // Step onto the first row now. An empty result closes the iterator at once, so hasNext()
+        // never needs isLast()/isAfterLast(), which misreport an empty result set and are
+        // optional for forward-only cursors.
+        if (!resultSet.next()) {
+          close();
+        }
+      } catch (SQLException sqle) {
+        close();
+        throw new TasteException(sqle);
+      }
+    }
+
+    public boolean hasNext() {
+      // While open, the cursor is positioned on an unread row
+      return !closed;
+    }
+
+    public User next() {
+
+      if (closed) {
+        throw new NoSuchElementException();
+      }
+
+      try {
+        String currentUserID = resultSet.getString(3);
+        List<Preference> prefs = new ArrayList<Preference>();
+        boolean moreRows;
+        do {
+          addPreference(resultSet, prefs);
+          moreRows = resultSet.next();
+          // Stop on the first row of the next user and leave the cursor there; this avoids
+          // previous(), which a default (forward-only) statement does not support
+        } while (moreRows && currentUserID.equals(resultSet.getString(3)));
+
+        if (!moreRows) {
+          close();
+        }
+        return buildUser(currentUserID, prefs);
+
+      } catch (SQLException sqle) {
+        // No good way to handle this since we can't throw a checked exception from next()
+        log.log(Level.WARNING, "Exception while iterating over users", sqle);
+        close();
+        throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
+      }
+    }
+
+    /**
+     * @throws UnsupportedOperationException always
+     */
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+
+    private void close() {
+      closed = true;
+      IOUtils.safeClose(resultSet, statement, connection);
+    }
+
+  }
+
+ /**
+ * <p>An {@link java.util.Iterator} which returns {@link org.apache.mahout.cf.taste.model.Item}s from a
+ * {@link java.sql.ResultSet}. This is a useful way to iterate over all user data since it does not require
+ * all data to be read into memory at once. It does however require that the DB connection be held open. Note
+ * that this class will only release database resources after {@link #hasNext()} has been called and has returned
+ * <code>false</code>; callers should make sure to "drain" the entire set of data to avoid tying up database
+ * resources.</p>
+ */
+ private final class ResultSetItemIterator implements Iterator<Item> {
+
+ private final Connection connection;
+ private final Statement statement;
+ private final ResultSet resultSet;
+ private boolean closed;
+
+ private ResultSetItemIterator(DataSource dataSource, String getItemsSQL) throws TasteException {
+ try {
+ connection = dataSource.getConnection();
+ statement = connection.createStatement();
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Executing SQL query: " + getItemsSQL);
+ }
+ resultSet = statement.executeQuery(getItemsSQL);
+ } catch (SQLException sqle) {
+ close();
+ throw new TasteException(sqle);
+ }
+ }
+
+ public boolean hasNext() {
+ boolean nextExists = false;
+ if (!closed) {
+ try {
+ // No more results if cursor is pointing at last row, or after
+ // Thanks to Rolf W. for pointing out an earlier bug in this condition
+ if (resultSet.isLast() || resultSet.isAfterLast()) {
+ close();
+ } else {
+ nextExists = true;
+ }
+ } catch (SQLException sqle) {
+ log.log(Level.WARNING, "Unexpected exception while accessing ResultSet; continuing...", sqle);
+ close();
+ }
+ }
+ return nextExists;
+ }
+
+ public Item next() {
+
+ if (closed) {
+ throw new NoSuchElementException();
+ }
+
+ try {
+ if (resultSet.next()) {
+ return buildItem(resultSet.getString(1));
+ } else {
+ throw new NoSuchElementException();
+ }
+ } catch (SQLException sqle) {
+ // No good way to handle this since we can't throw an exception
+ log.log(Level.WARNING, "Exception while iterating over items", sqle);
+ close();
+ throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
+ }
+
+ }
+
+ /**
+ * @throws UnsupportedOperationException
+ */
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ private void close() {
+ closed = true;
+ IOUtils.safeClose(resultSet, statement, connection);
+ }
+
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java Fri May 9 14:35:12 2008
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.commons.dbcp.PoolingDataSource;
+import org.apache.commons.pool.ObjectPool;
+import org.apache.commons.pool.PoolableObjectFactory;
+import org.apache.commons.pool.impl.StackObjectPool;
+
+import javax.sql.DataSource;
+import java.io.PrintWriter;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.logging.Logger;
+
+/**
+ * <p>A wrapper {@link DataSource} which pools connections. Why can't Jakarta Commons DBCP provide this directly?</p>
+ */
+public final class ConnectionPoolDataSource implements DataSource {
+
+  private static final Logger log = Logger.getLogger(ConnectionPoolDataSource.class.getName());
+
+  /** The pooling data source that every call on this wrapper is forwarded to. */
+  private final DataSource delegate;
+
+  /**
+   * Wraps the given data source in a {@link PoolingDataSource} backed by a {@link StackObjectPool}
+   * whose objects are connections obtained from that data source.
+   *
+   * @param underlyingDataSource data source that supplies the actual connections
+   * @throws IllegalArgumentException if underlyingDataSource is <code>null</code>
+   */
+  public ConnectionPoolDataSource(DataSource underlyingDataSource) {
+    if (underlyingDataSource == null) {
+      throw new IllegalArgumentException("underlyingDataSource is null");
+    }
+    this.delegate =
+        new PoolingDataSource(new StackObjectPool(new DataSourceConnectionFactory(underlyingDataSource)));
+  }
+
+  /** Forwards to the pooling delegate. */
+  public Connection getConnection() throws SQLException {
+    return delegate.getConnection();
+  }
+
+  /** Forwards to the pooling delegate. */
+  public Connection getConnection(String username, String password) throws SQLException {
+    return delegate.getConnection(username, password);
+  }
+
+  /** Forwards to the pooling delegate. */
+  public PrintWriter getLogWriter() throws SQLException {
+    return delegate.getLogWriter();
+  }
+
+  /** Forwards to the pooling delegate. */
+  public void setLogWriter(PrintWriter printWriter) throws SQLException {
+    delegate.setLogWriter(printWriter);
+  }
+
+  /** Forwards to the pooling delegate. */
+  public void setLoginTimeout(int timeout) throws SQLException {
+    delegate.setLoginTimeout(timeout);
+  }
+
+  /** Forwards to the pooling delegate. */
+  public int getLoginTimeout() throws SQLException {
+    return delegate.getLoginTimeout();
+  }
+
+  // The two methods below exist only so that this class compiles against JDK 6, where they were added
+  // to the interface. Ideally they would forward to 'delegate' as well, but then the class would
+  // compile *only* on JDK 6 -- hence the placeholder implementations.
+
+  /**
+   * @throws SQLException always
+   */
+  public <T> T unwrap(Class<T> iface) throws SQLException {
+    throw new SQLException("Unsupported operation");
+  }
+
+  /**
+   * @return false always
+   */
+  public boolean isWrapperFor(Class<?> iface) {
+    return false;
+  }
+
+  /**
+   * Pool object factory whose pooled objects are {@link Connection}s taken from a {@link DataSource}.
+   */
+  private static class DataSourceConnectionFactory implements PoolableObjectFactory {
+
+    private final DataSource dataSource;
+
+    private DataSourceConnectionFactory(DataSource source) {
+      this.dataSource = source;
+    }
+
+    /** Obtains a new connection from the wrapped data source. */
+    public Object makeObject() throws SQLException {
+      log.fine("Obtaining pooled connection");
+      return dataSource.getConnection();
+    }
+
+    /** Closes the given object, which must be a {@link Connection}. */
+    public void destroyObject(Object o) throws SQLException {
+      log.fine("Closing pooled connection");
+      Connection pooled = (Connection) o;
+      pooled.close();
+    }
+
+    /** Performs no validation; every object is reported as valid. */
+    public boolean validateObject(Object o) {
+      return true;
+    }
+
+    public void activateObject(Object o) {
+      // nothing to do on activation
+    }
+
+    public void passivateObject(Object o) {
+      // nothing to do on passivation
+    }
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java Fri May 9 14:35:12 2008
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.IOUtils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * <p>A generic {@link org.apache.mahout.cf.taste.model.DataModel} designed for use with other JDBC data sources;
+ * one just specifies all necessary SQL queries to the constructor here. Optionally, the queries can
+ * be specified from a {@link Properties} object, {@link File}, or {@link InputStream}. This class is
+ * most appropriate when other existing implementations of {@link AbstractJDBCDataModel} are not suitable.
+ * If you are using this class to support a major database, consider contributing a specialized implementation
+ * of {@link AbstractJDBCDataModel} to the project for this database.</p>
+ */
+public class GenericJDBCDataModel extends AbstractJDBCDataModel {
+
+  public static final String DATA_SOURCE_KEY = "dataSource";
+  public static final String GET_USER_SQL_KEY = "getUserSQL";
+  public static final String GET_NUM_USERS_SQL_KEY = "getNumUsersSQL";
+  public static final String GET_NUM_ITEMS_SQL_KEY = "getNumItemsSQL";
+  public static final String SET_PREFERENCE_SQL_KEY = "setPreferenceSQL";
+  public static final String REMOVE_PREFERENCE_SQL_KEY = "removePreferenceSQL";
+  public static final String GET_USERS_SQL_KEY = "getUsersSQL";
+  public static final String GET_ITEMS_SQL_KEY = "getItemsSQL";
+  public static final String GET_ITEM_SQL_KEY = "getItemSQL";
+  public static final String GET_PREFS_FOR_ITEM_SQL_KEY = "getPrefsForItemSQL";
+  public static final String GET_USERS_PREFERRING_ITEM_SQL_KEY = "getUsersPreferringItemSQL";
+
+  /**
+   * <p>Specifies all SQL queries in a {@link Properties} object. See the <code>*_KEY</code>
+   * constants in this class (e.g. {@link #GET_USER_SQL_KEY}) for a list of all keys which
+   * must map to a value in this object.</p>
+   *
+   * @param props {@link Properties} object containing values
+   * @throws TasteException if anything goes wrong during initialization
+   */
+  public GenericJDBCDataModel(Properties props) throws TasteException {
+    // NOTE(review): MySQLJDBCDataModel in this package passes its "num items" query before its
+    // "num users" query to the same superclass constructor, the opposite of the order used here.
+    // One of the two is presumably wrong -- confirm against AbstractJDBCDataModel's parameter order.
+    super(lookupDataSource(props.getProperty(DATA_SOURCE_KEY)),
+          props.getProperty(GET_USER_SQL_KEY),
+          props.getProperty(GET_NUM_USERS_SQL_KEY),
+          props.getProperty(GET_NUM_ITEMS_SQL_KEY),
+          props.getProperty(SET_PREFERENCE_SQL_KEY),
+          props.getProperty(REMOVE_PREFERENCE_SQL_KEY),
+          props.getProperty(GET_USERS_SQL_KEY),
+          props.getProperty(GET_ITEMS_SQL_KEY),
+          props.getProperty(GET_ITEM_SQL_KEY),
+          props.getProperty(GET_PREFS_FOR_ITEM_SQL_KEY),
+          props.getProperty(GET_USERS_PREFERRING_ITEM_SQL_KEY));
+  }
+
+  /**
+   * <p>See {@link #GenericJDBCDataModel(java.util.Properties)}. This constructor reads values
+   * from a file instead, as if with {@link Properties#load(InputStream)}. So, the file
+   * should be in standard Java properties file format -- containing <code>key=value</code> pairs,
+   * one per line.</p>
+   *
+   * @param propertiesFile properties file
+   * @throws TasteException if anything goes wrong during initialization
+   */
+  public GenericJDBCDataModel(File propertiesFile) throws TasteException {
+    this(getPropertiesFromFile(propertiesFile));
+  }
+
+  /**
+   * <p>See {@link #GenericJDBCDataModel(Properties)}. This constructor reads values
+   * from a resource available in the classpath, as if with {@link Class#getResourceAsStream(String)} and
+   * {@link Properties#load(InputStream)}. This is useful if your configuration file is, for example,
+   * packaged in a JAR file that is in the classpath.</p>
+   *
+   * @param resourcePath path to resource in classpath (e.g. "/com/foo/TasteSQLQueries.properties")
+   * @throws TasteException if anything goes wrong during initialization, including when no
+   *  resource exists at the given path
+   */
+  public GenericJDBCDataModel(String resourcePath) throws TasteException {
+    this(getPropertiesFromResource(resourcePath));
+  }
+
+  /**
+   * Loads properties from the given file.
+   *
+   * @throws TasteException if the file cannot be opened or read
+   */
+  private static Properties getPropertiesFromFile(File file) throws TasteException {
+    try {
+      return getPropertiesFromStream(new FileInputStream(file));
+    } catch (FileNotFoundException fnfe) {
+      throw new TasteException(fnfe);
+    }
+  }
+
+  /**
+   * Loads properties from a classpath resource.
+   *
+   * @throws TasteException if the resource does not exist or cannot be read
+   */
+  private static Properties getPropertiesFromResource(String resourcePath) throws TasteException {
+    InputStream is = GenericJDBCDataModel.class.getResourceAsStream(resourcePath);
+    if (is == null) {
+      // getResourceAsStream() signals a missing resource with null; report that as the documented
+      // TasteException rather than letting Properties.load(null) fail with a NullPointerException
+      throw new TasteException(new FileNotFoundException("Resource not found in classpath: " + resourcePath));
+    }
+    return getPropertiesFromStream(is);
+  }
+
+  /**
+   * Loads properties from the given stream and closes the stream afterwards, whether or not
+   * loading succeeded.
+   *
+   * @throws TasteException if reading from the stream fails
+   */
+  private static Properties getPropertiesFromStream(InputStream is) throws TasteException {
+    try {
+      try {
+        Properties props = new Properties();
+        props.load(is);
+        return props;
+      } finally {
+        IOUtils.quietClose(is);
+      }
+    } catch (IOException ioe) {
+      throw new TasteException(ioe);
+    }
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java Fri May 9 14:35:12 2008
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+import javax.sql.DataSource;
+
+/**
+ * <p>A {@link DataModel} backed by a MySQL database and accessed via JDBC. It may work with other
+ * JDBC databases. By default, this class assumes that there is a {@link DataSource} available under the
+ * JNDI name "jdbc/taste", which gives access to a database with a "taste_preferences" table with the
+ * following schema:</p>
+ *
+ * <table>
+ * <tr><th>user_id</th><th>item_id</th><th>preference</th></tr>
+ * <tr><td>ABC</td><td>123</td><td>0.9</td></tr>
+ * <tr><td>ABC</td><td>456</td><td>0.1</td></tr>
+ * <tr><td>DEF</td><td>123</td><td>0.2</td></tr>
+ * <tr><td>DEF</td><td>789</td><td>0.3</td></tr>
+ * </table>
+ *
+ * <p><code>user_id</code> must have a type compatible with the Java <code>String</code> type.
+ * <code>item_id</code> must have a type compatible with the Java <code>String</code> type.
+ * <code>preference</code> must have a type compatible with the Java <code>double</code> type.
+ * For example, the following command sets up a suitable table in MySQL, complete with
+ * primary key and indexes:</p>
+ *
+ * <pre>
+ * CREATE TABLE taste_preferences (
+ * user_id VARCHAR(10) NOT NULL,
+ * item_id VARCHAR(10) NOT NULL,
+ * preference FLOAT NOT NULL,
+ * PRIMARY KEY (user_id, item_id),
+ * INDEX (user_id),
+ * INDEX (item_id)
+ * )
+ * </pre>
+ *
+ * <h3>Performance Notes</h3>
+ *
+ * <p>See the notes in {@link AbstractJDBCDataModel} regarding using connection pooling. It's pretty vital
+ * to performance.</p>
+ *
+ * <p>Some experimentation suggests that MySQL's InnoDB engine is faster than MyISAM for these kinds of
+ * applications. While MyISAM is the default and, I believe, generally considered the lighter-weight and faster
+ * of the two engines, my guess is the row-level locking of InnoDB helps here. Your mileage may vary.</p>
+ *
+ * <p>Here are some key settings that can be tuned for MySQL, and suggested size for a data set of around
+ * 1 million elements:</p>
+ *
+ * <ul>
+ * <li>innodb_buffer_pool_size=64M</li>
+ * <li>myisam_sort_buffer_size=64M</li>
+ * <li>query_cache_limit=64M</li>
+ * <li>query_cache_min_res_unit=512K</li>
+ * <li>query_cache_type=1</li>
+ * <li>query_cache_size=64M</li>
+ * </ul>
+ *
+ * <p>Thanks to Amila Jayasooriya for contributing MySQL notes above as part of Google Summer of Code 2007.</p>
+ */
+public class MySQLJDBCDataModel extends AbstractJDBCDataModel {
+
+ /**
+ * <p>Creates a {@link MySQLJDBCDataModel} using the default {@link DataSource}
+ * (named {@link #DEFAULT_DATASOURCE_NAME}) and default table/column names.</p>
+ *
+ * @throws TasteException if {@link DataSource} can't be found
+ */
+ public MySQLJDBCDataModel() throws TasteException {
+ this(DEFAULT_DATASOURCE_NAME);
+ }
+
+ /**
+ * <p>Creates a {@link MySQLJDBCDataModel} using the {@link DataSource}
+ * found under the given name, and using default table/column names.</p>
+ *
+ * @param dataSourceName name of {@link DataSource} to look up
+ * @throws TasteException if {@link DataSource} can't be found
+ */
+ public MySQLJDBCDataModel(String dataSourceName) throws TasteException {
+ this(lookupDataSource(dataSourceName),
+ DEFAULT_PREFERENCE_TABLE,
+ DEFAULT_USER_ID_COLUMN,
+ DEFAULT_ITEM_ID_COLUMN,
+ DEFAULT_PREFERENCE_COLUMN);
+ }
+
+ /**
+ * <p>Creates a {@link MySQLJDBCDataModel} using the given {@link DataSource}
+ * and default table/column names.</p>
+ *
+ * @param dataSource {@link DataSource} to use
+ */
+ public MySQLJDBCDataModel(DataSource dataSource) {
+ this(dataSource,
+ DEFAULT_PREFERENCE_TABLE,
+ DEFAULT_USER_ID_COLUMN,
+ DEFAULT_ITEM_ID_COLUMN,
+ DEFAULT_PREFERENCE_COLUMN);
+ }
+
+ /**
+ * <p>Creates a {@link MySQLJDBCDataModel} using the given {@link DataSource}
+ * and the given table/column names.</p>
+ *
+ * <p>The table and column names are concatenated directly into the SQL statements built here,
+ * so they must be trusted identifiers rather than user-supplied input.</p>
+ *
+ * @param dataSource {@link DataSource} to use
+ * @param preferenceTable name of table containing preference data
+ * @param userIDColumn user ID column name
+ * @param itemIDColumn item ID column name
+ * @param preferenceColumn preference column name
+ */
+ public MySQLJDBCDataModel(DataSource dataSource,
+ String preferenceTable,
+ String userIDColumn,
+ String itemIDColumn,
+ String preferenceColumn) {
+ // NOTE(review): GenericJDBCDataModel in this package passes its "num users" query before its
+ // "num items" query to the same superclass constructor, the opposite of the order used below --
+ // confirm against AbstractJDBCDataModel's parameter order.
+ super(dataSource,
+ // getUserSQL
+ "SELECT " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable +
+ " WHERE " + userIDColumn + "=? ORDER BY " + itemIDColumn,
+ // getNumItemsSQL
+ "SELECT COUNT(DISTINCT " + itemIDColumn + ") FROM " + preferenceTable,
+ // getNumUsersSQL
+ "SELECT COUNT(DISTINCT " + userIDColumn + ") FROM " + preferenceTable,
+ // setPreferenceSQL
+ "INSERT INTO " + preferenceTable + " SET " + userIDColumn + "=?, " + itemIDColumn +
+ "=?, " + preferenceColumn + "=? ON DUPLICATE KEY UPDATE " + preferenceColumn + "=?",
+ // removePreference SQL
+ "DELETE FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND " + itemIDColumn + "=?",
+ // getUsersSQL
+ "SELECT " + itemIDColumn + ", " + preferenceColumn + ", " + userIDColumn + " FROM " +
+ preferenceTable + " ORDER BY " + userIDColumn + ", " + itemIDColumn,
+ // getItemsSQL
+ "SELECT DISTINCT " + itemIDColumn + " FROM " + preferenceTable + " ORDER BY " + itemIDColumn,
+ // getItemSQL
+ "SELECT 1 FROM " + preferenceTable + " WHERE " + itemIDColumn + "=?",
+ // getPrefsForItemSQL
+ "SELECT " + preferenceColumn + ", " + userIDColumn + " FROM " +
+ preferenceTable + " WHERE " + itemIDColumn + "=? ORDER BY " + userIDColumn,
+ // getUsersPreferringItemSQL
+ "SELECT DISTINCT " + userIDColumn + " FROM " + preferenceTable + " WHERE " + itemIDColumn +
+ "=? ORDER BY " + userIDColumn);
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java Fri May 9 14:35:12 2008
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+
+/**
+ * <p>Contains methods and resources useful to all classes in this package.</p>
+ */
+abstract class AbstractUserNeighborhood implements UserNeighborhood {
+
+ private final UserCorrelation userCorrelation;
+ private final DataModel dataModel;
+ private final double samplingRate;
+
+ AbstractUserNeighborhood(UserCorrelation userCorrelation,
+ DataModel dataModel,
+ double samplingRate) {
+ if (userCorrelation == null || dataModel == null) {
+ throw new IllegalArgumentException("userCorrelation or dataModel is null");
+ }
+ if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate > 1.0) {
+ throw new IllegalArgumentException("samplingRate must be in (0,1]");
+ }
+ this.userCorrelation = userCorrelation;
+ this.dataModel = dataModel;
+ this.samplingRate = samplingRate;
+ }
+
+ final UserCorrelation getUserCorrelation() {
+ return userCorrelation;
+ }
+
+ final DataModel getDataModel() {
+ return dataModel;
+ }
+
+ final boolean sampleForUser() {
+ return samplingRate >= 1.0 || Math.random() < samplingRate;
+ }
+
+ public final void refresh() {
+ userCorrelation.refresh();
+ dataModel.refresh();
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java Fri May 9 14:35:12 2008
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>Computes a neighborhood consisting of the nearest n {@link User}s to a given {@link User}.
+ * "Nearest" is defined by the given {@link UserCorrelation}.</p>
+ */
+public final class NearestNUserNeighborhood extends AbstractUserNeighborhood {
+
+  private static final Logger log = Logger.getLogger(NearestNUserNeighborhood.class.getName());
+
+  /** Serves neighborhoods by user ID; values are produced by {@link Retriever}. */
+  private final SoftCache<Object, Collection<User>> cache;
+
+  /**
+   * @param n neighborhood size
+   * @param userCorrelation nearness metric
+   * @param dataModel data model
+   * @throws IllegalArgumentException if n &lt; 1, or userCorrelation or dataModel are <code>null</code>
+   */
+  public NearestNUserNeighborhood(int n,
+                                  UserCorrelation userCorrelation,
+                                  DataModel dataModel) throws TasteException {
+    this(n, userCorrelation, dataModel, 1.0);
+  }
+
+  /**
+   * @param n neighborhood size
+   * @param userCorrelation nearness metric
+   * @param dataModel data model
+   * @param samplingRate percentage of users to consider when building neighborhood -- decrease to
+   *  trade quality for performance
+   * @throws IllegalArgumentException if n &lt; 1 or samplingRate is NaN or not in (0,1],
+   *  or userCorrelation or dataModel are <code>null</code>
+   */
+  public NearestNUserNeighborhood(int n,
+                                  UserCorrelation userCorrelation,
+                                  DataModel dataModel,
+                                  double samplingRate) throws TasteException {
+    super(userCorrelation, dataModel, samplingRate);
+    if (n < 1) {
+      throw new IllegalArgumentException("n must be at least 1");
+    }
+    this.cache = new SoftCache<Object, Collection<User>>(new Retriever(n), dataModel.getNumUsers());
+  }
+
+  /**
+   * @param userID ID of the user whose neighborhood is wanted
+   * @return the neighborhood obtained from the cache for that user ID
+   */
+  public Collection<User> getUserNeighborhood(Object userID) throws TasteException {
+    return cache.get(userID);
+  }
+
+  @Override
+  public String toString() {
+    return "NearestNUserNeighborhood";
+  }
+
+
+  /** Computes the neighborhood for a user ID on behalf of the cache. */
+  private final class Retriever implements SoftCache.Retriever<Object, Collection<User>> {
+
+    private final int n;
+
+    private Retriever(int n) {
+      this.n = n;
+    }
+
+    /**
+     * Scans the sampled users of the data model and keeps up to n of them with the highest
+     * correlation to the user identified by key, ordered from most to least correlated. Users
+     * whose correlation is NaN are ignored, as is the user identified by key.
+     *
+     * @param key user ID
+     * @return unmodifiable list of neighbors
+     */
+    public Collection<User> getValue(Object key) throws TasteException {
+      // Guard on the same level that is actually logged at
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Computing neighborhood around user ID '" + key + '\'');
+      }
+
+      DataModel dataModel = getDataModel();
+      User theUser = dataModel.getUser(key);
+      UserCorrelation userCorrelationImpl = getUserCorrelation();
+
+      // Kept sorted by descending correlation; the weakest retained neighbor is always last
+      LinkedList<UserCorrelationPair> queue = new LinkedList<UserCorrelationPair>();
+      boolean full = false;
+      for (User user : dataModel.getUsers()) {
+        if (sampleForUser() && !key.equals(user.getID())) {
+          double theCorrelation = userCorrelationImpl.userCorrelation(theUser, user);
+          // Once full, only candidates that beat the current weakest neighbor are worth inserting
+          if (!Double.isNaN(theCorrelation) && (!full || theCorrelation > queue.getLast().theCorrelation)) {
+            // Walk backwards from the tail to find the insertion point
+            ListIterator<UserCorrelationPair> iterator = queue.listIterator(queue.size());
+            while (iterator.hasPrevious()) {
+              if (theCorrelation <= iterator.previous().theCorrelation) {
+                // Step forward again so that the new pair lands after the element just examined
+                iterator.next();
+                break;
+              }
+            }
+            iterator.add(new UserCorrelationPair(user, theCorrelation));
+            if (full) {
+              queue.removeLast();
+            } else if (queue.size() > n) {
+              full = true;
+              queue.removeLast();
+            }
+          }
+        }
+      }
+
+      List<User> neighborhood = new ArrayList<User>(queue.size());
+      for (UserCorrelationPair pair : queue) {
+        neighborhood.add(pair.user);
+      }
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("UserNeighborhood around user ID '" + key + "' is: " + neighborhood);
+      }
+
+      return Collections.unmodifiableList(neighborhood);
+    }
+  }
+
+  /** A user together with its correlation to the user a neighborhood is being built for. */
+  private static final class UserCorrelationPair implements Comparable<UserCorrelationPair> {
+
+    final User user;
+    final double theCorrelation;
+
+    private UserCorrelationPair(User user, double theCorrelation) {
+      this.user = user;
+      this.theCorrelation = theCorrelation;
+    }
+
+    @Override
+    public int hashCode() {
+      return user.hashCode() ^ Double.valueOf(theCorrelation).hashCode();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof UserCorrelationPair)) {
+        return false;
+      }
+      UserCorrelationPair other = (UserCorrelationPair) o;
+      // Double.compare (rather than ==) keeps equals() consistent with hashCode(), which hashes
+      // 0.0 and -0.0 differently
+      return user.equals(other.user) && Double.compare(theCorrelation, other.theCorrelation) == 0;
+    }
+
+    /** Orders pairs by descending correlation. */
+    public int compareTo(UserCorrelationPair otherPair) {
+      return Double.compare(otherPair.theCorrelation, theCorrelation);
+    }
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java Fri May 9 14:35:12 2008
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>Computes a neighborhood consisting of all {@link User}s whose similarity to the
+ * given {@link User} meets or exceeds a certain threshold. Similarity is defined by the given
+ * {@link UserCorrelation}.</p>
+ */
+public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood {
+
+  private static final Logger log = Logger.getLogger(ThresholdUserNeighborhood.class.getName());
+
+  /** Serves neighborhoods by user ID; values are produced by {@link Retriever}. */
+  private final SoftCache<Object, Collection<User>> cache;
+
+  /**
+   * @param threshold similarity threshold
+   * @param userCorrelation similarity metric
+   * @param dataModel data model
+   * @throws IllegalArgumentException if threshold is {@link Double#NaN}, or if userCorrelation
+   *  or dataModel are <code>null</code>
+   */
+  public ThresholdUserNeighborhood(double threshold,
+                                   UserCorrelation userCorrelation,
+                                   DataModel dataModel) throws TasteException {
+    this(threshold, userCorrelation, dataModel, 1.0);
+  }
+
+  /**
+   * @param threshold similarity threshold
+   * @param userCorrelation similarity metric
+   * @param dataModel data model
+   * @param samplingRate percentage of users to consider when building neighborhood -- decrease to
+   *  trade quality for performance
+   * @throws IllegalArgumentException if threshold or samplingRate is {@link Double#NaN},
+   *  or if samplingRate is not positive and less than or equal to 1.0, or if userCorrelation
+   *  or dataModel are <code>null</code>
+   */
+  public ThresholdUserNeighborhood(double threshold,
+                                   UserCorrelation userCorrelation,
+                                   DataModel dataModel,
+                                   double samplingRate) throws TasteException {
+    super(userCorrelation, dataModel, samplingRate);
+    if (Double.isNaN(threshold)) {
+      throw new IllegalArgumentException("threshold must not be NaN");
+    }
+    this.cache = new SoftCache<Object, Collection<User>>(new Retriever(threshold), dataModel.getNumUsers());
+  }
+
+  /**
+   * @param userID ID of the user whose neighborhood is wanted
+   * @return the neighborhood obtained from the cache for that user ID
+   */
+  public Collection<User> getUserNeighborhood(Object userID) throws TasteException {
+    return cache.get(userID);
+  }
+
+  @Override
+  public String toString() {
+    return "ThresholdUserNeighborhood";
+  }
+
+
+  /** Computes the neighborhood for a user ID on behalf of the cache. */
+  private final class Retriever implements SoftCache.Retriever<Object, Collection<User>> {
+
+    private final double threshold;
+
+    private Retriever(double threshold) {
+      this.threshold = threshold;
+    }
+
+    /**
+     * Collects every sampled user, other than the one identified by key, whose correlation to
+     * that user is not NaN and is at least the threshold.
+     *
+     * @param key user ID
+     * @return unmodifiable list of neighbors, in the order the data model returned them
+     */
+    public Collection<User> getValue(Object key) throws TasteException {
+      // Guard on the same level that is actually logged at
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Computing neighborhood around user ID '" + key + '\'');
+      }
+
+      DataModel dataModel = getDataModel();
+      User theUser = dataModel.getUser(key);
+      UserCorrelation userCorrelationImpl = getUserCorrelation();
+      List<User> neighborhood = new ArrayList<User>();
+
+      for (User user : dataModel.getUsers()) {
+        if (sampleForUser() && !key.equals(user.getID())) {
+          double theCorrelation = userCorrelationImpl.userCorrelation(theUser, user);
+          if (!Double.isNaN(theCorrelation) && theCorrelation >= threshold) {
+            neighborhood.add(user);
+          }
+        }
+      }
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("UserNeighborhood around user ID '" + key + "' is: " + neighborhood);
+      }
+
+      return Collections.unmodifiableList(neighborhood);
+    }
+  }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java Fri May 9 14:35:12 2008
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+ /**
+  * <p>Base class for {@link Recommender} implementations. It holds the {@link DataModel} and
+  * supplies default implementations of several {@link Recommender} methods that delegate to it.</p>
+  */
+ public abstract class AbstractRecommender implements Recommender {
+
+   private static final Logger log = Logger.getLogger(AbstractRecommender.class.getName());
+
+   private final DataModel dataModel;
+   /** Ensures that at most one {@link #refresh()} runs at a time. */
+   private final ReentrantLock refreshLock;
+
+   /**
+    * @param dataModel source of users, items and preferences
+    * @throws IllegalArgumentException if dataModel is <code>null</code>
+    */
+   protected AbstractRecommender(DataModel dataModel) {
+     if (dataModel == null) {
+       throw new IllegalArgumentException("dataModel is null");
+     }
+     this.dataModel = dataModel;
+     this.refreshLock = new ReentrantLock();
+   }
+
+   /**
+    * <p>Default implementation which just calls
+    * {@link Recommender#recommend(Object, int, org.apache.mahout.cf.taste.recommender.Rescorer)},
+    * with a {@link org.apache.mahout.cf.taste.recommender.Rescorer} that does nothing.</p>
+    */
+   public List<RecommendedItem> recommend(Object userID, int howMany) throws TasteException {
+     return recommend(userID, howMany, NullRescorer.getItemInstance());
+   }
+
+   /**
+    * <p>Default implementation which just calls {@link DataModel#setPreference(Object, Object, double)}.</p>
+    *
+    * @throws IllegalArgumentException if userID or itemID is <code>null</code>, or if value is
+    *  {@link Double#NaN}
+    */
+   public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+     if (userID == null || itemID == null) {
+       throw new IllegalArgumentException("userID or itemID is null");
+     }
+     if (Double.isNaN(value)) {
+       throw new IllegalArgumentException("Invalid value: " + value);
+     }
+     if (log.isLoggable(Level.FINE)) {
+       log.fine("Setting preference for user '" + userID + "', item '" + itemID + "', value " + value);
+     }
+     dataModel.setPreference(userID, itemID, value);
+   }
+
+   /**
+    * <p>Default implementation which just calls
+    * {@link DataModel#removePreference(Object, Object)}.</p>
+    *
+    * @throws IllegalArgumentException if userID or itemID is <code>null</code>
+    */
+   public void removePreference(Object userID, Object itemID) throws TasteException {
+     if (userID == null || itemID == null) {
+       throw new IllegalArgumentException("userID or itemID is null");
+     }
+     if (log.isLoggable(Level.FINE)) {
+       log.fine("Remove preference for user '" + userID + "', item '" + itemID + '\'');
+     }
+     dataModel.removePreference(userID, itemID);
+   }
+
+   /**
+    * @return the {@link DataModel} given at construction
+    */
+   public DataModel getDataModel() {
+     return dataModel;
+   }
+
+   /**
+    * <p>Refreshes the underlying {@link DataModel}, unless a refresh is already in progress,
+    * in which case this call returns immediately without doing anything.</p>
+    */
+   public void refresh() {
+     // isLocked() keeps the original "skip while any refresh is running" rule, including a
+     // re-entrant call from the refreshing thread itself. tryLock() then acquires atomically, so
+     // a thread that loses the race returns instead of blocking and refreshing a second time.
+     if (refreshLock.isLocked() || !refreshLock.tryLock()) {
+       return;
+     }
+     // The lock is acquired before the try block so that unlock() only runs when it is held.
+     try {
+       dataModel.refresh();
+     } finally {
+       refreshLock.unlock();
+     }
+   }
+
+   /**
+    * @param theUser {@link User} being evaluated
+    * @return all {@link Item}s in the {@link DataModel} for which the {@link User} has not expressed a preference
+    * @throws TasteException if an error occurs while listing {@link Item}s
+    * @throws IllegalArgumentException if theUser is <code>null</code>
+    */
+   protected Set<Item> getAllOtherItems(User theUser) throws TasteException {
+     if (theUser == null) {
+       throw new IllegalArgumentException("theUser is null");
+     }
+     Set<Item> allItems = new HashSet<Item>(dataModel.getNumItems());
+     for (Item item : dataModel.getItems()) {
+       // If not already preferred by the user, add it
+       if (theUser.getPreferenceFor(item.getID()) == null) {
+         allItems.add(item);
+       }
+     }
+     return allItems;
+   }
+
+ }
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java Fri May 9 14:35:12 2008
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+/**
+ * <p>A simple {@link org.apache.mahout.cf.taste.recommender.Rescorer} which always returns the original score.</p>
+ */
+final class ByRescoreComparator implements Comparator<RecommendedItem>, Serializable {
+
+ private final Rescorer<Item> rescorer;
+
+ ByRescoreComparator(Rescorer<Item> rescorer) {
+ if (rescorer == null) {
+ throw new IllegalArgumentException("rescorer is null");
+ }
+ this.rescorer = rescorer;
+ }
+
+ public int compare(RecommendedItem o1, RecommendedItem o2) {
+ double rescored1 = rescorer.rescore(o1.getItem(), o1.getValue());
+ double rescored2 = rescorer.rescore(o2.getItem(), o2.getValue());
+ if (rescored1 < rescored2) {
+ return 1;
+ } else if (rescored1 > rescored2) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "ByRescoreComparator[rescorer:" + rescorer + ']';
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java Fri May 9 14:35:12 2008
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.lang.ref.SoftReference;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A {@link Recommender} which caches the results from another {@link Recommender} in memory.
+ * Results are held by {@link SoftReference}s so that the JVM may reclaim memory from the recommendationCache
+ * in low-memory situations.</p>
+ */
+public final class CachingRecommender implements Recommender {
+
+ private static final Logger log = Logger.getLogger(CachingRecommender.class.getName());
+
+ private final Recommender recommender;
+ private final AtomicInteger maxHowMany;
+ private final SoftCache<Object, Recommendations> recommendationCache;
+ private final SoftCache<Pair<?, ?>, Double> estimatedPrefCache;
+ private final ReentrantLock refreshLock;
+
+ public CachingRecommender(Recommender recommender) throws TasteException {
+ if (recommender == null) {
+ throw new IllegalArgumentException("recommender is null");
+ }
+ this.recommender = recommender;
+ this.maxHowMany = new AtomicInteger(1);
+ // Use "num users" as an upper limit on cache size. Rough guess.
+ int numUsers = recommender.getDataModel().getNumUsers();
+ this.recommendationCache =
+ new SoftCache<Object, Recommendations>(
+ new RecommendationRetriever(this.recommender, this.maxHowMany),
+ numUsers);
+ this.estimatedPrefCache =
+ new SoftCache<Pair<?, ?>, Double>(new EstimatedPrefRetriever(this.recommender), numUsers);
+ this.refreshLock = new ReentrantLock();
+ }
+
+ public List<RecommendedItem> recommend(Object userID, int howMany) throws TasteException {
+ if (userID == null) {
+ throw new IllegalArgumentException("user ID is null");
+ }
+ if (howMany < 1) {
+ throw new IllegalArgumentException("howMany must be at least 1");
+ }
+
+ synchronized (maxHowMany) {
+ if (howMany > maxHowMany.get()) {
+ maxHowMany.set(howMany);
+ }
+ }
+
+ Recommendations recommendations = recommendationCache.get(userID);
+ if (recommendations.items.size() < howMany && !recommendations.noMoreRecommendableItems) {
+ clear(userID);
+ recommendations = recommendationCache.get(userID);
+ if (recommendations.items.size() < howMany) {
+ recommendations.noMoreRecommendableItems = true;
+ }
+ }
+
+ return recommendations.items.size() > howMany ?
+ recommendations.items.subList(0, howMany) :
+ recommendations.items;
+ }
+
+ public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+ throws TasteException {
+ // Hmm, hard to recommendationCache this since the rescorer may change
+ return recommender.recommend(userID, howMany, rescorer);
+ }
+
+ public double estimatePreference(Object userID, Object itemID) throws TasteException {
+ return estimatedPrefCache.get(new Pair<Object, Object>(userID, itemID));
+ }
+
+ public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+ recommender.setPreference(userID, itemID, value);
+ clear(userID);
+ }
+
+ public void removePreference(Object userID, Object itemID) throws TasteException {
+ recommender.removePreference(userID, itemID);
+ clear(userID);
+ }
+
+ public DataModel getDataModel() {
+ return recommender.getDataModel();
+ }
+
+ public void refresh() {
+ if (refreshLock.isLocked()) {
+ return;
+ }
+ try {
+ refreshLock.lock();
+ recommender.refresh();
+ clear();
+ } finally {
+ refreshLock.unlock();
+ }
+ }
+
+ /**
+ * <p>Clears cached recommendations for the given user.</p>
+ *
+ * @param userID clear cached data associated with this user ID
+ */
+ public void clear(Object userID) {
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Clearing recommendations for user ID '" + userID + "'...");
+ }
+ recommendationCache.remove(userID);
+ }
+
+ /**
+ * <p>Clears all cached recommendations.</p>
+ */
+ public void clear() {
+ log.fine("Clearing all recommendations...");
+ recommendationCache.clear();
+ }
+
+ @Override
+ public String toString() {
+ return "CachingRecommender[recommender:" + recommender + ']';
+ }
+
+ private static final class RecommendationRetriever implements SoftCache.Retriever<Object, Recommendations> {
+
+ private final Recommender recommender;
+ private final AtomicInteger maxHowMany;
+
+ private RecommendationRetriever(Recommender recommender, AtomicInteger maxHowMany) {
+ this.recommender = recommender;
+ this.maxHowMany = maxHowMany;
+ }
+
+ public Recommendations getValue(Object key) throws TasteException {
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Retrieving new recommendations for user ID '" + key + '\'');
+ }
+ return new Recommendations(Collections.unmodifiableList(recommender.recommend(key, maxHowMany.get())));
+ }
+ }
+
+ private static final class EstimatedPrefRetriever implements SoftCache.Retriever<Pair<?, ?>, Double> {
+
+ private final Recommender recommender;
+
+ private EstimatedPrefRetriever(Recommender recommender) {
+ this.recommender = recommender;
+ }
+
+ public Double getValue(Pair<?, ?> key) throws TasteException {
+ Object userID = key.getFirst();
+ Object itemID = key.getSecond();
+ if (log.isLoggable(Level.FINE)) {
+ log.fine("Retrieving estimated preference for user ID '" + userID + "\' and item ID \'" +
+ itemID + '\'');
+ }
+ return recommender.estimatePreference(userID, itemID);
+ }
+ }
+
+ private static final class Recommendations {
+
+ private final List<RecommendedItem> items;
+ private boolean noMoreRecommendableItems;
+
+ private Recommendations(List<RecommendedItem> items) {
+ this.items = items;
+ this.noMoreRecommendableItems = false;
+ }
+ }
+
+}
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java Fri May 9 14:35:12 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.Collection;
+
+/**
+ * <p>Returns the "similarity" between two clusters of users, according to some
+ * definition of similarity. Subclassses define different notions of similarity.</p>
+ *
+ * @see TreeClusteringRecommender
+ */
+public interface ClusterSimilarity extends Refreshable {
+
+ /**
+ * @param cluster1 first cluster of {@link User}s
+ * @param cluster2 second cluste rof {@link User}s
+ * @return "distance" between clusters; a positiv
+ * @throws TasteException if an error occurs while computing similarity, such as
+ * errors accessing an underlying {@link org.apache.mahout.cf.taste.model.DataModel}
+ * @throws IllegalArgumentException if either argument is null or empty
+ */
+ double getSimilarity(Collection<User> cluster1, Collection<User> cluster2) throws TasteException;
+
+}