You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2008/05/09 23:35:17 UTC

svn commit: r654943 [4/9] - in /lucene/mahout/trunk/core: ./ lib/ src/main/examples/org/ src/main/examples/org/apache/ src/main/examples/org/apache/mahout/ src/main/examples/org/apache/mahout/cf/ src/main/examples/org/apache/mahout/cf/taste/ src/main/e...

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java Fri May  9 14:35:12 2008
@@ -0,0 +1,299 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.file;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.IOUtils;
+import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Timer;
+import java.util.TimerTask;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A {@link DataModel} backed by a comma-delimited file. Each non-empty line of the file is expected
+ * to contain a user ID, followed by an item ID, followed by a preference value, separated by commas.
+ * The preference value must be parseable as a <code>double</code>. The user and item IDs
+ * are read literally as Strings and treated as such in the API. Note that this means that whitespace
+ * matters in the data file; it will be treated as part of the ID values.</p>
+ *
+ * <p>This class is not intended for use with very large amounts of data (over, say, a million rows). For
+ * that, a JDBC-backed {@link DataModel} and a database are more appropriate.
+ * Once the data has been loaded, a shared daemon timer checks the file's last-modified time every
+ * minute and reloads the file if that time has advanced.</p>
+ */
+public class FileDataModel implements DataModel {
+
+  private static final Logger log = Logger.getLogger(FileDataModel.class.getName());
+
+  // One daemon timer shared by all instances; it runs each instance's RefreshTimerTask
+  private static final Timer timer = new Timer(true);
+  private static final long RELOAD_CHECK_INTERVAL_MS = 60L * 1000L;
+
+  private final File dataFile;
+  // These three fields are written by reload() / the timer task and read by other threads
+  // without holding a lock, so they are volatile to make the writes visible to readers.
+  private volatile long lastModified;
+  private volatile boolean loaded;
+  private volatile DataModel delegate;
+  private final ReentrantLock refreshLock;
+  private final ReentrantLock reloadLock;
+
+  /**
+   * Creates a model over the given file and schedules the periodic modification check. The file
+   * is not read here; it is read on first access through one of the {@link DataModel} methods.
+   *
+   * @param dataFile file containing preferences data
+   * @throws IllegalArgumentException if dataFile is null
+   * @throws FileNotFoundException if dataFile does not exist or is a directory
+   */
+  public FileDataModel(File dataFile) throws FileNotFoundException {
+    if (dataFile == null) {
+      throw new IllegalArgumentException("dataFile is null");
+    }
+    if (!dataFile.exists() || dataFile.isDirectory()) {
+      throw new FileNotFoundException(dataFile.toString());
+    }
+
+    log.info("Creating FileDataModel for file " + dataFile);
+
+    this.dataFile = dataFile;
+    this.lastModified = dataFile.lastModified();
+    this.refreshLock = new ReentrantLock();
+    this.reloadLock = new ReentrantLock();
+
+    // Schedule next refresh
+    timer.schedule(new RefreshTimerTask(), RELOAD_CHECK_INTERVAL_MS, RELOAD_CHECK_INTERVAL_MS);
+  }
+
+  /**
+   * Reads the whole data file and replaces the in-memory model with its contents. Concurrent
+   * calls are serialized by an internal lock.
+   *
+   * @throws IOException if the file cannot be read
+   * @throws IllegalArgumentException if a line of the file is malformed
+   */
+  protected void reload() throws IOException {
+    // Acquire before entering try so that finally never unlocks a lock this thread does not hold
+    reloadLock.lock();
+    try {
+      Map<String, List<Preference>> data = new HashMap<String, List<Preference>>(1003);
+
+      processFile(data);
+
+      List<User> users = new ArrayList<User>(data.size());
+      for (Map.Entry<String, List<Preference>> entry : data.entrySet()) {
+        users.add(buildUser(entry.getKey(), entry.getValue()));
+      }
+
+      // Publish the new model before setting the flag, so a reader that sees loaded == true
+      // also sees the delegate
+      delegate = new GenericDataModel(users);
+      loaded = true;
+
+    } finally {
+      reloadLock.unlock();
+    }
+  }
+
+  /**
+   * Reads every line of the data file and adds its preference to <code>data</code>. Empty lines
+   * are skipped rather than treated as end of input, so a blank line in the middle of the file
+   * does not silently drop the lines that follow it.
+   *
+   * @param data map from user ID to that user's preferences, filled in by this method
+   * @throws IOException if the file cannot be read
+   */
+  private void processFile(Map<String, List<Preference>> data) throws IOException {
+    log.info("Reading file info...");
+    BufferedReader reader = null;
+    try {
+      reader = new BufferedReader(new FileReader(dataFile));
+      String line;
+      while ((line = reader.readLine()) != null) {
+        if (line.length() == 0) {
+          continue;
+        }
+        if (log.isLoggable(Level.FINE)) {
+          log.fine("Read line: " + line);
+        }
+        processLine(line, data);
+      }
+    } finally {
+      IOUtils.quietClose(reader);
+    }
+  }
+
+  /**
+   * Parses one <code>userID,itemID,value</code> line and appends the resulting preference to the
+   * list kept for that user, creating the list if this is the user's first preference.
+   *
+   * @param line non-empty line from the data file
+   * @param data map from user ID to that user's preferences
+   * @throws IllegalArgumentException if the line does not contain two commas, or (as
+   *  {@link NumberFormatException}) if the value is not parseable as a double
+   */
+  private void processLine(String line, Map<String, List<Preference>> data) {
+    int commaOne = line.indexOf((int) ',');
+    int commaTwo = line.indexOf((int) ',', commaOne + 1);
+    if (commaOne < 0 || commaTwo < 0) {
+      throw new IllegalArgumentException("Bad line: " + line);
+    }
+    String userID = line.substring(0, commaOne);
+    String itemID = line.substring(commaOne + 1, commaTwo);
+    double preferenceValue = Double.parseDouble(line.substring(commaTwo + 1));
+    List<Preference> prefs = data.get(userID);
+    if (prefs == null) {
+      prefs = new ArrayList<Preference>();
+      data.put(userID, prefs);
+    }
+    Item item = buildItem(itemID);
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Read item " + item + " for user ID " + userID);
+    }
+    // The User is not known yet at this point, so the preference is built without one
+    prefs.add(buildPreference(null, item, preferenceValue));
+  }
+
+  /**
+   * Loads the file if it has not been loaded yet.
+   *
+   * @throws TasteException wrapping any {@link IOException} raised while reading the file
+   */
+  private void checkLoaded() throws TasteException {
+    if (!loaded) {
+      try {
+        reload();
+      } catch (IOException ioe) {
+        throw new TasteException(ioe);
+      }
+    }
+  }
+
+  /**
+   * @return all users, as reported by the in-memory model built from the file
+   */
+  public Iterable<? extends User> getUsers() throws TasteException {
+    checkLoaded();
+    return delegate.getUsers();
+  }
+
+  /**
+   * @throws NoSuchElementException if there is no such user
+   */
+  public User getUser(Object id) throws TasteException {
+    checkLoaded();
+    return delegate.getUser(id);
+  }
+
+  /**
+   * @return all items, as reported by the in-memory model built from the file
+   */
+  public Iterable<? extends Item> getItems() throws TasteException {
+    checkLoaded();
+    return delegate.getItems();
+  }
+
+  /**
+   * @param id item ID
+   * @return the item with that ID, as reported by the in-memory model built from the file
+   */
+  public Item getItem(Object id) throws TasteException {
+    checkLoaded();
+    return delegate.getItem(id);
+  }
+
+  /**
+   * @param itemID item ID
+   * @return preferences expressed for that item, as reported by the in-memory model
+   */
+  public Iterable<? extends Preference> getPreferencesForItem(Object itemID) throws TasteException {
+    checkLoaded();
+    return delegate.getPreferencesForItem(itemID);
+  }
+
+  /**
+   * @param itemID item ID
+   * @return preferences expressed for that item as an array, as reported by the in-memory model
+   */
+  public Preference[] getPreferencesForItemAsArray(Object itemID) throws TasteException {
+    checkLoaded();
+    return delegate.getPreferencesForItemAsArray(itemID);
+  }
+
+  /**
+   * @return number of items, as reported by the in-memory model built from the file
+   */
+  public int getNumItems() throws TasteException {
+    checkLoaded();
+    return delegate.getNumItems();
+  }
+
+  /**
+   * @return number of users, as reported by the in-memory model built from the file
+   */
+  public int getNumUsers() throws TasteException {
+    checkLoaded();
+    return delegate.getNumUsers();
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  public void setPreference(Object userID, Object itemID, double value) {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  public void removePreference(Object userID, Object itemID) {
+    throw new UnsupportedOperationException();
+  }
+
+  /**
+   * Reloads the file unless a refresh is already in progress, in which case this call returns
+   * immediately. An {@link IOException} during the reload is logged and not propagated.
+   */
+  public void refresh() {
+    // tryLock() tests and acquires in one atomic step; a separate isLocked() check followed by
+    // lock() lets two threads both pass the check and then run the reload one after the other
+    if (!refreshLock.tryLock()) {
+      return;
+    }
+    try {
+      reload();
+    } catch (IOException ioe) {
+      log.log(Level.WARNING, "Unexpected exception while refreshing", ioe);
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  /**
+   * Subclasses may override to return a different {@link User} implementation.
+   *
+   * @param id user ID
+   * @param prefs user preferences
+   * @return {@link GenericUser} by default
+   */
+  protected User buildUser(String id, List<Preference> prefs) {
+    return new GenericUser<String>(id, prefs);
+  }
+
+  /**
+   * Subclasses may override to return a different {@link Item} implementation.
+   *
+   * @param id item ID
+   * @return {@link GenericItem} by default
+   */
+  protected Item buildItem(String id) {
+    return new GenericItem<String>(id);
+  }
+
+  /**
+   * Subclasses may override to return a different {@link Preference} implementation.
+   *
+   * @param user {@link User} who expresses the preference
+   * @param item preferred {@link Item}
+   * @param value preference value
+   * @return {@link GenericPreference} by default
+   */
+  protected Preference buildPreference(User user, Item item, double value) {
+    return new GenericPreference(user, item, value);
+  }
+
+  @Override
+  public String toString() {
+    return "FileDataModel[dataFile:" + dataFile + ']';
+  }
+
+  /**
+   * Timer task that reloads the file when its last-modified time has advanced. It does nothing
+   * until the file has been loaded for the first time.
+   */
+  private final class RefreshTimerTask extends TimerTask {
+
+    @Override
+    public void run() {
+      if (!loaded) {
+        return;
+      }
+      long newModified = dataFile.lastModified();
+      if (newModified > lastModified) {
+        log.fine("File has changed; reloading...");
+        lastModified = newModified;
+        try {
+          reload();
+        } catch (IOException ioe) {
+          log.log(Level.WARNING, "Error while reloading file", ioe);
+        } catch (RuntimeException re) {
+          // An exception escaping run() terminates the Timer's thread, which would stop the
+          // shared timer for every FileDataModel; a malformed line must not cause that
+          log.log(Level.WARNING, "Error while reloading file", re);
+        }
+      }
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Fri May  9 14:35:12 2008
@@ -0,0 +1,644 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.IOUtils;
+import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
+import org.apache.mahout.cf.taste.impl.model.GenericItem;
+import org.apache.mahout.cf.taste.impl.model.GenericPreference;
+import org.apache.mahout.cf.taste.impl.model.GenericUser;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.JDBCDataModel;
+import org.apache.mahout.cf.taste.model.Preference;
+import org.apache.mahout.cf.taste.model.User;
+
+import javax.naming.Context;
+import javax.naming.InitialContext;
+import javax.naming.NamingException;
+import javax.sql.DataSource;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>An abstract superclass for JDBC-related {@link DataModel} implementations, providing most of the common
+ * functionality that any such implementation would need.</p>
+ *
+ * <p>Performance will be a concern with any JDBC-based {@link DataModel}. There are going to be lots of
+ * simultaneous reads and some writes to one table. Make sure the table is set up optimally -- for example,
+ * you'll want to establish indexes.</p>
+ *
+ * <p>You'll also want to use connection pooling of some kind. Most J2EE containers like Tomcat
+ * provide connection pooling, so make sure the {@link DataSource} it exposes is using pooling. Outside a
+ * J2EE container, you can use packages like Jakarta's
+ * <a href="http://jakarta.apache.org/commons/dbcp/">DBCP</a> to create a {@link DataSource} on top of your
+ * database whose {@link Connection}s are pooled.</p>
+ *
+ * <p>Also note: this default implementation assumes that the user and item ID keys are {@link String}s, for
+ * maximum flexibility. You can override this behavior by subclassing an implementation and overriding
+ * {@link #buildItem(String)} and {@link #buildUser(String, List)}. If you don't, just make sure you use
+ * {@link String}s as IDs throughout your code. If your IDs are really numeric, and you use, say, {@link Long}
+ * for IDs in the rest of your code, you will run into subtle problems because the {@link Long} values won't
+ * be equal to or compare correctly to the underlying {@link String} key values.</p>
+ */
+public abstract class AbstractJDBCDataModel implements JDBCDataModel {
+
+  private static final Logger log = Logger.getLogger(AbstractJDBCDataModel.class.getName());
+
+  public static final String DEFAULT_DATASOURCE_NAME = "jdbc/taste";
+  public static final String DEFAULT_PREFERENCE_TABLE = "taste_preferences";
+  public static final String DEFAULT_USER_ID_COLUMN = "user_id";
+  public static final String DEFAULT_ITEM_ID_COLUMN = "item_id";
+  public static final String DEFAULT_PREFERENCE_COLUMN = "preference";
+
+  private final DataSource dataSource;
+  private final String getUserSQL;
+  private final String getNumItemsSQL;
+  private final String getNumUsersSQL;
+  private final String setPreferenceSQL;
+  private final String removePreferenceSQL;
+  private final String getUsersSQL;
+  private final String getItemsSQL;
+  private final String getItemSQL;
+  private final String getPrefsForItemSQL;
+
+  /**
+   * Validates and stores the {@link DataSource} and the SQL statements this model executes. Every
+   * argument must be non-null and have a non-empty string form. A warning is logged if the
+   * {@link DataSource} is not a {@link ConnectionPoolDataSource}.
+   *
+   * @param dataSource source of database connections
+   * @param getUserSQL query run by {@link #getUser(Object)}; parameter 1 is the user ID
+   * @param getNumItemsSQL query run by {@link #getNumItems()}; column 1 is read as an int
+   * @param getNumUsersSQL query run by {@link #getNumUsers()}; column 1 is read as an int
+   * @param setPreferenceSQL update run by {@link #setPreference(Object, Object, double)}; parameters
+   *  are user ID, item ID, value and value again
+   * @param removePreferenceSQL update run by {@link #removePreference(Object, Object)}; parameters
+   *  are user ID and item ID
+   * @param getUsersSQL query run by {@link #getUsers()}
+   * @param getItemsSQL query run by {@link #getItems()}
+   * @param getItemSQL query run by {@link #getItem(Object)}; parameter 1 is the item ID
+   * @param getPrefsForItemSQL query run when fetching an item's preferences; parameter 1 is the
+   *  item ID, column 1 is the preference value and column 2 the user ID
+   * @param getUsersPreferringItemSQL validated here but not retained by this class
+   * @throws IllegalArgumentException if any argument is null or empty
+   */
+  protected AbstractJDBCDataModel(DataSource dataSource,
+                                  String getUserSQL,
+                                  String getNumItemsSQL,
+                                  String getNumUsersSQL,
+                                  String setPreferenceSQL,
+                                  String removePreferenceSQL,
+                                  String getUsersSQL,
+                                  String getItemsSQL,
+                                  String getItemSQL,
+                                  String getPrefsForItemSQL,
+                                  String getUsersPreferringItemSQL) {
+
+    log.fine("Creating AbstractJDBCModel...");
+
+    // Validate everything, in declaration order, before touching any field
+    checkNotNullAndLog("dataSource", dataSource);
+    checkNotNullAndLog("getUserSQL", getUserSQL);
+    checkNotNullAndLog("getNumItemsSQL", getNumItemsSQL);
+    checkNotNullAndLog("getNumUsersSQL", getNumUsersSQL);
+    checkNotNullAndLog("setPreferenceSQL", setPreferenceSQL);
+    checkNotNullAndLog("removePreferenceSQL", removePreferenceSQL);
+    checkNotNullAndLog("getUsersSQL", getUsersSQL);
+    checkNotNullAndLog("getItemsSQL", getItemsSQL);
+    checkNotNullAndLog("getItemSQL", getItemSQL);
+    checkNotNullAndLog("getPrefsForItemSQL", getPrefsForItemSQL);
+    checkNotNullAndLog("getUsersPreferringItemSQL", getUsersPreferringItemSQL);
+
+    boolean knownToPool = dataSource instanceof ConnectionPoolDataSource;
+    if (!knownToPool) {
+      log.warning("You are not using ConnectionPoolDataSource. Make sure your DataSource pools connections " +
+                  "to the database itself, or database performance will be severely reduced.");
+    }
+
+    this.dataSource = dataSource;
+
+    // Queries
+    this.getUserSQL = getUserSQL;
+    this.getUsersSQL = getUsersSQL;
+    this.getItemSQL = getItemSQL;
+    this.getItemsSQL = getItemsSQL;
+    this.getPrefsForItemSQL = getPrefsForItemSQL;
+    this.getNumItemsSQL = getNumItemsSQL;
+    this.getNumUsersSQL = getNumUsersSQL;
+
+    // Updates
+    this.setPreferenceSQL = setPreferenceSQL;
+    this.removePreferenceSQL = removePreferenceSQL;
+  }
+
+  /**
+   * Rejects a null argument or one whose string form is empty; otherwise logs it at FINE level.
+   *
+   * @param argName argument name used in the exception and log messages
+   * @param value argument value to check
+   * @throws IllegalArgumentException if value is null or its string form is empty
+   */
+  private static void checkNotNullAndLog(String argName, Object value) {
+    boolean missing = value == null || value.toString().length() == 0;
+    if (missing) {
+      throw new IllegalArgumentException(argName + " is null or empty");
+    }
+    if (!log.isLoggable(Level.FINE)) {
+      return;
+    }
+    log.fine(argName + ": " + value);
+  }
+
+  /**
+   * <p>Looks up a {@link DataSource} by name from JNDI. "java:comp/env/" is prepended to the argument
+   * before looking up the name in JNDI.</p>
+   *
+   * @param dataSourceName JNDI name where a {@link DataSource} is bound (e.g. "jdbc/taste")
+   * @return {@link DataSource} under that JNDI name
+   * @throws TasteException if a JNDI error occurs
+   */
+  public static DataSource lookupDataSource(String dataSourceName) throws TasteException {
+    Context context = null;
+    try {
+      context = new InitialContext();
+      return (DataSource) context.lookup("java:comp/env/" + dataSourceName);
+    } catch (NamingException ne) {
+      throw new TasteException(ne);
+    } finally {
+      if (context != null) {
+        try {
+          context.close();
+        } catch (NamingException ne) {
+          log.log(Level.WARNING, "Error while closing Context; continuing...", ne);
+        }
+      }
+    }
+  }
+
+  /**
+   * Exposes the {@link DataSource} supplied at construction time.
+   *
+   * @return the {@link DataSource} that this instance is using
+   */
+  public DataSource getDataSource() {
+    return this.dataSource;
+  }
+
+  /**
+   * Starts the all-users query and returns a view over its results. The underlying iterator
+   * holds a database connection open until it has been iterated to the end.
+   *
+   * @return all users, read from the result of the all-users query
+   * @throws TasteException if the query cannot be started
+   */
+  public final Iterable<? extends User> getUsers() throws TasteException {
+    log.fine("Retrieving all users...");
+    Iterator<User> userIterator = new ResultSetUserIterator(dataSource, getUsersSQL);
+    return new IteratorIterable<User>(userIterator);
+  }
+
+  /**
+   * Loads one user together with all of that user's preferences.
+   *
+   * @param id user ID; bound as parameter 1 of the user query, and its string form becomes the
+   *  ID of the returned {@link User}
+   * @return the user, built by {@link #buildUser(String, List)}
+   * @throws NoSuchElementException if there is no such user, that is, the query returns no rows
+   * @throws TasteException if a database error occurs
+   */
+  public final User getUser(Object id) throws TasteException {
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Retrieving user ID '" + id + "'...");
+    }
+
+    String userKey = id.toString();
+
+    Connection connection = null;
+    PreparedStatement query = null;
+    ResultSet rows = null;
+
+    try {
+      connection = dataSource.getConnection();
+      query = connection.prepareStatement(getUserSQL);
+      query.setObject(1, id);
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Executing SQL query: " + getUserSQL);
+      }
+      rows = query.executeQuery();
+
+      // One row per preference of this user
+      List<Preference> userPrefs = new ArrayList<Preference>();
+      for (boolean more = rows.next(); more; more = rows.next()) {
+        addPreference(rows, userPrefs);
+      }
+
+      if (!userPrefs.isEmpty()) {
+        return buildUser(userKey, userPrefs);
+      }
+      throw new NoSuchElementException();
+
+    } catch (SQLException sqle) {
+      log.log(Level.WARNING, "Exception while retrieving user", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.safeClose(rows, query, connection);
+    }
+
+  }
+
+  /**
+   * Starts the all-items query and returns a view over its results. The underlying iterator
+   * holds a database connection open until it has been iterated to the end.
+   *
+   * @return all items, read from the result of the all-items query
+   * @throws TasteException if the query cannot be started
+   */
+  public final Iterable<? extends Item> getItems() throws TasteException {
+    log.fine("Retrieving all items...");
+    Iterator<Item> itemIterator = new ResultSetItemIterator(dataSource, getItemsSQL);
+    return new IteratorIterable<Item>(itemIterator);
+  }
+
+  /**
+   * Looks up an item, checking the database for its existence.
+   *
+   * @param id item ID
+   * @return the item with that ID
+   * @throws TasteException if a database error occurs
+   */
+  public final Item getItem(Object id) throws TasteException {
+    final boolean assumeExists = false;
+    return getItem(id, assumeExists);
+  }
+
+  /**
+   * Looks up an item, optionally skipping the database check.
+   *
+   * <p>The ID is converted with <code>toString()</code>, as {@link #getUser(Object)} does, rather
+   * than cast to {@link String}, so a non-String ID does not fail with a
+   * {@link ClassCastException}. A null ID is passed on to {@link #buildItem(String)} as null.</p>
+   *
+   * @param id item ID; bound as parameter 1 of the item query when the database is consulted
+   * @param assumeExists if true, the item is built directly without querying the database
+   * @return the item, built by {@link #buildItem(String)}
+   * @throws NoSuchElementException if the database is consulted and the query returns no rows
+   * @throws TasteException if a database error occurs
+   */
+  public final Item getItem(Object id, boolean assumeExists) throws TasteException {
+
+    String idString = id == null ? null : id.toString();
+
+    if (assumeExists) {
+      return buildItem(idString);
+    }
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Retrieving item ID '" + id + "'...");
+    }
+
+    Connection conn = null;
+    PreparedStatement stmt = null;
+    ResultSet rs = null;
+
+    try {
+      conn = dataSource.getConnection();
+      stmt = conn.prepareStatement(getItemSQL);
+      stmt.setObject(1, id);
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Executing SQL query: " + getItemSQL);
+      }
+      rs = stmt.executeQuery();
+      // Any row at all means the item exists; the row's contents are not read
+      if (rs.next()) {
+        return buildItem(idString);
+      } else {
+        throw new NoSuchElementException();
+      }
+    } catch (SQLException sqle) {
+      log.log(Level.WARNING, "Exception while retrieving item", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.safeClose(rs, stmt, conn);
+    }
+  }
+
+  /**
+   * @param itemID item ID
+   * @return all preferences expressed for that item
+   * @throws TasteException if a database error occurs
+   */
+  public final Iterable<? extends Preference> getPreferencesForItem(Object itemID) throws TasteException {
+    List<? extends Preference> itemPrefs = doGetPreferencesForItem(itemID);
+    return itemPrefs;
+  }
+
+  public final Preference[] getPreferencesForItemAsArray(Object itemID) throws TasteException {
+    List<? extends Preference> list = doGetPreferencesForItem(itemID);
+    return list.toArray(new Preference[list.size()]);
+  }
+
+  /**
+   * Fetches every preference recorded for one item. The item itself is looked up first through
+   * {@link #getItem(Object)}. Each returned preference carries a {@link User} built from the
+   * row's user ID with a null preference list.
+   *
+   * @param itemID item ID; bound as parameter 1 of the preferences-for-item query
+   * @return one {@link Preference} per row returned by the query
+   * @throws TasteException if a database error occurs
+   */
+  private List<? extends Preference> doGetPreferencesForItem(Object itemID) throws TasteException {
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Retrieving preferences for item ID '" + itemID + "'...");
+    }
+    Item targetItem = getItem(itemID);
+    Connection connection = null;
+    PreparedStatement query = null;
+    ResultSet rows = null;
+    try {
+      connection = dataSource.getConnection();
+      query = connection.prepareStatement(getPrefsForItemSQL);
+      query.setObject(1, itemID);
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Executing SQL query: " + getPrefsForItemSQL);
+      }
+      rows = query.executeQuery();
+      List<Preference> result = new ArrayList<Preference>();
+      for (boolean more = rows.next(); more; more = rows.next()) {
+        // Column 1 is the preference value, column 2 the ID of the user who expressed it
+        double value = rows.getDouble(1);
+        String userID = rows.getString(2);
+        User owner = buildUser(userID, null);
+        result.add(buildPreference(owner, targetItem, value));
+      }
+      return result;
+    } catch (SQLException sqle) {
+      log.log(Level.WARNING, "Exception while retrieving prefs for item", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.safeClose(rows, query, connection);
+    }
+  }
+
+  public final int getNumItems() throws TasteException {
+    return getNumThings("items", getNumItemsSQL);
+  }
+
+  public final int getNumUsers() throws TasteException {
+    return getNumThings("users", getNumUsersSQL);
+  }
+
+  /**
+   * Runs a counting query and returns the first column of its first row as an int.
+   *
+   * @param name what is being counted; used only in log messages
+   * @param sql counting query to execute
+   * @return value of column 1 of the first row
+   * @throws TasteException if a database error occurs
+   */
+  private int getNumThings(String name, String sql) throws TasteException {
+    log.fine("Retrieving number of " + name + " in model...");
+    Connection connection = null;
+    Statement query = null;
+    ResultSet rows = null;
+    try {
+      connection = dataSource.getConnection();
+      query = connection.createStatement();
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Executing SQL query: " + sql);
+      }
+      rows = query.executeQuery(sql);
+      // Advance to the first row; the result of next() is not checked here
+      rows.next();
+      int count = rows.getInt(1);
+      return count;
+    } catch (SQLException sqle) {
+      log.log(Level.WARNING, "Exception while retrieving number of " + name, sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.safeClose(rows, query, connection);
+    }
+  }
+
+  public final void setPreference(Object userID, Object itemID, double value)
+          throws TasteException {
+    if (userID == null || itemID == null) {
+      throw new IllegalArgumentException("userID or itemID is null");
+    }
+    if (Double.isNaN(value)) {
+      throw new IllegalArgumentException("Invalid value: " + value);
+    }
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Setting preference for user '" + userID + "', item '" + itemID + "', value " + value);
+    }
+
+    Connection conn = null;
+    PreparedStatement stmt = null;
+
+    try {
+      conn = dataSource.getConnection();
+
+      stmt = conn.prepareStatement(setPreferenceSQL);
+      stmt.setObject(1, userID);
+      stmt.setObject(2, itemID);
+      stmt.setDouble(3, value);
+      stmt.setDouble(4, value);
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Executing SQL update: " + setPreferenceSQL);
+      }
+      stmt.executeUpdate();
+
+    } catch (SQLException sqle) {
+      log.log(Level.WARNING, "Exception while setting preference", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.safeClose(null, stmt, conn);
+    }
+  }
+
+  public final void removePreference(Object userID, Object itemID)
+          throws TasteException {
+    if (userID == null || itemID == null) {
+      throw new IllegalArgumentException("userID or itemID is null");
+    }
+
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Removing preference for user '" + userID + "', item '" + itemID + '\'');
+    }
+
+    Connection conn = null;
+    PreparedStatement stmt = null;
+
+    try {
+      conn = dataSource.getConnection();
+
+      stmt = conn.prepareStatement(removePreferenceSQL);
+      stmt.setObject(1, userID);
+      stmt.setObject(2, itemID);
+
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Executing SQL update: " + removePreferenceSQL);
+      }
+      stmt.executeUpdate();
+
+    } catch (SQLException sqle) {
+      log.log(Level.WARNING, "Exception while removing preference", sqle);
+      throw new TasteException(sqle);
+    } finally {
+      IOUtils.safeClose(null, stmt, conn);
+    }
+  }
+
+  /**
+   * Does nothing in this class: every other method here queries the database on each call, so
+   * there is no state held by this class to refresh.
+   */
+  public final void refresh() {
+    // do nothing
+  }
+
+
+  /**
+   * Builds a {@link Preference} from the current row of a result set and appends it to a
+   * collection. Column 1 is read as the item ID and column 2 as the preference value; the
+   * preference is built with a null {@link User}.
+   *
+   * @param rs result set positioned on the row to read
+   * @param prefs collection that receives the new preference
+   * @throws SQLException if the row cannot be read
+   */
+  private void addPreference(ResultSet rs, Collection<Preference> prefs)
+          throws SQLException {
+    String itemID = rs.getString(1);
+    Item preferred = buildItem(itemID);
+    double value = rs.getDouble(2);
+    Preference pref = buildPreference(null, preferred, value);
+    prefs.add(pref);
+  }
+
+  /**
+   * <p>Creates the {@link User} object for an ID and its preferences. This default
+   * implementation returns a new {@link GenericUser} keyed by a {@link String} ID.
+   * Subclasses may override to return a different {@link User} implementation.</p>
+   *
+   * @param id user ID
+   * @param prefs user preferences
+   * @return {@link GenericUser} by default
+   */
+  protected User buildUser(String id, List<Preference> prefs) {
+    User user = new GenericUser<String>(id, prefs);
+    return user;
+  }
+
+  /**
+   * <p>Creates the {@link Item} object for an ID. This default implementation returns a new
+   * {@link GenericItem} keyed by a {@link String} ID.
+   * Subclasses may override to return a different {@link Item} implementation.</p>
+   *
+   * @param id item ID
+   * @return {@link GenericItem} by default
+   */
+  protected Item buildItem(String id) {
+    Item item = new GenericItem<String>(id);
+    return item;
+  }
+
+  /**
+   * Creates the {@link Preference} object linking a user, an item and a value.
+   * Subclasses may override to return a different {@link Preference} implementation.
+   *
+   * @param user {@link User}
+   * @param item {@link Item}
+   * @param value preference value
+   * @return {@link GenericPreference} by default
+   */
+  protected Preference buildPreference(User user, Item item, double value) {
+    Preference preference = new GenericPreference(user, item, value);
+    return preference;
+  }
+
+  /**
+   * <p>An {@link java.util.Iterator} which returns {@link org.apache.mahout.cf.taste.model.User}s from a
+   * {@link java.sql.ResultSet}. This is a useful
+   * way to iterate over all user data since it does not require all data to be read into memory
+   * at once. It does however require that the DB connection be held open. Note that this class will
+   * only release database resources after {@link #hasNext()} has been called and has returned false,
+   * or after a database error; callers should make sure to "drain" the entire set of data to avoid
+   * tying up database resources.</p>
+   *
+   * <p>Consecutive rows with the same user ID (column 3) are grouped into one {@link User}; the
+   * grouping only compares adjacent rows, so the query presumably must return rows ordered by
+   * user ID -- confirm against the SQL supplied by subclasses.</p>
+   *
+   * <p>NOTE(review): this class calls <code>isLast()</code>, <code>isAfterLast()</code> and
+   * <code>previous()</code> on a result set obtained from the no-argument
+   * <code>createStatement()</code>. Confirm that the JDBC drivers in use support these calls on
+   * such a result set. Also confirm the behavior of {@link #hasNext()} for a query that returns
+   * no rows at all.</p>
+   */
+  private final class ResultSetUserIterator implements Iterator<User> {
+
+    private final Connection connection;
+    private final Statement statement;
+    private final ResultSet resultSet;
+    // Set once the database resources have been released; no further rows are read after that
+    private boolean closed;
+
+    /**
+     * Opens a connection and executes the query.
+     *
+     * @param dataSource source of the connection
+     * @param getUsersSQL query to execute
+     * @throws TasteException if a database error occurs; resources opened so far are released
+     */
+    private ResultSetUserIterator(DataSource dataSource, String getUsersSQL) throws TasteException {
+      try {
+        connection = dataSource.getConnection();
+        statement = connection.createStatement();
+        if (log.isLoggable(Level.FINE)) {
+          log.fine("Executing SQL query: " + getUsersSQL);
+        }
+        resultSet = statement.executeQuery(getUsersSQL);
+      } catch (SQLException sqle) {
+        close();
+        throw new TasteException(sqle);
+      }
+    }
+
+    /**
+     * Reports whether more rows remain, releasing database resources when they do not or when
+     * the cursor position cannot be determined.
+     */
+    public boolean hasNext() {
+      boolean nextExists = false;
+      if (!closed) {
+        try {
+          // No more results if cursor is pointing at last row, or after
+          // Thanks to Rolf W. for pointing out an earlier bug in this condition
+          if (resultSet.isLast() || resultSet.isAfterLast()) {
+            close();
+          } else {
+            nextExists = true;
+          }
+        } catch (SQLException sqle) {
+          log.log(Level.WARNING, "Unexpected exception while accessing ResultSet; continuing...", sqle);
+          close();
+        }
+      }
+      return nextExists;
+    }
+
+    /**
+     * Reads rows until the user ID changes and returns the user those rows describe.
+     *
+     * @throws NoSuchElementException if the iterator is closed, no rows remain, or a database
+     *  error occurs
+     */
+    public User next() {
+
+      if (closed) {
+        throw new NoSuchElementException();
+      }
+
+      String currentUserID = null;
+      List<Preference> prefs = new ArrayList<Preference>();
+
+      try {
+        while (resultSet.next()) {
+          String userID = resultSet.getString(3);
+          if (currentUserID == null) {
+            currentUserID = userID;
+          }
+          // Did we move on to a new user?
+          if (!userID.equals(currentUserID)) {
+            // back up one row
+            resultSet.previous();
+            // we're done for now
+            break;
+          }
+          // else add a new preference for the current user
+          addPreference(resultSet, prefs);
+        }
+      } catch (SQLException sqle) {
+        // No good way to handle this since we can't throw an exception
+        log.log(Level.WARNING, "Exception while iterating over users", sqle);
+        close();
+        throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
+      }
+
+      if (currentUserID == null) {
+        // nothing left?
+        throw new NoSuchElementException();
+      }
+
+      return buildUser(currentUserID, prefs);
+    }
+
+    /**
+     * @throws UnsupportedOperationException
+     */
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Marks this iterator closed and releases the result set, statement and connection.
+     */
+    private void close() {
+      closed = true;
+      IOUtils.safeClose(resultSet, statement, connection);
+    }
+
+  }
+
+  /**
+   * <p>An {@link java.util.Iterator} which returns {@link org.apache.mahout.cf.taste.model.Item}s from a
+   * {@link java.sql.ResultSet}. This is a useful way to iterate over all item data since it does not require
+   * all data to be read into memory at once. It does however require that the DB connection be held open. Note
+   * that this class will only release database resources after {@link #hasNext()} has been called and has returned
+   * <code>false</code>, or after a database error; callers should make sure to "drain" the entire set of data
+   * to avoid tying up database resources.</p>
+   *
+   * <p>Each row yields one {@link Item} whose ID is read from column 1.</p>
+   *
+   * <p>NOTE(review): this class calls <code>isLast()</code> and <code>isAfterLast()</code> on a
+   * result set obtained from the no-argument <code>createStatement()</code>. Confirm that the
+   * JDBC drivers in use support these calls on such a result set. Also confirm the behavior of
+   * {@link #hasNext()} for a query that returns no rows at all.</p>
+   */
+  private final class ResultSetItemIterator implements Iterator<Item> {
+
+    private final Connection connection;
+    private final Statement statement;
+    private final ResultSet resultSet;
+    // Set once the database resources have been released; no further rows are read after that
+    private boolean closed;
+
+    /**
+     * Opens a connection and executes the query.
+     *
+     * @param dataSource source of the connection
+     * @param getItemsSQL query to execute
+     * @throws TasteException if a database error occurs; resources opened so far are released
+     */
+    private ResultSetItemIterator(DataSource dataSource, String getItemsSQL) throws TasteException {
+      try {
+        connection = dataSource.getConnection();
+        statement = connection.createStatement();
+        if (log.isLoggable(Level.FINE)) {
+          log.fine("Executing SQL query: " + getItemsSQL);
+        }
+        resultSet = statement.executeQuery(getItemsSQL);
+      } catch (SQLException sqle) {
+        close();
+        throw new TasteException(sqle);
+      }
+    }
+
+    /**
+     * Reports whether more rows remain, releasing database resources when they do not or when
+     * the cursor position cannot be determined.
+     */
+    public boolean hasNext() {
+      boolean nextExists = false;
+      if (!closed) {
+        try {
+          // No more results if cursor is pointing at last row, or after
+          // Thanks to Rolf W. for pointing out an earlier bug in this condition
+          if (resultSet.isLast() || resultSet.isAfterLast()) {
+            close();
+          } else {
+            nextExists = true;
+          }
+        } catch (SQLException sqle) {
+          log.log(Level.WARNING, "Unexpected exception while accessing ResultSet; continuing...", sqle);
+          close();
+        }
+      }
+      return nextExists;
+    }
+
+    /**
+     * Advances one row and returns the item it describes.
+     *
+     * @throws NoSuchElementException if the iterator is closed, no rows remain, or a database
+     *  error occurs
+     */
+    public Item next() {
+
+      if (closed) {
+        throw new NoSuchElementException();
+      }
+
+      try {
+        if (resultSet.next()) {
+          return buildItem(resultSet.getString(1));
+        } else {
+          throw new NoSuchElementException();
+        }
+      } catch (SQLException sqle) {
+        // No good way to handle this since we can't throw an exception
+        log.log(Level.WARNING, "Exception while iterating over items", sqle);
+        close();
+        throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
+      }
+
+    }
+
+    /**
+     * @throws UnsupportedOperationException
+     */
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+
+    /**
+     * Marks this iterator closed and releases the result set, statement and connection.
+     */
+    private void close() {
+      closed = true;
+      IOUtils.safeClose(resultSet, statement, connection);
+    }
+
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/ConnectionPoolDataSource.java Fri May  9 14:35:12 2008
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.commons.dbcp.PoolingDataSource;
+import org.apache.commons.pool.ObjectPool;
+import org.apache.commons.pool.PoolableObjectFactory;
+import org.apache.commons.pool.impl.StackObjectPool;
+
+import javax.sql.DataSource;
+import java.io.PrintWriter;
+import java.sql.Connection;
+import java.sql.SQLException;
+import java.util.logging.Logger;
+
+/**
+ * <p>A wrapper {@link DataSource} which pools connections. Why can't Jakarta Commons DBCP provide this directly?</p>
+ */
+public final class ConnectionPoolDataSource implements DataSource {
+
+  private static final Logger log = Logger.getLogger(ConnectionPoolDataSource.class.getName());
+
+  private final DataSource delegate; // the PoolingDataSource assembled in the constructor
+
+  public ConnectionPoolDataSource(DataSource underlyingDataSource) {
+    if (underlyingDataSource == null) {
+      throw new IllegalArgumentException("underlyingDataSource is null");
+    }
+    PoolableObjectFactory poolFactory = new DataSourceConnectionFactory(underlyingDataSource);
+    ObjectPool connectionPool = new StackObjectPool(poolFactory);
+    this.delegate = new PoolingDataSource(connectionPool);
+  }
+
+  public Connection getConnection() throws SQLException {
+    return delegate.getConnection();
+  }
+
+  public Connection getConnection(String username, String password) throws SQLException {
+    return delegate.getConnection(username, password);
+  }
+
+  public PrintWriter getLogWriter() throws SQLException {
+    return delegate.getLogWriter();
+  }
+
+  public void setLogWriter(PrintWriter printWriter) throws SQLException {
+    delegate.setLogWriter(printWriter);
+  }
+
+  public void setLoginTimeout(int timeout) throws SQLException {
+    delegate.setLoginTimeout(timeout);
+  }
+
+  public int getLoginTimeout() throws SQLException {
+    return delegate.getLoginTimeout();
+  }
+
+  // The two methods below (unwrap and isWrapperFor) are new in JDK 6, so they are added to allow it to compile
+  // in JDK 6. Really, they should also delegate to the 'delegate' object. But that would then *only* compile
+  // in JDK 6. So for now they are dummy implementations which do little.
+
+  /**
+   * @throws SQLException always
+   */
+  public <T> T unwrap(Class<T> iface) throws SQLException {
+    throw new SQLException("Unsupported operation");
+  }
+
+  /**
+   * @return false always
+   */
+  public boolean isWrapperFor(Class<?> iface) {
+    return false;
+  }
+
+  private static class DataSourceConnectionFactory implements PoolableObjectFactory {
+
+    private final DataSource dataSource; // source of the real connections placed in the pool
+
+    private DataSourceConnectionFactory(DataSource dataSource) {
+      this.dataSource = dataSource;
+    }
+
+    public Object makeObject() throws SQLException {
+      log.fine("Obtaining pooled connection");
+      return dataSource.getConnection();
+    }
+
+    public void destroyObject(Object o) throws SQLException {
+      log.fine("Closing pooled connection");
+      ((Connection) o).close();
+    }
+
+    public boolean validateObject(Object o) {
+      return true; // no liveness check is made: every connection is reported as valid
+    }
+
+    public void activateObject(Object o) {
+      // do nothing
+    }
+
+    public void passivateObject(Object o) {
+      // do nothing
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/GenericJDBCDataModel.java Fri May  9 14:35:12 2008
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.IOUtils;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Properties;
+
+/**
+ * <p>A generic {@link org.apache.mahout.cf.taste.model.DataModel} designed for use with other JDBC data sources;
+ * one just specifies all necessary SQL queries to the constructor here. Optionally, the queries can
+ * be specified from a {@link Properties} object, {@link File}, or {@link InputStream}. This class is
+ * most appropriate when other existing implementations of {@link AbstractJDBCDataModel} are not suitable.
+ * If you are using this class to support a major database, consider contributing a specialized implementation
+ * of {@link AbstractJDBCDataModel} to the project for this database.</p>
+ */
+public class GenericJDBCDataModel extends AbstractJDBCDataModel {
+
+  public static final String DATA_SOURCE_KEY = "dataSource";
+  public static final String GET_USER_SQL_KEY = "getUserSQL";
+  public static final String GET_NUM_USERS_SQL_KEY = "getNumUsersSQL";
+  public static final String GET_NUM_ITEMS_SQL_KEY = "getNumItemsSQL";
+  public static final String SET_PREFERENCE_SQL_KEY = "setPreferenceSQL";
+  public static final String REMOVE_PREFERENCE_SQL_KEY = "removePreferenceSQL";
+  public static final String GET_USERS_SQL_KEY = "getUsersSQL";
+  public static final String GET_ITEMS_SQL_KEY = "getItemsSQL";
+  public static final String GET_ITEM_SQL_KEY = "getItemSQL";
+  public static final String GET_PREFS_FOR_ITEM_SQL_KEY = "getPrefsForItemSQL";
+  public static final String GET_USERS_PREFERRING_ITEM_SQL_KEY = "getUsersPreferringItemSQL";
+
+  /**
+   * <p>Specifies all SQL queries in a {@link Properties} object. See the <code>*_KEY</code>
+   * constants in this class (e.g. {@link #GET_USER_SQL_KEY}) for a list of all keys which
+   * must map to a value in this object.</p>
+   *
+   * @param props {@link Properties} object containing values
+   * @throws TasteException if anything goes wrong during initialization
+   */
+  public GenericJDBCDataModel(Properties props) throws TasteException {
+    super(lookupDataSource(props.getProperty(DATA_SOURCE_KEY)),
+          props.getProperty(GET_USER_SQL_KEY),
+          props.getProperty(GET_NUM_USERS_SQL_KEY),
+          props.getProperty(GET_NUM_ITEMS_SQL_KEY),
+          props.getProperty(SET_PREFERENCE_SQL_KEY),
+          props.getProperty(REMOVE_PREFERENCE_SQL_KEY),
+          props.getProperty(GET_USERS_SQL_KEY),
+          props.getProperty(GET_ITEMS_SQL_KEY),
+          props.getProperty(GET_ITEM_SQL_KEY),
+          props.getProperty(GET_PREFS_FOR_ITEM_SQL_KEY),
+          props.getProperty(GET_USERS_PREFERRING_ITEM_SQL_KEY));
+  }
+
+  /**
+   * <p>See {@link #GenericJDBCDataModel(java.util.Properties)}. This constructor reads values
+   * from a file instead, as if with {@link Properties#load(InputStream)}. So, the file
+   * should be in standard Java properties file format -- containing <code>key=value</code> pairs,
+   * one per line.</p>
+   *
+   * @param propertiesFile properties file
+   * @throws TasteException if the file cannot be found or read, or anything else goes wrong during initialization
+   */
+  public GenericJDBCDataModel(File propertiesFile) throws TasteException {
+    this(getPropertiesFromFile(propertiesFile));
+  }
+
+  /**
+   * <p>See {@link #GenericJDBCDataModel(Properties)}. This constructor reads values
+   * from a resource available in the classpath, as if with {@link Class#getResourceAsStream(String)} and
+   * {@link Properties#load(InputStream)}. This is useful if your configuration file is, for example,
+   * packaged in a JAR file that is in the classpath.</p>
+   *
+   * @param resourcePath path to resource in classpath (e.g. "/com/foo/TasteSQLQueries.properties")
+   * @throws TasteException if the resource cannot be found or read, or anything else goes wrong during initialization
+   */
+  public GenericJDBCDataModel(String resourcePath) throws TasteException {
+    this(getPropertiesFromStream(GenericJDBCDataModel.class.getResourceAsStream(resourcePath)));
+  }
+
+  private static Properties getPropertiesFromFile(File file) throws TasteException {
+    try {
+      return getPropertiesFromStream(new FileInputStream(file)); // the stream is closed by getPropertiesFromStream
+    } catch (FileNotFoundException fnfe) {
+      throw new TasteException(fnfe);
+    }
+  }
+
+  private static Properties getPropertiesFromStream(InputStream is) throws TasteException {
+    if (is == null) { // e.g. Class.getResourceAsStream() found no resource at the given path
+      throw new TasteException(new FileNotFoundException("Properties resource not found"));
+    }
+    try {
+      Properties props = new Properties();
+      props.load(is);
+      return props;
+    } catch (IOException ioe) {
+      throw new TasteException(ioe);
+    } finally {
+      IOUtils.quietClose(is); // always release the stream, whether or not loading succeeded
+    }
+  }
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLJDBCDataModel.java Fri May  9 14:35:12 2008
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model.jdbc;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+
+import javax.sql.DataSource;
+
+/**
+ * <p>A {@link DataModel} backed by a MySQL database and accessed via JDBC. It may work with other
+ * JDBC databases. By default, this class assumes that there is a {@link DataSource} available under the
+ * JNDI name "jdbc/taste", which gives access to a database with a "taste_preferences" table with the
+ * following schema:</p>
+ *
+ * <table>
+ * <tr><th>user_id</th><th>item_id</th><th>preference</th></tr>
+ * <tr><td>ABC</td><td>123</td><td>0.9</td></tr>
+ * <tr><td>ABC</td><td>456</td><td>0.1</td></tr>
+ * <tr><td>DEF</td><td>123</td><td>0.2</td></tr>
+ * <tr><td>DEF</td><td>789</td><td>0.3</td></tr>
+ * </table>
+ *
+ * <p><code>user_id</code> must have a type compatible with the Java <code>String</code> type.
+ * <code>item_id</code> must have a type compatible with the Java <code>String</code> type.
+ * <code>preference</code> must have a type compatible with the Java <code>double</code> type.
+ * For example, the following command sets up a suitable table in MySQL, complete with
+ * primary key and indexes:</p>
+ *
+ * <pre>
+ * CREATE TABLE taste_preferences (
+ *   user_id VARCHAR(10) NOT NULL,
+ *   item_id VARCHAR(10) NOT NULL,
+ *   preference FLOAT NOT NULL,
+ *   PRIMARY KEY (user_id, item_id),
+ *   INDEX (user_id),
+ *   INDEX (item_id)
+ * )
+ * </pre>
+ *
+ * <h3>Performance Notes</h3>
+ *
+ * <p>See the notes in {@link AbstractJDBCDataModel} regarding using connection pooling. It's pretty vital
+ * to performance.</p>
+ *
+ * <p>Some experimentation suggests that MySQL's InnoDB engine is faster than MyISAM for these kinds of
+ * applications. While MyISAM is the default and, I believe, generally considered the lighter-weight and faster
+ * of the two engines, my guess is the row-level locking of InnoDB helps here. Your mileage may vary.</p>
+ *
+ * <p>Here are some key settings that can be tuned for MySQL, and suggested size for a data set of around
+ * 1 million elements:</p>
+ *
+ * <ul>
+ * <li>innodb_buffer_pool_size=64M</li>
+ * <li>myisam_sort_buffer_size=64M</li>
+ * <li>query_cache_limit=64M</li>
+ * <li>query_cache_min_res_unit=512K</li>
+ * <li>query_cache_type=1</li>
+ * <li>query_cache_size=64M</li>
+ * </ul>
+ *
+ * <p>Thanks to Amila Jayasooriya for contributing MySQL notes above as part of Google Summer of Code 2007.</p>
+ */
+public class MySQLJDBCDataModel extends AbstractJDBCDataModel {
+
+  /**
+   * <p>Creates a {@link MySQLJDBCDataModel} using the default {@link DataSource}
+   * (named {@link #DEFAULT_DATASOURCE_NAME}) and default table/column names.</p>
+   *
+   * @throws TasteException if {@link DataSource} can't be found
+   */
+  public MySQLJDBCDataModel() throws TasteException {
+    this(DEFAULT_DATASOURCE_NAME);
+  }
+
+  /**
+   * <p>Creates a {@link MySQLJDBCDataModel} using the {@link DataSource}
+   * found under the given name, and using default table/column names.</p>
+   *
+   * @param dataSourceName name of {@link DataSource} to look up
+   * @throws TasteException if {@link DataSource} can't be found
+   */
+  public MySQLJDBCDataModel(String dataSourceName) throws TasteException {
+    this(lookupDataSource(dataSourceName),
+         DEFAULT_PREFERENCE_TABLE,
+         DEFAULT_USER_ID_COLUMN,
+         DEFAULT_ITEM_ID_COLUMN,
+         DEFAULT_PREFERENCE_COLUMN);
+  }
+
+  /**
+   * <p>Creates a {@link MySQLJDBCDataModel} using the given {@link DataSource}
+   * and default table/column names.</p>
+   *
+   * @param dataSource {@link DataSource} to use
+   */
+  public MySQLJDBCDataModel(DataSource dataSource) {
+    this(dataSource,
+         DEFAULT_PREFERENCE_TABLE,
+         DEFAULT_USER_ID_COLUMN,
+         DEFAULT_ITEM_ID_COLUMN,
+         DEFAULT_PREFERENCE_COLUMN);
+  }
+
+  /**
+   * <p>Creates a {@link MySQLJDBCDataModel} using the given {@link DataSource}
+   * and the given table/column names, which are concatenated directly into the SQL text.</p>
+   *
+   * @param dataSource {@link DataSource} to use
+   * @param preferenceTable name of table containing preference data
+   * @param userIDColumn user ID column name
+   * @param itemIDColumn item ID column name
+   * @param preferenceColumn preference column name
+   */
+  public MySQLJDBCDataModel(DataSource dataSource,
+                            String preferenceTable,
+                            String userIDColumn,
+                            String itemIDColumn,
+                            String preferenceColumn) {
+    super(dataSource,
+          // getUserSQL
+          "SELECT " + itemIDColumn + ", " + preferenceColumn + " FROM " + preferenceTable +
+          " WHERE " + userIDColumn + "=? ORDER BY " + itemIDColumn,
+          // getNumItemsSQL -- NOTE(review): GenericJDBCDataModel passes its num-users query in this slot; confirm order
+          "SELECT COUNT(DISTINCT " + itemIDColumn + ") FROM " + preferenceTable,
+          // getNumUsersSQL
+          "SELECT COUNT(DISTINCT " + userIDColumn + ") FROM " + preferenceTable,
+          // setPreferenceSQL (inserts the row, or updates the preference when the key already exists)
+          "INSERT INTO " + preferenceTable + " SET " + userIDColumn + "=?, " + itemIDColumn +
+          "=?, " + preferenceColumn + "=? ON DUPLICATE KEY UPDATE " + preferenceColumn + "=?",
+          // removePreference SQL
+          "DELETE FROM " + preferenceTable + " WHERE " + userIDColumn + "=? AND " + itemIDColumn + "=?",
+          // getUsersSQL
+          "SELECT " + itemIDColumn + ", " + preferenceColumn + ", " + userIDColumn + " FROM " +
+          preferenceTable + " ORDER BY " + userIDColumn + ", " + itemIDColumn,
+          // getItemsSQL
+          "SELECT DISTINCT " + itemIDColumn + " FROM " + preferenceTable + " ORDER BY " + itemIDColumn,
+          // getItemSQL
+          "SELECT 1 FROM " + preferenceTable + " WHERE " + itemIDColumn + "=?",
+          // getPrefsForItemSQL
+          "SELECT " + preferenceColumn + ", " + userIDColumn + " FROM " +
+          preferenceTable + " WHERE " + itemIDColumn + "=? ORDER BY " + userIDColumn,
+          // getUsersPreferringItemSQL
+          "SELECT DISTINCT " + userIDColumn + " FROM " + preferenceTable + " WHERE " + itemIDColumn +
+          "=? ORDER BY " + userIDColumn);
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java Fri May  9 14:35:12 2008
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
+
+/**
+ * <p>Base class holding the {@link UserCorrelation}, {@link DataModel} and sampling rate used by neighborhoods in this package.</p>
+ */
+abstract class AbstractUserNeighborhood implements UserNeighborhood {
+
+  private final UserCorrelation userCorrelation;
+  private final DataModel dataModel;
+  private final double samplingRate; // validated in the constructor to lie in (0,1]
+
+  AbstractUserNeighborhood(UserCorrelation userCorrelation,
+                           DataModel dataModel,
+                           double samplingRate) {
+    if (userCorrelation == null || dataModel == null) {
+      throw new IllegalArgumentException("userCorrelation or dataModel is null");
+    }
+    if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate > 1.0) {
+      throw new IllegalArgumentException("samplingRate must be in (0,1]");
+    }
+    this.userCorrelation = userCorrelation;
+    this.dataModel = dataModel;
+    this.samplingRate = samplingRate;
+  }
+
+  final UserCorrelation getUserCorrelation() {
+    return userCorrelation;
+  }
+
+  final DataModel getDataModel() {
+    return dataModel;
+  }
+
+  final boolean sampleForUser() {
+    return samplingRate >= 1.0 || Math.random() < samplingRate; // a rate of 1.0 skips the random draw entirely
+  }
+
+  public final void refresh() {
+    userCorrelation.refresh(); // the correlation is refreshed first, then the data model
+    dataModel.refresh();
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java Fri May  9 14:35:12 2008
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>Computes a neighborhood consisting of the nearest n {@link User}s to a given {@link User}.
+ * "Nearest" is defined by the given {@link UserCorrelation}.</p>
+ */
+public final class NearestNUserNeighborhood extends AbstractUserNeighborhood {
+
+  private static final Logger log = Logger.getLogger(NearestNUserNeighborhood.class.getName());
+
+  private final SoftCache<Object, Collection<User>> cache;
+
+  /**
+   * @param n neighborhood size
+   * @param userCorrelation nearness metric
+   * @param dataModel data model
+   * @throws IllegalArgumentException if n &lt; 1, or userCorrelation or dataModel are <code>null</code>
+   */
+  public NearestNUserNeighborhood(int n,
+                                  UserCorrelation userCorrelation,
+                                  DataModel dataModel) throws TasteException {
+    this(n, userCorrelation, dataModel, 1.0);
+  }
+
+  /**
+   * @param n neighborhood size
+   * @param userCorrelation nearness metric
+   * @param dataModel data model
+   * @param samplingRate percentage of users to consider when building neighborhood -- decrease to
+   * trade quality for performance
+   * @throws IllegalArgumentException if n &lt; 1 or samplingRate is NaN or not in (0,1],
+   * or userCorrelation or dataModel are <code>null</code>
+   */
+  public NearestNUserNeighborhood(int n,
+                                  UserCorrelation userCorrelation,
+                                  DataModel dataModel,
+                                  double samplingRate) throws TasteException {
+    super(userCorrelation, dataModel, samplingRate);
+    if (n < 1) {
+      throw new IllegalArgumentException("n must be at least 1");
+    }
+    this.cache = new SoftCache<Object, Collection<User>>(new Retriever(n), dataModel.getNumUsers());
+  }
+
+  public Collection<User> getUserNeighborhood(Object userID) throws TasteException {
+    return cache.get(userID);
+  }
+
+  @Override
+  public String toString() {
+    return "NearestNUserNeighborhood";
+  }
+
+  /** Supplies values for the cache: the (at most) n most highly correlated users for a given user ID. */
+  private final class Retriever implements SoftCache.Retriever<Object, Collection<User>> {
+
+    private final int n;
+
+    private Retriever(int n) {
+      this.n = n;
+    }
+
+    public Collection<User> getValue(Object key) throws TasteException {
+      if (log.isLoggable(Level.FINER)) {
+        log.finer("Computing neighborhood around user ID '" + key + '\'');
+      }
+
+      DataModel dataModel = getDataModel();
+      User theUser = dataModel.getUser(key);
+      UserCorrelation userCorrelationImpl = getUserCorrelation();
+
+      LinkedList<UserCorrelationPair> queue = new LinkedList<UserCorrelationPair>();
+      boolean full = false; // becomes true once the queue has first been trimmed back to n entries
+      for (User user : dataModel.getUsers()) {
+        if (sampleForUser() && !key.equals(user.getID())) { // skip unsampled users and the user itself
+          double theCorrelation = userCorrelationImpl.userCorrelation(theUser, user);
+          if (!Double.isNaN(theCorrelation) && (!full || theCorrelation > queue.getLast().theCorrelation)) {
+            ListIterator<UserCorrelationPair> iterator = queue.listIterator(queue.size());
+            while (iterator.hasPrevious()) { // scan from the tail for an entry at least as correlated
+              if (theCorrelation <= iterator.previous().theCorrelation) {
+                iterator.next(); // step forward again so the new pair is inserted after that entry
+                break;
+              }
+            }
+            iterator.add(new UserCorrelationPair(user, theCorrelation));
+            if (full) {
+              queue.removeLast();
+            } else if (queue.size() > n) {
+              full = true;
+              queue.removeLast();
+            }
+          }
+        }
+      }
+
+      List<User> neighborhood = new ArrayList<User>(queue.size());
+      for (UserCorrelationPair pair : queue) {
+        neighborhood.add(pair.user);
+      }
+
+      if (log.isLoggable(Level.FINER)) {
+        log.finer("UserNeighborhood around user ID '" + key + "' is: " + neighborhood);
+      }
+
+      return Collections.unmodifiableList(neighborhood);
+    }
+  }
+
+  private static final class UserCorrelationPair implements Comparable<UserCorrelationPair> {
+
+    final User user;
+    final double theCorrelation;
+
+    private UserCorrelationPair(User user, double theCorrelation) {
+      this.user = user;
+      this.theCorrelation = theCorrelation;
+    }
+
+    @Override
+    public int hashCode() {
+      return user.hashCode() ^ Double.valueOf(theCorrelation).hashCode();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof UserCorrelationPair)) {
+        return false;
+      }
+      UserCorrelationPair other = (UserCorrelationPair) o;
+      return user.equals(other.user) && theCorrelation == other.theCorrelation;
+    }
+
+    public int compareTo(UserCorrelationPair otherPair) {
+      double otherCorrelation = otherPair.theCorrelation;
+      return theCorrelation > otherCorrelation ? -1 : theCorrelation < otherCorrelation ? 1 : 0; // descending
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java Fri May  9 14:35:12 2008
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.neighborhood;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.correlation.UserCorrelation;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>Computes a neighborhood consisting of all {@link User}s whose similarity to the
+ * given {@link User} meets or exceeds a certain threshold. Similarity is defined by the given
+ * {@link UserCorrelation}.</p>
+ */
+public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood {
+
+  private static final Logger log = Logger.getLogger(ThresholdUserNeighborhood.class.getName());
+
+  private final SoftCache<Object, Collection<User>> cache;
+
+  /**
+   * @param threshold similarity threshold
+   * @param userCorrelation similarity metric
+   * @param dataModel data model
+   * @throws IllegalArgumentException if threshold is {@link Double#NaN}, or if userCorrelation
+   * or dataModel are <code>null</code>
+   * @throws TasteException if initialization fails, e.g. the user count cannot be read from dataModel
+   */
+  public ThresholdUserNeighborhood(double threshold,
+                                   UserCorrelation userCorrelation,
+                                   DataModel dataModel) throws TasteException {
+    this(threshold, userCorrelation, dataModel, 1.0);
+  }
+
+  /**
+   * @param threshold similarity threshold
+   * @param userCorrelation similarity metric
+   * @param dataModel data model
+   * @param samplingRate percentage of users to consider when building neighborhood -- decrease to
+   * trade quality for performance
+   * @throws IllegalArgumentException if threshold or samplingRate is {@link Double#NaN},
+   * or if samplingRate is not positive and less than or equal to 1.0, or if userCorrelation
+   * or dataModel are <code>null</code>
+   */
+  public ThresholdUserNeighborhood(double threshold,
+                                   UserCorrelation userCorrelation,
+                                   DataModel dataModel,
+                                   double samplingRate) throws TasteException {
+    super(userCorrelation, dataModel, samplingRate);
+    if (Double.isNaN(threshold)) {
+      throw new IllegalArgumentException("threshold must not be NaN");
+    }
+    this.cache = new SoftCache<Object, Collection<User>>(new Retriever(threshold), dataModel.getNumUsers());
+  }
+
+  public Collection<User> getUserNeighborhood(Object userID) throws TasteException {
+    return cache.get(userID);
+  }
+
+  @Override
+  public String toString() {
+    return "ThresholdUserNeighborhood";
+  }
+
+  /** Supplies values for the cache: all sampled users whose correlation with a given user ID reaches the threshold. */
+  private final class Retriever implements SoftCache.Retriever<Object, Collection<User>> {
+
+    private final double threshold;
+
+    private Retriever(double threshold) {
+      this.threshold = threshold;
+    }
+
+    public Collection<User> getValue(Object key) throws TasteException {
+      if (log.isLoggable(Level.FINER)) {
+        log.finer("Computing neighborhood around user ID '" + key + '\'');
+      }
+
+      DataModel dataModel = getDataModel();
+      User theUser = dataModel.getUser(key);
+      List<User> neighborhood = new ArrayList<User>();
+      Iterator<? extends User> users = dataModel.getUsers().iterator();
+      UserCorrelation userCorrelationImpl = getUserCorrelation();
+
+      while (users.hasNext()) {
+        User user = users.next();
+        if (sampleForUser() && !key.equals(user.getID())) { // skip unsampled users and the user itself
+          double theCorrelation = userCorrelationImpl.userCorrelation(theUser, user);
+          if (!Double.isNaN(theCorrelation) && theCorrelation >= threshold) {
+            neighborhood.add(user);
+          }
+        }
+      }
+
+      if (log.isLoggable(Level.FINER)) {
+        log.finer("UserNeighborhood around user ID '" + key + "' is: " + neighborhood);
+      }
+
+      return Collections.unmodifiableList(neighborhood);
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java Fri May  9 14:35:12 2008
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+public abstract class AbstractRecommender implements Recommender {
+
+  private static final Logger log = Logger.getLogger(AbstractRecommender.class.getName());
+
+  private final DataModel dataModel;
+  private final ReentrantLock refreshLock;
+
+  protected AbstractRecommender(DataModel dataModel) {
+    if (dataModel == null) {
+      throw new IllegalArgumentException("dataModel is null");
+    }
+    this.dataModel = dataModel;
+    this.refreshLock = new ReentrantLock();
+  }
+
+  /**
+   * <p>Default implementation which just calls
+   * {@link Recommender#recommend(Object, int, org.apache.mahout.cf.taste.recommender.Rescorer)},
+   * with a {@link org.apache.mahout.cf.taste.recommender.Rescorer} that does nothing.</p>
+   */
+  public List<RecommendedItem> recommend(Object userID, int howMany) throws TasteException {
+    return recommend(userID, howMany, NullRescorer.getItemInstance());
+  }
+
+  /**
+   * <p>Default implementation which just calls {@link DataModel#setPreference(Object, Object, double)}.</p>
+   *
+   * @throws IllegalArgumentException if userID or itemID is <code>null</code>, or if value is
+   * {@link Double#NaN}
+   */
+  public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+    if (userID == null || itemID == null) {
+      throw new IllegalArgumentException("userID or itemID is null");
+    }
+    if (Double.isNaN(value)) {
+      throw new IllegalArgumentException("Invalid value: " + value);
+    }
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Setting preference for user '" + userID + "', item '" + itemID + "', value " + value);
+    }
+    dataModel.setPreference(userID, itemID, value);
+  }
+
+  /**
+   * <p>Default implementation which just calls
+   * {@link DataModel#removePreference(Object, Object)} (Object, Object)}.</p>
+   *
+   * @throws IllegalArgumentException if userID or itemID is <code>null</code>
+   */
+  public void removePreference(Object userID, Object itemID) throws TasteException {
+    if (userID == null || itemID == null) {
+      throw new IllegalArgumentException("userID or itemID is null");
+    }
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Remove preference for user '" + userID + "', item '" + itemID + '\'');
+    }
+    dataModel.removePreference(userID, itemID);
+  }
+
+  public DataModel getDataModel() {
+    return dataModel;
+  }
+
+  public void refresh() {
+    if (refreshLock.isLocked()) {
+      return;
+    }
+    try {
+      refreshLock.lock();
+      dataModel.refresh();
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  /**
+   * @param theUser {@link User} being evaluated
+   * @return all {@link Item}s in the {@link DataModel} for which the {@link User} has not expressed a preference
+   * @throws TasteException if an error occurs while listing {@link Item}s
+   */
+  protected Set<Item> getAllOtherItems(User theUser) throws TasteException {
+    if (theUser == null) {
+      throw new IllegalArgumentException("theUser is null");
+    }
+    Set<Item> allItems = new HashSet<Item>(dataModel.getNumItems());
+    for (Item item : dataModel.getItems()) {
+      // If not already preferred by the user, add it
+      if (theUser.getPreferenceFor(item.getID()) == null) {
+        allItems.add(item);
+      }
+    }
+    return allItems;
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java Fri May  9 14:35:12 2008
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.io.Serializable;
+import java.util.Comparator;
+
+/**
+ * <p>A {@link Comparator} which orders {@link RecommendedItem}s by the value that a
+ * {@link org.apache.mahout.cf.taste.recommender.Rescorer} assigns to them, highest rescored
+ * value first.</p>
+ */
+final class ByRescoreComparator implements Comparator<RecommendedItem>, Serializable {
+
+  private final Rescorer<Item> rescorer;
+
+  /**
+   * @param rescorer {@link Rescorer} used to compute the value each item is ordered by
+   * @throws IllegalArgumentException if rescorer is <code>null</code>
+   */
+  ByRescoreComparator(Rescorer<Item> rescorer) {
+    if (rescorer == null) {
+      throw new IllegalArgumentException("rescorer is null");
+    }
+    this.rescorer = rescorer;
+  }
+
+  /**
+   * @return a negative value if the first item's rescored value is larger, a positive value
+   * if it is smaller, and 0 otherwise (which includes the case where either value is NaN)
+   */
+  public int compare(RecommendedItem o1, RecommendedItem o2) {
+    double first = rescorer.rescore(o1.getItem(), o1.getValue());
+    double second = rescorer.rescore(o2.getItem(), o2.getValue());
+    if (first > second) {
+      return -1;
+    }
+    return first < second ? 1 : 0;
+  }
+
+  @Override
+  public String toString() {
+    return "ByRescoreComparator[rescorer:" + rescorer + ']';
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java Fri May  9 14:35:12 2008
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.Pair;
+import org.apache.mahout.cf.taste.impl.common.SoftCache;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.model.Item;
+import org.apache.mahout.cf.taste.recommender.RecommendedItem;
+import org.apache.mahout.cf.taste.recommender.Recommender;
+import org.apache.mahout.cf.taste.recommender.Rescorer;
+
+import java.lang.ref.SoftReference;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+/**
+ * <p>A {@link Recommender} which caches the results from another {@link Recommender} in memory.
+ * Results are held by {@link SoftReference}s so that the JVM may reclaim memory from the recommendationCache
+ * in low-memory situations.</p>
+ *
+ * <p>Both recommendation lists (keyed by user ID) and estimated preferences (keyed by
+ * user ID / item ID pair) are cached.</p>
+ */
+public final class CachingRecommender implements Recommender {
+
+  private static final Logger log = Logger.getLogger(CachingRecommender.class.getName());
+
+  private final Recommender recommender;
+  /** Largest "howMany" requested so far; new cache entries are computed with this many items. */
+  private final AtomicInteger maxHowMany;
+  private final SoftCache<Object, Recommendations> recommendationCache;
+  private final SoftCache<Pair<?, ?>, Double> estimatedPrefCache;
+  private final ReentrantLock refreshLock;
+
+  /**
+   * @param recommender {@link Recommender} whose results are to be cached
+   * @throws TasteException if the number of users cannot be read from the underlying
+   * {@link DataModel}
+   * @throws IllegalArgumentException if recommender is <code>null</code>
+   */
+  public CachingRecommender(Recommender recommender) throws TasteException {
+    if (recommender == null) {
+      throw new IllegalArgumentException("recommender is null");
+    }
+    this.recommender = recommender;
+    this.maxHowMany = new AtomicInteger(1);
+    // Use "num users" as an upper limit on cache size. Rough guess.
+    int numUsers = recommender.getDataModel().getNumUsers();
+    this.recommendationCache =
+            new SoftCache<Object, Recommendations>(
+                    new RecommendationRetriever(this.recommender, this.maxHowMany),
+                    numUsers);
+    this.estimatedPrefCache =
+            new SoftCache<Pair<?, ?>, Double>(new EstimatedPrefRetriever(this.recommender), numUsers);
+    this.refreshLock = new ReentrantLock();
+  }
+
+  /**
+   * <p>Returns cached recommendations for the user, computing them first if needed. If the
+   * cached list is shorter than requested and has not yet been found to be exhaustive, the
+   * entry is discarded and recomputed once.</p>
+   *
+   * @throws IllegalArgumentException if userID is <code>null</code> or howMany is less than 1
+   */
+  public List<RecommendedItem> recommend(Object userID, int howMany) throws TasteException {
+    if (userID == null) {
+      throw new IllegalArgumentException("user ID is null");
+    }
+    if (howMany < 1) {
+      throw new IllegalArgumentException("howMany must be at least 1");
+    }
+
+    // Make the compare-and-raise of the maximum a single step across threads
+    synchronized (maxHowMany) {
+      if (howMany > maxHowMany.get()) {
+        maxHowMany.set(howMany);
+      }
+    }
+
+    Recommendations recommendations = recommendationCache.get(userID);
+    if (recommendations.items.size() < howMany && !recommendations.noMoreRecommendableItems) {
+      // The cached list may have been computed for a smaller howMany; recompute it once
+      clear(userID);
+      recommendations = recommendationCache.get(userID);
+      if (recommendations.items.size() < howMany) {
+        // Still too short: remember that, so later calls do not recompute again
+        recommendations.noMoreRecommendableItems = true;
+      }
+    }
+
+    return recommendations.items.size() > howMany ?
+           recommendations.items.subList(0, howMany) :
+           recommendations.items;
+  }
+
+  /**
+   * <p>Delegates directly to the underlying {@link Recommender}; results are not cached.</p>
+   */
+  public List<RecommendedItem> recommend(Object userID, int howMany, Rescorer<Item> rescorer)
+          throws TasteException {
+    // Hmm, hard to recommendationCache this since the rescorer may change
+    return recommender.recommend(userID, howMany, rescorer);
+  }
+
+  /**
+   * @return the estimated preference for the given user and item, served from the cache
+   * when available
+   */
+  public double estimatePreference(Object userID, Object itemID) throws TasteException {
+    return estimatedPrefCache.get(new Pair<Object, Object>(userID, itemID));
+  }
+
+  /**
+   * <p>Sets the preference on the underlying {@link Recommender}, then discards the user's
+   * cached recommendations and the cached estimate for this user/item pair.</p>
+   */
+  public void setPreference(Object userID, Object itemID, double value) throws TasteException {
+    recommender.setPreference(userID, itemID, value);
+    clear(userID);
+    // The cached estimate for exactly this pair is now stale; evict it
+    estimatedPrefCache.remove(new Pair<Object, Object>(userID, itemID));
+  }
+
+  /**
+   * <p>Removes the preference on the underlying {@link Recommender}, then discards the user's
+   * cached recommendations and the cached estimate for this user/item pair.</p>
+   */
+  public void removePreference(Object userID, Object itemID) throws TasteException {
+    recommender.removePreference(userID, itemID);
+    clear(userID);
+    // The cached estimate for exactly this pair is now stale; evict it
+    estimatedPrefCache.remove(new Pair<Object, Object>(userID, itemID));
+  }
+
+  public DataModel getDataModel() {
+    return recommender.getDataModel();
+  }
+
+  /**
+   * <p>Refreshes the underlying {@link Recommender} and then empties the caches. If a refresh
+   * is already in progress, whether on another thread or further up this thread's call stack,
+   * this call returns immediately without doing anything.</p>
+   */
+  public void refresh() {
+    // tryLock() makes "check and acquire" one atomic step; a separate isLocked() check
+    // followed by lock() lets a second thread slip past the check, block, and then run a
+    // redundant refresh. The isHeldByCurrentThread() test keeps re-entrant calls a no-op,
+    // since tryLock() alone would succeed for the thread that already owns the lock.
+    if (refreshLock.isHeldByCurrentThread() || !refreshLock.tryLock()) {
+      return;
+    }
+    // The lock is acquired before entering try, so finally only unlocks a lock we hold.
+    try {
+      recommender.refresh();
+      clear();
+    } finally {
+      refreshLock.unlock();
+    }
+  }
+
+  /**
+   * <p>Clears cached recommendations for the given user. Cached estimated preferences are
+   * not touched by this method.</p>
+   *
+   * @param userID clear cached data associated with this user ID
+   */
+  public void clear(Object userID) {
+    if (log.isLoggable(Level.FINE)) {
+      log.fine("Clearing recommendations for user ID '" + userID + "'...");
+    }
+    recommendationCache.remove(userID);
+  }
+
+  /**
+   * <p>Clears all cached recommendations and all cached estimated preferences.</p>
+   */
+  public void clear() {
+    log.fine("Clearing all recommendations...");
+    recommendationCache.clear();
+    // Estimates were computed from the same, now possibly outdated, data
+    estimatedPrefCache.clear();
+  }
+
+  @Override
+  public String toString() {
+    return "CachingRecommender[recommender:" + recommender + ']';
+  }
+
+  /**
+   * <p>Computes the recommendations for a user ID by asking the wrapped {@link Recommender}
+   * for as many items as the current value of the shared maximum.</p>
+   */
+  private static final class RecommendationRetriever implements SoftCache.Retriever<Object, Recommendations> {
+
+    private final Recommender recommender;
+    private final AtomicInteger maxHowMany;
+
+    private RecommendationRetriever(Recommender recommender, AtomicInteger maxHowMany) {
+      this.recommender = recommender;
+      this.maxHowMany = maxHowMany;
+    }
+
+    public Recommendations getValue(Object key) throws TasteException {
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Retrieving new recommendations for user ID '" + key + '\'');
+      }
+      return new Recommendations(Collections.unmodifiableList(recommender.recommend(key, maxHowMany.get())));
+    }
+  }
+
+  /**
+   * <p>Computes the estimated preference for a (user ID, item ID) pair by asking the
+   * wrapped {@link Recommender}.</p>
+   */
+  private static final class EstimatedPrefRetriever implements SoftCache.Retriever<Pair<?, ?>, Double> {
+
+    private final Recommender recommender;
+
+    private EstimatedPrefRetriever(Recommender recommender) {
+      this.recommender = recommender;
+    }
+
+    public Double getValue(Pair<?, ?> key) throws TasteException {
+      Object userID = key.getFirst();
+      Object itemID = key.getSecond();
+      if (log.isLoggable(Level.FINE)) {
+        log.fine("Retrieving estimated preference for user ID '" + userID + "\' and item ID \'" +
+                 itemID + '\'');
+      }
+      return recommender.estimatePreference(userID, itemID);
+    }
+  }
+
+  /**
+   * <p>Cache entry: a list of recommended items plus a flag recording that a recomputation
+   * still produced fewer items than were requested.</p>
+   */
+  private static final class Recommendations {
+
+    private final List<RecommendedItem> items;
+    private boolean noMoreRecommendableItems;
+
+    private Recommendations(List<RecommendedItem> items) {
+      this.items = items;
+      this.noMoreRecommendableItems = false;
+    }
+  }
+
+}

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java?rev=654943&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ClusterSimilarity.java Fri May  9 14:35:12 2008
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.recommender;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.User;
+
+import java.util.Collection;
+
+/**
+ * <p>Returns the "similarity" between two clusters of users, according to some
+ * definition of similarity. Implementations define different notions of similarity.</p>
+ *
+ * @see TreeClusteringRecommender
+ */
+public interface ClusterSimilarity extends Refreshable {
+
+  /**
+   * <p>Computes the "similarity" between two clusters of {@link User}s.</p>
+   *
+   * @param cluster1 first cluster of {@link User}s
+   * @param cluster2 second cluster of {@link User}s
+   * @return "similarity" between the clusters, as defined by the implementation. NOTE(review):
+   * the original description called this a "distance" and was cut off mid-sentence
+   * ("a positiv"); confirm the value range and whether larger means more similar.
+   * @throws TasteException if an error occurs while computing similarity, such as
+   * errors accessing an underlying {@link org.apache.mahout.cf.taste.model.DataModel}
+   * @throws IllegalArgumentException if either argument is null or empty
+   */
+  double getSimilarity(Collection<User> cluster1, Collection<User> cluster2) throws TasteException;
+
+}