You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/08 15:26:42 UTC
svn commit: r792131 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: eval/ model/jdbc/
Author: srowen
Date: Wed Jul 8 13:26:42 2009
New Revision: 792131
URL: http://svn.apache.org/viewvc?rev=792131&view=rev
Log:
Faster JDBC access: true forward-only cursor use now, and workarounds to enable streaming in MySQL driver
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Wed Jul 8 13:26:42 2009
@@ -103,6 +103,7 @@
RunningAverage fallOut = new FullRunningAverage();
for (User user : dataModel.getUsers()) {
if (random.nextDouble() < evaluationPercentage) {
+ long start = System.currentTimeMillis();
Object id = user.getID();
Collection<Item> relevantItems = new FastSet<Item>(at);
Preference[] prefs = user.getPreferencesAsArray();
@@ -152,6 +153,8 @@
(double) (numItems - numRelevantItems));
}
+ long end = System.currentTimeMillis();
+ log.info("Evaluated with user " + user + " in " + (end - start) + "ms");
log.info("Precision/recall/fall-out: {} / {} / {}", new Object[] {
precision.getAverage(), recall.getAverage(), fallOut.getAverage()
});
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java Wed Jul 8 13:26:42 2009
@@ -100,7 +100,9 @@
try {
conn = getDataSource().getConnection();
- stmt = conn.prepareStatement(getUserSQL);
+ stmt = conn.prepareStatement(getUserSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+ stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+ stmt.setFetchSize(getFetchSize());
stmt.setObject(1, id);
log.debug("Executing SQL query: {}", getUserSQL);
@@ -176,7 +178,9 @@
ResultSet rs = null;
try {
conn = getDataSource().getConnection();
- stmt = conn.prepareStatement(getPrefsForItemSQL);
+ stmt = conn.prepareStatement(getPrefsForItemSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+ stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+ stmt.setFetchSize(getFetchSize());
stmt.setObject(1, itemID);
log.debug("Executing SQL query: {}", getPrefsForItemSQL);
@@ -216,11 +220,16 @@
connection = getDataSource().getConnection();
// These settings should enable the ResultSet to be iterated in both directions
statement = connection.prepareStatement(getUsersSQL,
- ResultSet.TYPE_SCROLL_INSENSITIVE,
+ ResultSet.TYPE_FORWARD_ONLY,
ResultSet.CONCUR_READ_ONLY);
- statement.setFetchDirection(ResultSet.FETCH_UNKNOWN);
+ statement.setFetchDirection(ResultSet.FETCH_FORWARD);
+ statement.setFetchSize(getFetchSize()); // TODO only for MySQL
log.debug("Executing SQL query: {}", getUsersSQL);
resultSet = statement.executeQuery();
+ boolean anyResults = resultSet.next();
+ if (!anyResults) {
+ close();
+ }
} catch (SQLException sqle) {
close();
throw new TasteException(sqle);
@@ -234,7 +243,7 @@
try {
// No more results if cursor is pointing at last row, or after
// Thanks to Rolf W. for pointing out an earlier bug in this condition
- if (resultSet.isLast() || resultSet.isAfterLast()) {
+ if (resultSet.isAfterLast()) {
close();
} else {
nextExists = true;
@@ -250,7 +259,7 @@
@Override
public User next() {
- if (closed) {
+ if (!hasNext()) {
throw new NoSuchElementException();
}
@@ -258,21 +267,18 @@
FastSet<Object> itemIDs = new FastSet<Object>();
try {
- while (resultSet.next()) {
+ do {
String userID = resultSet.getString(2);
if (currentUserID == null) {
currentUserID = userID;
}
// Did we move on to a new user?
if (!userID.equals(currentUserID)) {
- // back up one row
- resultSet.previous();
- // we're done for now
break;
}
// else add a new preference for the current user
itemIDs.add(resultSet.getString(1));
- }
+ } while (resultSet.next());
} catch (SQLException sqle) {
// No good way to handle this since we can't throw an exception
log.warn("Exception while iterating over users", sqle);
@@ -280,11 +286,6 @@
throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
}
- if (currentUserID == null) {
- // nothing left?
- throw new NoSuchElementException();
- }
-
return buildUser(currentUserID, itemIDs);
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Wed Jul 8 13:26:42 2009
@@ -78,6 +78,8 @@
public static final String DEFAULT_ITEM_ID_COLUMN = "item_id";
public static final String DEFAULT_PREFERENCE_COLUMN = "preference";
+ static final int DEFAULT_FETCH_SIZE = 1000; // A max, "big" number of rows to buffer at once
+
private final DataSource dataSource;
private final String preferenceTable;
private final String userIDColumn;
@@ -243,6 +245,10 @@
return preferenceColumn;
}
+ protected int getFetchSize() {
+ return DEFAULT_FETCH_SIZE;
+ }
+
@Override
public Iterable<? extends User> getUsers() throws TasteException {
log.debug("Retrieving all users...");
@@ -265,7 +271,9 @@
try {
conn = dataSource.getConnection();
- stmt = conn.prepareStatement(getUserSQL);
+ stmt = conn.prepareStatement(getUserSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+ stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+ stmt.setFetchSize(getFetchSize());
stmt.setObject(1, id);
log.debug("Executing SQL query: {}", getUserSQL);
@@ -354,7 +362,9 @@
ResultSet rs = null;
try {
conn = dataSource.getConnection();
- stmt = conn.prepareStatement(getPrefsForItemSQL);
+ stmt = conn.prepareStatement(getPrefsForItemSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+ stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+ stmt.setFetchSize(getFetchSize());
stmt.setObject(1, itemID);
log.debug("Executing SQL query: {}", getPrefsForItemSQL);
@@ -559,11 +569,16 @@
connection = dataSource.getConnection();
// These settings should enable the ResultSet to be iterated in both directions
statement = connection.prepareStatement(getUsersSQL,
- ResultSet.TYPE_SCROLL_INSENSITIVE,
+ ResultSet.TYPE_FORWARD_ONLY,
ResultSet.CONCUR_READ_ONLY);
- statement.setFetchDirection(ResultSet.FETCH_UNKNOWN);
+ statement.setFetchDirection(ResultSet.FETCH_FORWARD);
+ statement.setFetchSize(getFetchSize());
log.debug("Executing SQL query: {}", getUsersSQL);
resultSet = statement.executeQuery();
+ boolean anyResults = resultSet.next();
+ if (!anyResults) {
+ close();
+ }
} catch (SQLException sqle) {
close();
throw new TasteException(sqle);
@@ -575,9 +590,7 @@
boolean nextExists = false;
if (!closed) {
try {
- // No more results if cursor is pointing at last row, or after
- // Thanks to Rolf W. for pointing out an earlier bug in this condition
- if (resultSet.isLast() || resultSet.isAfterLast()) {
+ if (resultSet.isAfterLast()) {
close();
} else {
nextExists = true;
@@ -593,7 +606,7 @@
@Override
public User next() {
- if (closed) {
+ if (!hasNext()) {
throw new NoSuchElementException();
}
@@ -601,21 +614,18 @@
List<Preference> prefs = new ArrayList<Preference>();
try {
- while (resultSet.next()) {
+ do {
String userID = resultSet.getString(3);
if (currentUserID == null) {
currentUserID = userID;
}
// Did we move on to a new user?
if (!userID.equals(currentUserID)) {
- // back up one row
- resultSet.previous();
- // we're done for now
break;
}
// else add a new preference for the current user
addPreference(resultSet, prefs);
- }
+ } while (resultSet.next());
} catch (SQLException sqle) {
// No good way to handle this since we can't throw an exception
log.warn("Exception while iterating over users", sqle);
@@ -623,11 +633,6 @@
throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
}
- if (currentUserID == null) {
- // nothing left?
- throw new NoSuchElementException();
- }
-
return buildUser(currentUserID, prefs);
}
@@ -664,10 +669,15 @@
private ResultSetItemIterator(DataSource dataSource, String getItemsSQL) throws TasteException {
try {
connection = dataSource.getConnection();
- statement = connection.prepareStatement(getItemsSQL);
+ statement = connection.prepareStatement(getItemsSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
statement.setFetchDirection(ResultSet.FETCH_FORWARD);
+ statement.setFetchSize(getFetchSize());
log.debug("Executing SQL query: {}", getItemsSQL);
resultSet = statement.executeQuery();
+ boolean anyResults = resultSet.next();
+ if (!anyResults) {
+ close();
+ }
} catch (SQLException sqle) {
close();
throw new TasteException(sqle);
@@ -679,9 +689,7 @@
boolean nextExists = false;
if (!closed) {
try {
- // No more results if cursor is pointing at last row, or after
- // Thanks to Rolf W. for pointing out an earlier bug in this condition
- if (resultSet.isLast() || resultSet.isAfterLast()) {
+ if (resultSet.isAfterLast()) {
close();
} else {
nextExists = true;
@@ -702,11 +710,9 @@
}
try {
- if (resultSet.next()) {
- return buildItem(resultSet.getString(1));
- } else {
- throw new NoSuchElementException();
- }
+ Item item = buildItem(resultSet.getString(1));
+ resultSet.next();
+ return item;
} catch (SQLException sqle) {
// No good way to handle this since we can't throw an exception
log.warn("Exception while iterating over items", sqle);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java Wed Jul 8 13:26:42 2009
@@ -122,4 +122,10 @@
"WHERE tp1." + itemIDColumn + "=? and tp2." + itemIDColumn + "=?");
}
+ @Override
+ protected int getFetchSize() {
+ // Need to return this for MySQL Connector/J to make it use streaming mode
+ return Integer.MIN_VALUE;
+ }
+
}
\ No newline at end of file