You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/08 15:26:42 UTC

svn commit: r792131 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: eval/ model/jdbc/

Author: srowen
Date: Wed Jul  8 13:26:42 2009
New Revision: 792131

URL: http://svn.apache.org/viewvc?rev=792131&view=rev
Log:
Faster JDBC access: now uses true forward-only cursors, and adds workarounds to enable streaming in the MySQL driver

Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/eval/GenericRecommenderIRStatsEvaluator.java Wed Jul  8 13:26:42 2009
@@ -103,6 +103,7 @@
     RunningAverage fallOut = new FullRunningAverage();
     for (User user : dataModel.getUsers()) {
       if (random.nextDouble() < evaluationPercentage) {
+        long start = System.currentTimeMillis();
         Object id = user.getID();
         Collection<Item> relevantItems = new FastSet<Item>(at);
         Preference[] prefs = user.getPreferencesAsArray();
@@ -152,6 +153,8 @@
                              (double) (numItems - numRelevantItems));
           }
 
+          long end = System.currentTimeMillis();
+          log.info("Evaluated with user " + user + " in " + (end - start) + "ms");
           log.info("Precision/recall/fall-out: {} / {} / {}", new Object[] {
               precision.getAverage(), recall.getAverage(), fallOut.getAverage()
           });

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java Wed Jul  8 13:26:42 2009
@@ -100,7 +100,9 @@
 
     try {
       conn = getDataSource().getConnection();
-      stmt = conn.prepareStatement(getUserSQL);
+      stmt = conn.prepareStatement(getUserSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(getFetchSize());
       stmt.setObject(1, id);
 
       log.debug("Executing SQL query: {}", getUserSQL);
@@ -176,7 +178,9 @@
     ResultSet rs = null;
     try {
       conn = getDataSource().getConnection();
-      stmt = conn.prepareStatement(getPrefsForItemSQL);
+      stmt = conn.prepareStatement(getPrefsForItemSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(getFetchSize());
       stmt.setObject(1, itemID);
 
       log.debug("Executing SQL query: {}", getPrefsForItemSQL);
@@ -216,11 +220,16 @@
         connection = getDataSource().getConnection();
         // These settings should enable the ResultSet to be iterated in both directions
         statement = connection.prepareStatement(getUsersSQL,
-                                                ResultSet.TYPE_SCROLL_INSENSITIVE,
+                                                ResultSet.TYPE_FORWARD_ONLY,
                                                 ResultSet.CONCUR_READ_ONLY);
-        statement.setFetchDirection(ResultSet.FETCH_UNKNOWN);
+        statement.setFetchDirection(ResultSet.FETCH_FORWARD);
+        statement.setFetchSize(getFetchSize()); // TODO only for MySQL
         log.debug("Executing SQL query: {}", getUsersSQL);
         resultSet = statement.executeQuery();
+        boolean anyResults = resultSet.next();
+        if (!anyResults) {
+          close();
+        }
       } catch (SQLException sqle) {
         close();
         throw new TasteException(sqle);
@@ -234,7 +243,7 @@
         try {
           // No more results if cursor is pointing at last row, or after
           // Thanks to Rolf W. for pointing out an earlier bug in this condition
-          if (resultSet.isLast() || resultSet.isAfterLast()) {
+          if (resultSet.isAfterLast()) {
             close();
           } else {
             nextExists = true;
@@ -250,7 +259,7 @@
     @Override
     public User next() {
 
-      if (closed) {
+      if (!hasNext()) {
         throw new NoSuchElementException();
       }
 
@@ -258,21 +267,18 @@
       FastSet<Object> itemIDs = new FastSet<Object>();
 
       try {
-        while (resultSet.next()) {
+        do {
           String userID = resultSet.getString(2);
           if (currentUserID == null) {
             currentUserID = userID;
           }
           // Did we move on to a new user?
           if (!userID.equals(currentUserID)) {
-            // back up one row
-            resultSet.previous();
-            // we're done for now
             break;
           }
           // else add a new preference for the current user
           itemIDs.add(resultSet.getString(1));
-        }
+        } while (resultSet.next());
       } catch (SQLException sqle) {
         // No good way to handle this since we can't throw an exception
         log.warn("Exception while iterating over users", sqle);
@@ -280,11 +286,6 @@
         throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
       }
 
-      if (currentUserID == null) {
-        // nothing left?
-        throw new NoSuchElementException();
-      }
-
       return buildUser(currentUserID, itemIDs);
     }
 

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Wed Jul  8 13:26:42 2009
@@ -78,6 +78,8 @@
   public static final String DEFAULT_ITEM_ID_COLUMN = "item_id";
   public static final String DEFAULT_PREFERENCE_COLUMN = "preference";
 
+  static final int DEFAULT_FETCH_SIZE = 1000; // A max, "big" number of rows to buffer at once
+
   private final DataSource dataSource;
   private final String preferenceTable;
   private final String userIDColumn;
@@ -243,6 +245,10 @@
     return preferenceColumn;
   }
 
+  protected int getFetchSize() {
+    return DEFAULT_FETCH_SIZE;
+  }
+
   @Override
   public Iterable<? extends User> getUsers() throws TasteException {
     log.debug("Retrieving all users...");
@@ -265,7 +271,9 @@
 
     try {
       conn = dataSource.getConnection();
-      stmt = conn.prepareStatement(getUserSQL);
+      stmt = conn.prepareStatement(getUserSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(getFetchSize());
       stmt.setObject(1, id);
 
       log.debug("Executing SQL query: {}", getUserSQL);
@@ -354,7 +362,9 @@
     ResultSet rs = null;
     try {
       conn = dataSource.getConnection();
-      stmt = conn.prepareStatement(getPrefsForItemSQL);
+      stmt = conn.prepareStatement(getPrefsForItemSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
+      stmt.setFetchDirection(ResultSet.FETCH_FORWARD);
+      stmt.setFetchSize(getFetchSize());
       stmt.setObject(1, itemID);
 
       log.debug("Executing SQL query: {}", getPrefsForItemSQL);
@@ -559,11 +569,16 @@
         connection = dataSource.getConnection();
         // These settings should enable the ResultSet to be iterated in both directions
         statement = connection.prepareStatement(getUsersSQL,
-                                                ResultSet.TYPE_SCROLL_INSENSITIVE,
+                                                ResultSet.TYPE_FORWARD_ONLY,
                                                 ResultSet.CONCUR_READ_ONLY);
-        statement.setFetchDirection(ResultSet.FETCH_UNKNOWN);
+        statement.setFetchDirection(ResultSet.FETCH_FORWARD);
+        statement.setFetchSize(getFetchSize());
         log.debug("Executing SQL query: {}", getUsersSQL);
         resultSet = statement.executeQuery();
+        boolean anyResults = resultSet.next();
+        if (!anyResults) {
+          close();
+        }
       } catch (SQLException sqle) {
         close();
         throw new TasteException(sqle);
@@ -575,9 +590,7 @@
       boolean nextExists = false;
       if (!closed) {
         try {
-          // No more results if cursor is pointing at last row, or after
-          // Thanks to Rolf W. for pointing out an earlier bug in this condition
-          if (resultSet.isLast() || resultSet.isAfterLast()) {
+          if (resultSet.isAfterLast()) {
             close();
           } else {
             nextExists = true;
@@ -593,7 +606,7 @@
     @Override
     public User next() {
 
-      if (closed) {
+      if (!hasNext()) {
         throw new NoSuchElementException();
       }
 
@@ -601,21 +614,18 @@
       List<Preference> prefs = new ArrayList<Preference>();
 
       try {
-        while (resultSet.next()) {
+        do {
           String userID = resultSet.getString(3);
           if (currentUserID == null) {
             currentUserID = userID;
           }
           // Did we move on to a new user?
           if (!userID.equals(currentUserID)) {
-            // back up one row
-            resultSet.previous();
-            // we're done for now
             break;
           }
           // else add a new preference for the current user
           addPreference(resultSet, prefs);
-        }
+        } while (resultSet.next());
       } catch (SQLException sqle) {
         // No good way to handle this since we can't throw an exception
         log.warn("Exception while iterating over users", sqle);
@@ -623,11 +633,6 @@
         throw new NoSuchElementException("Can't retrieve more due to exception: " + sqle);
       }
 
-      if (currentUserID == null) {
-        // nothing left?
-        throw new NoSuchElementException();
-      }
-
       return buildUser(currentUserID, prefs);
     }
 
@@ -664,10 +669,15 @@
     private ResultSetItemIterator(DataSource dataSource, String getItemsSQL) throws TasteException {
       try {
         connection = dataSource.getConnection();
-        statement = connection.prepareStatement(getItemsSQL);
+        statement = connection.prepareStatement(getItemsSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
         statement.setFetchDirection(ResultSet.FETCH_FORWARD);
+        statement.setFetchSize(getFetchSize());
         log.debug("Executing SQL query: {}", getItemsSQL);
         resultSet = statement.executeQuery();
+        boolean anyResults = resultSet.next();
+        if (!anyResults) {
+          close();
+        }
       } catch (SQLException sqle) {
         close();
         throw new TasteException(sqle);
@@ -679,9 +689,7 @@
       boolean nextExists = false;
       if (!closed) {
         try {
-          // No more results if cursor is pointing at last row, or after
-          // Thanks to Rolf W. for pointing out an earlier bug in this condition
-          if (resultSet.isLast() || resultSet.isAfterLast()) {
+          if (resultSet.isAfterLast()) {
             close();
           } else {
             nextExists = true;
@@ -702,11 +710,9 @@
       }
 
       try {
-        if (resultSet.next()) {
-          return buildItem(resultSet.getString(1));
-        } else {
-          throw new NoSuchElementException();
-        }
+        Item item = buildItem(resultSet.getString(1));
+        resultSet.next();
+        return item;
       } catch (SQLException sqle) {
         // No good way to handle this since we can't throw an exception
         log.warn("Exception while iterating over items", sqle);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java?rev=792131&r1=792130&r2=792131&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/MySQLBooleanPrefJDBCDataModel.java Wed Jul  8 13:26:42 2009
@@ -122,4 +122,10 @@
           "WHERE tp1." + itemIDColumn + "=? and tp2." + itemIDColumn + "=?");
   }
 
+  @Override
+  protected int getFetchSize() {
+    // Need to return this for MySQL Connector/J to make it use streaming mode
+    return Integer.MIN_VALUE;
+  }
+
 }
\ No newline at end of file