You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/09 14:58:02 UTC

svn commit: r792536 - in /lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl: common/ model/jdbc/

Author: srowen
Date: Thu Jul  9 12:58:02 2009
New Revision: 792536

URL: http://svn.apache.org/viewvc?rev=792536&view=rev
Log:
Small performance enhancement - notion of a skipping Iterator which can efficiently advance

Added:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
Modified:
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
    lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java Thu Jul  9 12:58:02 2009
@@ -24,7 +24,7 @@
 /**
  * <p>Simple, fast {@link Iterator} for an array.</p>
  */
-public final class ArrayIterator<T> implements Iterator<T>, Iterable<T> {
+public final class ArrayIterator<T> implements SkippingIterator<T>, Iterable<T> {
 
   private final T[] array;
   private int position;
@@ -66,6 +66,13 @@
   }
 
   @Override
+  public void skip(int n) {
+    if (n > 0) {
+      position += n;
+    }
+  }
+
+  @Override
   public Iterator<T> iterator() {
     return this;
   }

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Thu Jul  9 12:58:02 2009
@@ -25,7 +25,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
-import java.util.Iterator;
 import java.util.NoSuchElementException;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.ZipInputStream;
@@ -38,7 +37,7 @@
  *
  * This class will uncompress files that end in .zip or .gz accordingly, too.
  */
-public final class FileLineIterator implements Iterator<String>, Closeable {
+public final class FileLineIterator implements SkippingIterator<String>, Closeable {
 
   private final BufferedReader reader;
   private String nextLine;
@@ -113,6 +112,17 @@
   }
 
   @Override
+  public void skip(int n) {
+    try {
+      for (int i = 0; i < n && nextLine != null; i++) {
+        nextLine = reader.readLine();
+      }
+    } catch (IOException ioe) {
+      close();
+    }
+  }
+
+  @Override
   public void close() {
     nextLine = null;
     IOUtils.quietClose(reader);

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java Thu Jul  9 12:58:02 2009
@@ -58,12 +58,28 @@
 
   private void doNext() {
     boolean found = false;
-    while (delegate.hasNext()) {
-      T delegateNext = delegate.next();
-      if (r.nextDouble() < samplingRate) {
-        next = delegateNext;
+    if (delegate instanceof SkippingIterator) {
+      SkippingIterator<? extends T> skippingDelegate = (SkippingIterator<? extends T>) delegate;
+      int toSkip = 0;
+      while (r.nextDouble() >= samplingRate) {
+        toSkip++;
+      }
+      // Really, would be nicer to select value from geometric distribution, for small values of samplingRate
+      if (toSkip > 0) {
+        skippingDelegate.skip(toSkip);
+      }
+      if (skippingDelegate.hasNext()) {
+        next = skippingDelegate.next();
         found = true;
-        break;
+      }
+    } else {
+      while (delegate.hasNext()) {
+        T delegateNext = delegate.next();
+        if (r.nextDouble() < samplingRate) {
+          next = delegateNext;
+          found = true;
+          break;
+        }
       }
     }
     if (!found) {

Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java?rev=792536&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java Thu Jul  9 12:58:02 2009
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * Adds ability to skip ahead in an iterator, perhaps more efficiently
+ * than by calling {@link #next()} repeatedly.
+ */
+public interface SkippingIterator<V> extends Iterator<V> {
+
+  /**
+   * Skip the next n elements supplied by this {@link Iterator}. If there are
+   * less than n elements remaining, this skips all remaining elements in the
+   * {@link Iterator}. This method has the same effect as calling {@link #next()}
+   * n times, except that it will never throw {@link java.util.NoSuchElementException}.
+   */
+  void skip(int n);
+
+}

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java Thu Jul  9 12:58:02 2009
@@ -25,6 +25,7 @@
 import org.apache.mahout.cf.taste.impl.common.IOUtils;
 import org.apache.mahout.cf.taste.impl.common.FastSet;
 import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
+import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
 import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
 import org.apache.mahout.cf.taste.impl.model.BooleanPreference;
 
@@ -35,7 +36,6 @@
 import java.sql.SQLException;
 import java.util.List;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.NoSuchElementException;
 
 
@@ -208,7 +208,7 @@
     return new BooleanPreference(user, item);
   }
 
-  private final class ResultSetUserIterator implements Iterator<User> {
+  private final class ResultSetUserIterator implements SkippingIterator<User> {
 
     private final Connection connection;
     private final PreparedStatement statement;
@@ -223,7 +223,7 @@
                                                 ResultSet.TYPE_FORWARD_ONLY,
                                                 ResultSet.CONCUR_READ_ONLY);
         statement.setFetchDirection(ResultSet.FETCH_FORWARD);
-        statement.setFetchSize(getFetchSize()); // TODO only for MySQL
+        statement.setFetchSize(getFetchSize());
         log.debug("Executing SQL query: {}", getUsersSQL);
         resultSet = statement.executeQuery();
         boolean anyResults = resultSet.next();
@@ -241,8 +241,6 @@
       boolean nextExists = false;
       if (!closed) {
         try {
-          // No more results if cursor is pointing at last row, or after
-          // Thanks to Rolf W. for pointing out an earlier bug in this condition
           if (resultSet.isAfterLast()) {
             close();
           } else {
@@ -302,6 +300,25 @@
       IOUtils.quietClose(resultSet, statement, connection);
     }
 
+    @Override
+    public void skip(int n) {
+      if (n >= 1 && hasNext()) {
+        try {
+          int distinctUserNamesSeen = 0;
+          String currentUserID = null;
+          do {
+            String userID = resultSet.getString(2);
+            if (!userID.equals(currentUserID)) {
+              distinctUserNamesSeen++;
+            }
+            currentUserID = userID;
+          } while (distinctUserNamesSeen <= n && resultSet.next());
+        } catch (SQLException sqle) {
+          log.warn("Exception while iterating over users", sqle);
+          close();
+        }
+      }
+    }
   }
 
 }
\ No newline at end of file

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Thu Jul  9 12:58:02 2009
@@ -22,6 +22,7 @@
 import org.apache.mahout.cf.taste.common.NoSuchUserException;
 import org.apache.mahout.cf.taste.impl.common.IOUtils;
 import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
+import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
 import org.apache.mahout.cf.taste.impl.model.GenericItem;
 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
 import org.apache.mahout.cf.taste.impl.model.GenericUser;
@@ -43,7 +44,6 @@
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
 
@@ -569,7 +569,7 @@
    * only release database resources after {@link #hasNext()} has been called and has returned false;
    * callers should make sure to "drain" the entire set of data to avoid tying up database resources.</p>
    */
-  private final class ResultSetUserIterator implements Iterator<User> {
+  private final class ResultSetUserIterator implements SkippingIterator<User> {
 
     private final Connection connection;
     private final PreparedStatement statement;
@@ -661,6 +661,26 @@
       IOUtils.quietClose(resultSet, statement, connection);
     }
 
+    @Override
+    public void skip(int n) {
+      if (n >= 1 && hasNext()) {
+        try {
+          int distinctUserNamesSeen = 0;
+          String currentUserID = null;
+          do {
+            String userID = resultSet.getString(3);
+            if (!userID.equals(currentUserID)) {
+              distinctUserNamesSeen++;
+            }
+            currentUserID = userID;
+          } while (distinctUserNamesSeen <= n && resultSet.next());
+        } catch (SQLException sqle) {
+          log.warn("Exception while iterating over users", sqle);
+          close();
+        }
+      }
+    }
+
   }
 
   /**
@@ -671,7 +691,7 @@
    * <code>false</code>; callers should make sure to "drain" the entire set of data to avoid tying up database
    * resources.</p>
    */
-  private final class ResultSetItemIterator implements Iterator<Item> {
+  private final class ResultSetItemIterator implements SkippingIterator<Item> {
 
     private final Connection connection;
     private final PreparedStatement statement;
@@ -747,6 +767,16 @@
       IOUtils.quietClose(resultSet, statement, connection);
     }
 
+    @Override
+    public void skip(int n) {
+      try {
+        resultSet.relative(n);
+      } catch (SQLException sqle) {
+        log.warn("Exception while iterating over items", sqle);
+        close();
+      }
+    }
+
   }
 
 }