You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/07/09 14:58:02 UTC
svn commit: r792536 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl:
common/ model/jdbc/
Author: srowen
Date: Thu Jul 9 12:58:02 2009
New Revision: 792536
URL: http://svn.apache.org/viewvc?rev=792536&view=rev
Log:
Small performance enhancement - notion of a skipping Iterator which can efficiently advance
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/ArrayIterator.java Thu Jul 9 12:58:02 2009
@@ -24,7 +24,7 @@
/**
* <p>Simple, fast {@link Iterator} for an array.</p>
*/
-public final class ArrayIterator<T> implements Iterator<T>, Iterable<T> {
+public final class ArrayIterator<T> implements SkippingIterator<T>, Iterable<T> {
private final T[] array;
private int position;
@@ -66,6 +66,13 @@
}
@Override
+ public void skip(int n) {
+ if (n > 0) {
+ position += n;
+ }
+ }
+
+ @Override
public Iterator<T> iterator() {
return this;
}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FileLineIterator.java Thu Jul 9 12:58:02 2009
@@ -25,7 +25,6 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;
@@ -38,7 +37,7 @@
*
* This class will uncompress files that end in .zip or .gz accordingly, too.
*/
-public final class FileLineIterator implements Iterator<String>, Closeable {
+public final class FileLineIterator implements SkippingIterator<String>, Closeable {
private final BufferedReader reader;
private String nextLine;
@@ -113,6 +112,17 @@
}
@Override
+ public void skip(int n) {
+ try {
+ for (int i = 0; i < n && nextLine != null; i++) {
+ nextLine = reader.readLine();
+ }
+ } catch (IOException ioe) {
+ close();
+ }
+ }
+
+ @Override
public void close() {
nextLine = null;
IOUtils.quietClose(reader);
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java Thu Jul 9 12:58:02 2009
@@ -58,12 +58,28 @@
private void doNext() {
boolean found = false;
- while (delegate.hasNext()) {
- T delegateNext = delegate.next();
- if (r.nextDouble() < samplingRate) {
- next = delegateNext;
+ if (delegate instanceof SkippingIterator) {
+ SkippingIterator<? extends T> skippingDelegate = (SkippingIterator<? extends T>) delegate;
+ int toSkip = 0;
+ while (r.nextDouble() >= samplingRate) {
+ toSkip++;
+ }
+ // Really, would be nicer to select value from geometric distribution, for small values of samplingRate
+ if (toSkip > 0) {
+ skippingDelegate.skip(toSkip);
+ }
+ if (skippingDelegate.hasNext()) {
+ next = skippingDelegate.next();
found = true;
- break;
+ }
+ } else {
+ while (delegate.hasNext()) {
+ T delegateNext = delegate.next();
+ if (r.nextDouble() < samplingRate) {
+ next = delegateNext;
+ found = true;
+ break;
+ }
}
}
if (!found) {
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java?rev=792536&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SkippingIterator.java Thu Jul 9 12:58:02 2009
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * Adds ability to skip ahead in an iterator, perhaps more efficiently
+ * than by calling {@link #next()} repeatedly.
+ */
+public interface SkippingIterator<V> extends Iterator<V> {
+
+ /**
+ * Skip the next n elements supplied by this {@link Iterator}. If there are
+ * less than n elements remaining, this skips all remaining elements in the
+ * {@link Iterator}. This method has the same effect as calling {@link #next()}
+ * n times, except that it will never throw {@link java.util.NoSuchElementException}.
+ */
+ void skip(int n);
+
+}
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractBooleanPrefJDBCDataModel.java Thu Jul 9 12:58:02 2009
@@ -25,6 +25,7 @@
import org.apache.mahout.cf.taste.impl.common.IOUtils;
import org.apache.mahout.cf.taste.impl.common.FastSet;
import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
+import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
import org.apache.mahout.cf.taste.impl.model.BooleanPrefUser;
import org.apache.mahout.cf.taste.impl.model.BooleanPreference;
@@ -35,7 +36,6 @@
import java.sql.SQLException;
import java.util.List;
import java.util.ArrayList;
-import java.util.Iterator;
import java.util.NoSuchElementException;
@@ -208,7 +208,7 @@
return new BooleanPreference(user, item);
}
- private final class ResultSetUserIterator implements Iterator<User> {
+ private final class ResultSetUserIterator implements SkippingIterator<User> {
private final Connection connection;
private final PreparedStatement statement;
@@ -223,7 +223,7 @@
ResultSet.TYPE_FORWARD_ONLY,
ResultSet.CONCUR_READ_ONLY);
statement.setFetchDirection(ResultSet.FETCH_FORWARD);
- statement.setFetchSize(getFetchSize()); // TODO only for MySQL
+ statement.setFetchSize(getFetchSize());
log.debug("Executing SQL query: {}", getUsersSQL);
resultSet = statement.executeQuery();
boolean anyResults = resultSet.next();
@@ -241,8 +241,6 @@
boolean nextExists = false;
if (!closed) {
try {
- // No more results if cursor is pointing at last row, or after
- // Thanks to Rolf W. for pointing out an earlier bug in this condition
if (resultSet.isAfterLast()) {
close();
} else {
@@ -302,6 +300,25 @@
IOUtils.quietClose(resultSet, statement, connection);
}
+ @Override
+ public void skip(int n) {
+ if (n >= 1 && hasNext()) {
+ try {
+ int distinctUserNamesSeen = 0;
+ String currentUserID = null;
+ do {
+ String userID = resultSet.getString(2);
+ if (!userID.equals(currentUserID)) {
+ distinctUserNamesSeen++;
+ }
+ currentUserID = userID;
+ } while (distinctUserNamesSeen <= n && resultSet.next());
+ } catch (SQLException sqle) {
+ log.warn("Exception while iterating over users", sqle);
+ close();
+ }
+ }
+ }
}
}
\ No newline at end of file
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java?rev=792536&r1=792535&r2=792536&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/jdbc/AbstractJDBCDataModel.java Thu Jul 9 12:58:02 2009
@@ -22,6 +22,7 @@
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.impl.common.IOUtils;
import org.apache.mahout.cf.taste.impl.common.IteratorIterable;
+import org.apache.mahout.cf.taste.impl.common.SkippingIterator;
import org.apache.mahout.cf.taste.impl.model.GenericItem;
import org.apache.mahout.cf.taste.impl.model.GenericPreference;
import org.apache.mahout.cf.taste.impl.model.GenericUser;
@@ -43,7 +44,6 @@
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
@@ -569,7 +569,7 @@
* only release database resources after {@link #hasNext()} has been called and has returned false;
* callers should make sure to "drain" the entire set of data to avoid tying up database resources.</p>
*/
- private final class ResultSetUserIterator implements Iterator<User> {
+ private final class ResultSetUserIterator implements SkippingIterator<User> {
private final Connection connection;
private final PreparedStatement statement;
@@ -661,6 +661,26 @@
IOUtils.quietClose(resultSet, statement, connection);
}
+ @Override
+ public void skip(int n) {
+ if (n >= 1 && hasNext()) {
+ try {
+ int distinctUserNamesSeen = 0;
+ String currentUserID = null;
+ do {
+ String userID = resultSet.getString(3);
+ if (!userID.equals(currentUserID)) {
+ distinctUserNamesSeen++;
+ }
+ currentUserID = userID;
+ } while (distinctUserNamesSeen <= n && resultSet.next());
+ } catch (SQLException sqle) {
+ log.warn("Exception while iterating over users", sqle);
+ close();
+ }
+ }
+ }
+
}
/**
@@ -671,7 +691,7 @@
* <code>false</code>; callers should make sure to "drain" the entire set of data to avoid tying up database
* resources.</p>
*/
- private final class ResultSetItemIterator implements Iterator<Item> {
+ private final class ResultSetItemIterator implements SkippingIterator<Item> {
private final Connection connection;
private final PreparedStatement statement;
@@ -747,6 +767,16 @@
IOUtils.quietClose(resultSet, statement, connection);
}
+ @Override
+ public void skip(int n) {
+ try {
+ resultSet.relative(n);
+ } catch (SQLException sqle) {
+ log.warn("Exception while iterating over items", sqle);
+ close();
+ }
+ }
+
}
}