You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sr...@apache.org on 2009/08/17 18:52:32 UTC
svn commit: r805035 - in
/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste:
impl/common/ impl/model/ model/
Author: srowen
Date: Mon Aug 17 16:52:31 2009
New Revision: 805035
URL: http://svn.apache.org/viewvc?rev=805035&view=rev
Log:
MAHOUT-162
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MemoryIDMigrator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java?rev=805035&r1=805034&r2=805035&view=diff
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java (original)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RandomUtils.java Mon Aug 17 16:52:31 2009
@@ -60,20 +60,6 @@
}
/**
- * See String.hashCode() from Apache Harmony
- */
- public static long hashStringToLong(String value) {
- long hash = 0L;
- int multiplier = 1;
- for (int i = value.length() - 1; i >= 0; i--) {
- hash += value.charAt(i) * multiplier;
- int shifted = multiplier << 5;
- multiplier = shifted - multiplier;
- }
- return hash;
- }
-
- /**
* <p>Finds next-largest "twin primes": numbers p and p+2 such that both are prime. Finds the smallest such p such
* that the smaller twin, p, is greater than or equal to n. Returns p+2, the larger of the two twins.</p>
*/
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java?rev=805035&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java Mon Aug 17 16:52:31 2009
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.model.IDMigrator;
+
+import java.nio.charset.Charset;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+/**
+ * Skeleton {@link IDMigrator} that derives the long ID of a string from the MD5 digest of the
+ * string's UTF-8 bytes, and leaves storage of the reverse (long-to-string) mapping to subclasses
+ * via {@link #storeMapping(long, String)} and {@link #toStringID(long)}.
+ */
+public abstract class AbstractIDMigrator implements IDMigrator {
+
+  // "UTF-8" is the canonical name that every Java platform is required to support;
+  // "UTF8" is only an alias for it.
+  private static final Charset UTF8_CHARSET = Charset.forName("UTF-8");
+
+  /** One digest instance per migrator; it is stateful, so every use synchronizes on it. */
+  private final MessageDigest md5Digest;
+
+  protected AbstractIDMigrator() {
+    try {
+      md5Digest = MessageDigest.getInstance("MD5");
+    } catch (NoSuchAlgorithmException nsae) {
+      // Can't happen
+      throw new IllegalStateException(nsae);
+    }
+  }
+
+  /**
+   * @param value string to hash
+   * @return the first 8 bytes of the MD5 digest of the UTF-8 encoding of {@code value}, packed into
+   *  a long with digest byte 0 as the most significant byte
+   */
+  protected final long hash(String value) {
+    byte[] md5hash;
+    synchronized (md5Digest) {
+      // digest() completes the hash and leaves the instance reset, so no explicit reset() is needed
+      md5hash = md5Digest.digest(value.getBytes(UTF8_CHARSET));
+    }
+    long hash = 0L;
+    for (int i = 0; i < 8; i++) {
+      // Mask with a long constant so each byte is taken as unsigned rather than sign-extended
+      hash = (hash << 8) | (md5hash[i] & 0xFFL);
+    }
+    return hash;
+  }
+
+  /**
+   * Computes the long ID for the given string and stores the reverse mapping on every call.
+   *
+   * @param stringID string ID to convert
+   * @return {@link #hash(String)} of {@code stringID}
+   * @throws TasteException if {@link #storeMapping(long, String)} fails
+   */
+  @Override
+  public long toLongID(String stringID) throws TasteException {
+    long longID = hash(stringID);
+    storeMapping(longID, stringID);
+    return longID;
+  }
+
+  /**
+   * Calls {@link #toLongID(String)} on each given string ID so that its reverse mapping is stored.
+   *
+   * @param stringIDs string IDs to register
+   * @throws TasteException if storing any mapping fails
+   */
+  @Override
+  public void initialize(Iterable<String> stringIDs) throws TasteException {
+    for (String stringID : stringIDs) {
+      toLongID(stringID);
+    }
+  }
+
+  /**
+   * Records that {@code longID} maps back to {@code stringID}.
+   *
+   * @param longID value computed from {@code stringID}
+   * @param stringID original string ID
+   * @throws TasteException if the mapping cannot be stored
+   */
+  protected abstract void storeMapping(long longID, String stringID) throws TasteException;
+
+  @Override
+  public abstract String toStringID(long longID) throws TasteException;
+
+}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java?rev=805035&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java Mon Aug 17 16:52:31 2009
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.IOUtils;
+
+import javax.sql.DataSource;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+/**
+ * {@link AbstractIDMigrator} whose long-to-string mapping is read and written through a JDBC
+ * {@link DataSource}. The two SQL statements it runs are supplied to the constructor.
+ */
+public abstract class AbstractJDBCIDMigrator extends AbstractIDMigrator {
+
+  public static final String DEFAULT_MAPPING_TABLE = "taste_id_mapping";
+  public static final String DEFAULT_LONG_ID_COLUMN = "long_id";
+  public static final String DEFAULT_STRING_ID_COLUMN = "string_id";
+
+  private final DataSource dataSource;
+  private final String getStringIDSQL;
+  private final String storeMappingSQL;
+
+  /**
+   * @param dataSource source of the connections used for every lookup and store
+   * @param getStringIDSQL query taking the long ID as its only parameter; the string ID is read
+   *  from the first column of the first row
+   * @param storeMappingSQL update taking the long ID as parameter 1 and the string ID as parameter 2
+   */
+  public AbstractJDBCIDMigrator(DataSource dataSource,
+                                String getStringIDSQL,
+                                String storeMappingSQL) {
+    this.dataSource = dataSource;
+    this.getStringIDSQL = getStringIDSQL;
+    this.storeMappingSQL = storeMappingSQL;
+  }
+
+  /**
+   * Executes the store-mapping statement for one (long ID, string ID) pair on a fresh connection.
+   *
+   * @throws TasteException wrapping any {@link SQLException}
+   */
+  @Override
+  protected final void storeMapping(long longID, String stringID) throws TasteException {
+    Connection connection = null;
+    PreparedStatement update = null;
+    try {
+      connection = dataSource.getConnection();
+      update = connection.prepareStatement(storeMappingSQL);
+      update.setLong(1, longID);
+      update.setString(2, stringID);
+      update.executeUpdate();
+    } catch (SQLException e) {
+      throw new TasteException(e);
+    } finally {
+      // An update produces no ResultSet, hence the null first argument
+      IOUtils.quietClose(null, update, connection);
+    }
+  }
+
+  /**
+   * Runs the lookup query for the given long ID on a fresh connection.
+   *
+   * @return the first column of the first row returned, or null if the query returns no rows
+   * @throws TasteException wrapping any {@link SQLException}
+   */
+  @Override
+  public final String toStringID(long longID) throws TasteException {
+    Connection connection = null;
+    PreparedStatement query = null;
+    ResultSet results = null;
+    try {
+      connection = dataSource.getConnection();
+      query = connection.prepareStatement(getStringIDSQL,
+                                          ResultSet.TYPE_FORWARD_ONLY,
+                                          ResultSet.CONCUR_READ_ONLY);
+      // At most one row is read, so ask the driver for a forward-only, single-row fetch
+      query.setFetchDirection(ResultSet.FETCH_FORWARD);
+      query.setFetchSize(1);
+      query.setLong(1, longID);
+      results = query.executeQuery();
+      return results.next() ? results.getString(1) : null;
+    } catch (SQLException e) {
+      throw new TasteException(e);
+    } finally {
+      IOUtils.quietClose(results, query, connection);
+    }
+  }
+
+}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MemoryIDMigrator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MemoryIDMigrator.java?rev=805035&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MemoryIDMigrator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MemoryIDMigrator.java Mon Aug 17 16:52:31 2009
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+
+public final class MemoryIDMigrator extends AbstractIDMigrator {
+
+ private final FastByIDMap<String> longToString;
+
+ public MemoryIDMigrator() {
+ this.longToString = new FastByIDMap<String>(100);
+ }
+
+ @Override
+ protected void storeMapping(long longID, String stringID) {
+ synchronized (longToString) {
+ longToString.put(longID, stringID);
+ }
+ }
+
+ @Override
+ public String toStringID(long longID) {
+ synchronized (longToString) {
+ return longToString.get(longID);
+ }
+ }
+
+}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java?rev=805035&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/model/MySQLJDBCIDMigrator.java Mon Aug 17 16:52:31 2009
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.model;
+
+import javax.sql.DataSource;
+
+/**
+ * <p>An implementation for MySQL. The following statement would create a table suitable for use with
+ * this class when it is constructed with the default table and column names
+ * ({@link #DEFAULT_MAPPING_TABLE}, {@link #DEFAULT_LONG_ID_COLUMN},
+ * {@link #DEFAULT_STRING_ID_COLUMN}):</p>
+ *
+ * <pre>
+ * CREATE TABLE taste_id_mapping (
+ *   long_id BIGINT NOT NULL,
+ *   string_id VARCHAR(255) NOT NULL,
+ *   PRIMARY KEY (long_id)
+ * )
+ * </pre>
+ *
+ * <p>Mappings are written with <code>REPLACE INTO</code>, and read back with a <code>SELECT</code>
+ * on the long ID column.</p>
+ */
+public final class MySQLJDBCIDMigrator extends AbstractJDBCIDMigrator {
+
+  /**
+   * Uses the default mapping table and column names.
+   *
+   * @param dataSource source of connections to the MySQL database
+   */
+  public MySQLJDBCIDMigrator(DataSource dataSource) {
+    this(dataSource, DEFAULT_MAPPING_TABLE, DEFAULT_LONG_ID_COLUMN, DEFAULT_STRING_ID_COLUMN);
+  }
+
+  /**
+   * The table and column names are concatenated directly into the SQL text (identifiers cannot be
+   * bound as statement parameters), so they must come from trusted configuration.
+   *
+   * @param dataSource source of connections to the MySQL database
+   * @param mappingTable name of the table holding the mapping
+   * @param longIDColumn name of the column holding the long ID
+   * @param stringIDColumn name of the column holding the string ID
+   */
+  public MySQLJDBCIDMigrator(DataSource dataSource,
+                             String mappingTable,
+                             String longIDColumn,
+                             String stringIDColumn) {
+    super(dataSource,
+          "SELECT " + stringIDColumn + " FROM " + mappingTable + " WHERE " + longIDColumn + "=?",
+          "REPLACE INTO " + mappingTable + " (" + longIDColumn + ',' + stringIDColumn + ") VALUES (?,?)");
+  }
+
+}
\ No newline at end of file
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java?rev=805035&view=auto
==============================================================================
--- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java (added)
+++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java Mon Aug 17 16:52:31 2009
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * <p>Mahout 0.2 changed the framework to operate only in terms of numeric (long) ID values
+ * for users and items. This is, obviously, not compatible with applications that used other
+ * key types -- most commonly {@link String}. Implementations of this interface provide support for
+ * mapping Strings to longs and vice versa in order to provide a smoother migration path to
+ * applications that must still use strings as IDs.</p>
+ *
+ * <p>The mapping from strings to 64-bit numeric values is fixed here, to provide a standard
+ * implementation that is 'portable' or reproducible outside the framework easily. See
+ * {@link #toLongID(String)}.</p>
+ *
+ * <p>Because this mapping is deterministically computable, it does not need to be stored. Instead,
+ * the job of implementations is to store the reverse mapping. There are an infinite number of strings
+ * but only a fixed number of longs, so it is possible for two strings to map to the same value.
+ * Implementations do not treat this as an error but rather retain only the most recent mapping,
+ * overwriting a previous mapping. The probability of collision in a 64-bit space is quite small, but
+ * not zero. However, in the context of a collaborative filtering problem, the consequence of a
+ * collision is small, at worst -- perhaps one user receives another's recommendations.</p>
+ *
+ * @since 0.2
+ */
+public interface IDMigrator {
+
+ /**
+ * @param stringID the string ID to convert
+ * @return the first 8 bytes of the MD5 hash of the bytes of the given {@link String}'s UTF-8 encoding,
+ * as a long. The reverse mapping is also stored.
+ * @throws TasteException if an error occurs while storing the mapping
+ */
+ long toLongID(String stringID) throws TasteException;
+
+ /**
+ * @param longID the long ID to look up
+ * @return the string ID most recently associated with the given long ID, or null if none exists
+ * @throws TasteException if an error occurs while retrieving the mapping
+ */
+ String toStringID(long longID) throws TasteException;
+
+ /**
+ * Make the mapping aware of the given string IDs.
+ *
+ * @param stringIDs the string IDs whose mappings should be stored
+ * @throws TasteException if an error occurs while storing the mappings
+ */
+ void initialize(Iterable<String> stringIDs) throws TasteException;
+
+}