You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2014/05/18 18:51:28 UTC
svn commit: r1595634 - in /mahout/trunk: ./
mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/
mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/
Author: ssc
Date: Sun May 18 16:51:28 2014
New Revision: 1595634
URL: http://svn.apache.org/r1595634
Log:
MAHOUT-1385 Caching Encoders don't cache
Added:
mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1595634&r1=1595633&r2=1595634&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sun May 18 16:51:28 2014
@@ -2,6 +2,8 @@ Mahout Change Log
Release 1.0 - unreleased
+ MAHOUT-1385: Caching Encoders don't cache (Johannes Schulte, Manoj Awasthi via ssc)
+
MAHOUT-1527: Fix wikipedia classifier example (Andrew Palumbo via ssc)
MAHOUT-1542: Tutorial for playing with Mahout's Spark shell (ssc)
Modified: mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java?rev=1595634&r1=1595633&r2=1595634&view=diff
==============================================================================
--- mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java (original)
+++ mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java Sun May 18 16:51:28 2014
@@ -34,12 +34,12 @@ public class CachingContinuousValueEncod
private void initCaches() {
this.caches = new OpenIntIntHashMap[getProbes()];
- for (int ii = 0; ii < getProbes(); ii++) {
- caches[ii] = new OpenIntIntHashMap();
+ for (int probe = 0; probe < getProbes(); probe++) {
+ caches[probe] = new OpenIntIntHashMap();
}
}
- protected OpenIntIntHashMap[] getCaches() {
+ OpenIntIntHashMap[] getCaches() {
return caches;
}
@@ -49,14 +49,16 @@ public class CachingContinuousValueEncod
initCaches();
}
- protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
+ /**
+ * Returns the hash for the given probe, looking it up in the per-probe cache first and
+ * delegating to the superclass (then storing the result) on a miss.
+ *
+ * @throws IllegalArgumentException if dataSize differs from the size this encoder was created with
+ */
+ @Override
+ protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) {
Preconditions.checkArgument(dataSize == this.dataSize,
"dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
- if (caches[probe].containsKey(originalForm.hashCode())) {
- return caches[probe].get(originalForm.hashCode());
+ // Key on the array *contents*: byte[].hashCode() is identity-based, so equal byte sequences held in
+ // different array instances would never hit the cache and every call would add a new entry.
+ int originalHashcode = java.util.Arrays.hashCode(originalForm);
+ if (caches[probe].containsKey(originalHashcode)) {
+ return caches[probe].get(originalHashcode);
}
- int hash = hashForProbe(originalForm.getBytes(Charsets.UTF_8), dataSize, name, probe);
- caches[probe].put(originalForm.hashCode(), hash);
+ int hash = super.hashForProbe(originalForm, dataSize, name, probe);
+ caches[probe].put(originalHashcode, hash);
return hash;
}
}
Modified: mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java?rev=1595634&r1=1595633&r2=1595634&view=diff
==============================================================================
--- mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java (original)
+++ mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java Sun May 18 16:51:28 2014
@@ -17,15 +17,13 @@
package org.apache.mahout.vectorizer.encoders;
-import com.google.common.base.Charsets;
import org.apache.mahout.math.map.OpenIntIntHashMap;
-
import com.google.common.base.Preconditions;
public class CachingStaticWordValueEncoder extends StaticWordValueEncoder {
+
private final int dataSize;
private OpenIntIntHashMap[] caches;
-// private TIntIntHashMap[] caches;
public CachingStaticWordValueEncoder(String name, int dataSize) {
super(name);
@@ -34,13 +32,13 @@ public class CachingStaticWordValueEncod
}
private void initCaches() {
- this.caches = new OpenIntIntHashMap[getProbes()];
- for (int ii = 0; ii < getProbes(); ii++) {
- caches[ii] = new OpenIntIntHashMap();
+ caches = new OpenIntIntHashMap[getProbes()];
+ for (int probe = 0; probe < getProbes(); probe++) {
+ caches[probe] = new OpenIntIntHashMap();
}
}
- protected OpenIntIntHashMap[] getCaches() {
+ OpenIntIntHashMap[] getCaches() {
return caches;
}
@@ -50,14 +48,16 @@ public class CachingStaticWordValueEncod
initCaches();
}
- protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
+ /**
+ * Returns the hash for the given probe, looking it up in the per-probe cache first and
+ * delegating to the superclass (then storing the result) on a miss.
+ *
+ * @throws IllegalArgumentException if dataSize differs from the size this encoder was created with
+ */
+ @Override
+ protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) {
Preconditions.checkArgument(dataSize == this.dataSize,
"dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
- if (caches[probe].containsKey(originalForm.hashCode())) {
- return caches[probe].get(originalForm.hashCode());
+ // Key on the array *contents*: byte[].hashCode() is identity-based, so equal byte sequences held in
+ // different array instances would never hit the cache and every call would add a new entry.
+ int originalHashcode = java.util.Arrays.hashCode(originalForm);
+ if (caches[probe].containsKey(originalHashcode)) {
+ return caches[probe].get(originalHashcode);
}
- int hash = hashForProbe(originalForm.getBytes(Charsets.UTF_8), dataSize, name, probe);
- caches[probe].put(originalForm.hashCode(), hash);
+ int hash = super.hashForProbe(originalForm, dataSize, name, probe);
+ caches[probe].put(originalHashcode, hash);
return hash;
}
}
Added: mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java?rev=1595634&view=auto
==============================================================================
--- mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java (added)
+++ mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java Sun May 18 16:51:28 2014
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.vectorizer.encoders;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public class CachingEncoderTest extends MahoutTestCase {
+
+ private static final int CARDINALITY = 10;
+ private static final String NAME = "name";
+ private static final String WORD = "word";
+ private static final String CONTINUOUSVAL = "123";
+
+ @Test
+ public void testCacheAreUsedStaticWord() {
+ CachingStaticWordValueEncoder encoder = new CachingStaticWordValueEncoder(NAME, CARDINALITY);
+ Vector v = new DenseVector(CARDINALITY);
+ encoder.addToVector(WORD, v);
+ assertFalse("testCacheAreUsedStaticWord: cache should have values", encoder.getCaches()[0].isEmpty());
+ }
+
+ @Test
+ public void testCacheAreUsedContinuous() {
+ CachingContinuousValueEncoder encoder = new CachingContinuousValueEncoder(NAME, CARDINALITY);
+ Vector v = new DenseVector(CARDINALITY);
+ encoder.addToVector(CONTINUOUSVAL, 1.0, v);
+ assertFalse("testCacheAreUsedContinuous: cache should have values", encoder.getCaches()[0].isEmpty());
+ }
+
+}