You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ss...@apache.org on 2014/05/18 18:51:28 UTC

svn commit: r1595634 - in /mahout/trunk: ./ mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/ mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/

Author: ssc
Date: Sun May 18 16:51:28 2014
New Revision: 1595634

URL: http://svn.apache.org/r1595634
Log:
MAHOUT-1385 Caching Encoders don't cache

Added:
    mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java
Modified:
    mahout/trunk/CHANGELOG
    mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
    mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java

Modified: mahout/trunk/CHANGELOG
URL: http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1595634&r1=1595633&r2=1595634&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sun May 18 16:51:28 2014
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 1.0 - unreleased
 
+  MAHOUT-1385: Caching Encoders don't cache (Johannes Schulte, Manoj Awasthi via ssc)
+
   MAHOUT-1527: Fix wikipedia classifier example (Andrew Palumbo via ssc)
 
   MAHOUT-1542: Tutorial for playing with Mahout's Spark shell (ssc)

Modified: mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java?rev=1595634&r1=1595633&r2=1595634&view=diff
==============================================================================
--- mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java (original)
+++ mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingContinuousValueEncoder.java Sun May 18 16:51:28 2014
@@ -34,12 +34,12 @@ public class CachingContinuousValueEncod
 
   private void initCaches() {
     this.caches = new OpenIntIntHashMap[getProbes()];
-    for (int ii = 0; ii < getProbes(); ii++) {
-      caches[ii] = new OpenIntIntHashMap();
+    for (int probe = 0; probe < getProbes(); probe++) { // one cache map per probe index
+      caches[probe] = new OpenIntIntHashMap();
     }
   }
 
-  protected OpenIntIntHashMap[] getCaches() {
+  OpenIntIntHashMap[] getCaches() { // package-private: read by CachingEncoderTest in the same package
     return caches;
   }
 
@@ -49,14 +49,16 @@ public class CachingContinuousValueEncod
     initCaches();
   }
 
-  protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
+  @Override // returns the hash for (originalForm, probe), remembering it in caches[probe]
+  protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) {
     Preconditions.checkArgument(dataSize == this.dataSize,
         "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
-    if (caches[probe].containsKey(originalForm.hashCode())) {
-      return caches[probe].get(originalForm.hashCode());
+    int originalHashcode = java.util.Arrays.hashCode(originalForm); // content hash; byte[].hashCode() is identity-based and never repeats across arrays
+    if (caches[probe].containsKey(originalHashcode)) {
+      return caches[probe].get(originalHashcode);
     }
-    int hash = hashForProbe(originalForm.getBytes(Charsets.UTF_8), dataSize, name, probe);
-    caches[probe].put(originalForm.hashCode(), hash);
+    int hash = super.hashForProbe(originalForm, dataSize, name, probe); // cache miss: delegate to the non-caching encoder
+    caches[probe].put(originalHashcode, hash);
     return hash;
   }
 }

Modified: mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java?rev=1595634&r1=1595633&r2=1595634&view=diff
==============================================================================
--- mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java (original)
+++ mahout/trunk/mrlegacy/src/main/java/org/apache/mahout/vectorizer/encoders/CachingStaticWordValueEncoder.java Sun May 18 16:51:28 2014
@@ -17,15 +17,13 @@
 
 package org.apache.mahout.vectorizer.encoders;
 
-import com.google.common.base.Charsets;
 import org.apache.mahout.math.map.OpenIntIntHashMap;
-
 import com.google.common.base.Preconditions;
 
 public class CachingStaticWordValueEncoder extends StaticWordValueEncoder {
+
   private final int dataSize;
   private OpenIntIntHashMap[] caches;
-//  private TIntIntHashMap[] caches;
 
   public CachingStaticWordValueEncoder(String name, int dataSize) {
     super(name);
@@ -34,13 +32,13 @@ public class CachingStaticWordValueEncod
   }
 
   private void initCaches() {
-    this.caches = new OpenIntIntHashMap[getProbes()];
-    for (int ii = 0; ii < getProbes(); ii++) {
-      caches[ii] = new OpenIntIntHashMap();
+    caches = new OpenIntIntHashMap[getProbes()]; // one cache map per probe index
+    for (int probe = 0; probe < getProbes(); probe++) {
+      caches[probe] = new OpenIntIntHashMap();
     }
   }
 
-  protected OpenIntIntHashMap[] getCaches() {
+  OpenIntIntHashMap[] getCaches() { // package-private: read by CachingEncoderTest in the same package
     return caches;
   }
 
@@ -50,14 +48,16 @@ public class CachingStaticWordValueEncod
     initCaches();
   }
 
-  protected int hashForProbe(String originalForm, int dataSize, String name, int probe) {
+  @Override // returns the hash for (originalForm, probe), remembering it in caches[probe]
+  protected int hashForProbe(byte[] originalForm, int dataSize, String name, int probe) {
     Preconditions.checkArgument(dataSize == this.dataSize,
         "dataSize argument [" + dataSize + "] does not match expected dataSize [" + this.dataSize + ']');
-    if (caches[probe].containsKey(originalForm.hashCode())) {
-      return caches[probe].get(originalForm.hashCode());
+    int originalHashcode = java.util.Arrays.hashCode(originalForm); // content hash; byte[].hashCode() is identity-based and never repeats across arrays
+    if (caches[probe].containsKey(originalHashcode)) {
+      return caches[probe].get(originalHashcode);
     }
-    int hash = hashForProbe(originalForm.getBytes(Charsets.UTF_8), dataSize, name, probe);
-    caches[probe].put(originalForm.hashCode(), hash);
+    int hash = super.hashForProbe(originalForm, dataSize, name, probe); // cache miss: delegate to the non-caching encoder
+    caches[probe].put(originalHashcode, hash);
     return hash;
   }
 }

Added: mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java?rev=1595634&view=auto
==============================================================================
--- mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java (added)
+++ mahout/trunk/mrlegacy/src/test/java/org/apache/mahout/vectorizer/encoders/CachingEncoderTest.java Sun May 18 16:51:28 2014
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.vectorizer.encoders;
+
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Vector;
+import org.junit.Test;
+
+public class CachingEncoderTest extends MahoutTestCase { // checks that each caching encoder fills its probe cache
+
+  private static final int CARDINALITY = 10; // passed as the encoder dataSize and as the vector size
+  private static final String NAME = "name";
+  private static final String WORD = "word";
+  private static final String CONTINUOUSVAL = "123";
+
+  @Test
+  public void testCacheAreUsedStaticWord() {
+    CachingStaticWordValueEncoder encoder = new CachingStaticWordValueEncoder(NAME, CARDINALITY);
+    Vector v = new DenseVector(CARDINALITY);
+    encoder.addToVector(WORD, v); // a single encode is expected to leave an entry in the probe-0 cache
+    assertFalse("testCacheAreUsedStaticWord: cache should have values", encoder.getCaches()[0].isEmpty());
+  }
+
+  @Test
+  public void testCacheAreUsedContinuous() {
+    CachingContinuousValueEncoder encoder = new CachingContinuousValueEncoder(NAME, CARDINALITY);
+    Vector v = new DenseVector(CARDINALITY);
+    encoder.addToVector(CONTINUOUSVAL, 1.0, v); // NOTE(review): only cache population is asserted, not a hit on a repeated value
+    assertFalse("testCacheAreUsedContinuous: cache should have values", encoder.getCaches()[0].isEmpty());
+  }
+
+}