You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by td...@apache.org on 2010/11/05 04:20:35 UTC

svn commit: r1031415 - in /mahout/trunk/core/src: main/java/org/apache/mahout/vectorizer/encoders/ test/java/org/apache/mahout/vectorizer/encoders/

Author: tdunning
Date: Fri Nov  5 03:20:32 2010
New Revision: 1031415

URL: http://svn.apache.org/viewvc?rev=1031415&view=rev
Log:
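MAHOUT-539 - Fixed a small bug in ConstantValueEncoder: CachingValueEncoder always cached probe locations using CONTINUOUS_VALUE_HASH_SEED; each subclass now supplies its own seed via getSeed().
Added tests for ConstantValueEncoder.
Added more tests for ContinuousValueEncoder.
Allowed ContinuousValueEncoder to accept a null original form, in which case the weight alone is used as the value.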

Added:
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java
      - copied, changed from r1031414, mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
    mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
    mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java Fri Nov  5 03:20:32 2010
@@ -38,9 +38,11 @@ public abstract class CachingValueEncode
   @Override
   public void setProbes(int probes) {
     super.setProbes(probes);
-    cacheProbeLocations(CONTINUOUS_VALUE_HASH_SEED);
+    cacheProbeLocations(getSeed());
   }
 
+  protected abstract int getSeed();
+
   private void cacheProbeLocations(int seed) {
     cachedProbes = new int[getProbes()];
     for (int i = 0; i < getProbes(); i++) {

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java Fri Nov  5 03:20:32 2010
@@ -49,4 +49,9 @@ public class ConstantValueEncoder extend
   public String asString(String originalForm) {
     return getName();
   }
+
+  @Override
+  protected int getSeed() {
+    return 0;
+  }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java Fri Nov  5 03:20:32 2010
@@ -48,7 +48,11 @@ public class ContinuousValueEncoder exte
 
   @Override
   protected double getWeight(byte[] originalForm, double w) {
-    return w * Double.parseDouble(new String(originalForm));
+    if (originalForm!=null) {
+      return w * Double.parseDouble(new String(originalForm));
+    } else {
+      return w;
+    }
   }
 
   /**
@@ -63,4 +67,9 @@ public class ContinuousValueEncoder exte
   public String asString(String originalForm) {
     return getName() + ':' + originalForm;
   }
+
+  @Override
+  protected int getSeed() {
+    return CONTINUOUS_VALUE_HASH_SEED;
+  }
 }

Copied: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java (from r1031414, mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java&r1=1031414&r2=1031415&rev=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java Fri Nov  5 03:20:32 2010
@@ -22,58 +22,53 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.junit.Test;
 
-public final class ContinuousValueEncoderTest extends MahoutTestCase {
-  
+public final class ConstantValueEncoderTest extends MahoutTestCase {
+
   @Test
   public void testAddToVector() {
-    FeatureVectorEncoder enc = new ContinuousValueEncoder("foo");
+    FeatureVectorEncoder enc = new ConstantValueEncoder("foo");
     Vector v1 = new DenseVector(20);
-    enc.addToVector("-123", v1);
+    enc.addToVector((byte[]) null, -123, v1);
     assertEquals(-123, v1.minValue(), 0);
     assertEquals(0, v1.maxValue(), 0);
     assertEquals(123, v1.norm(1), 0);
 
     v1 = new DenseVector(20);
-    enc.addToVector("123", v1);
+    enc.addToVector((byte[]) null, 123, v1);
     assertEquals(123, v1.maxValue(), 0);
     assertEquals(0, v1.minValue(), 0);
     assertEquals(123, v1.norm(1), 0);
 
     Vector v2 = new DenseVector(20);
     enc.setProbes(2);
-    enc.addToVector("123", v2);
+    enc.addToVector((byte[]) null, 123, v2);
     assertEquals(123, v2.maxValue(), 0);
     assertEquals(2 * 123, v2.norm(1), 0);
 
+    // v1 has one probe, v2 has two.  The first probe in v2 should be in the same
+    // place as the only probe in v1
     v1 = v2.minus(v1);
     assertEquals(123, v1.maxValue(), 0);
     assertEquals(123, v1.norm(1), 0);
 
     Vector v3 = new DenseVector(20);
     enc.setProbes(2);
-    enc.addToVector("100", v3);
+    enc.addToVector((byte[]) null, 100, v3);
     v1 = v2.minus(v3);
     assertEquals(23, v1.maxValue(), 0);
     assertEquals(2 * 23, v1.norm(1), 0);
 
-    enc.addToVector("7", v1);
+    enc.addToVector((byte[]) null, 7, v1);
     assertEquals(30, v1.maxValue(), 0);
     assertEquals(2 * 30, v1.norm(1), 0);
+    assertEquals(30, v1.get(9), 0);
     assertEquals(30, v1.get(10), 0);
-    assertEquals(30, v1.get(18), 0);
-
-    try {
-      enc.addToVector("foobar", v1);
-      fail("Should have noticed bad numeric format");
-    } catch (NumberFormatException e) {
-      assertEquals("For input string: \"foobar\"", e.getMessage());
-    }
   }
 
   @Test
   public void testAsString() {
-    ContinuousValueEncoder enc = new ContinuousValueEncoder("foo");
-    assertEquals("foo:123", enc.asString("123"));
+    ConstantValueEncoder enc = new ConstantValueEncoder("foo");
+    assertEquals("foo", enc.asString("123"));
   }
 
 }

Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java Fri Nov  5 03:20:32 2010
@@ -45,6 +45,8 @@ public final class ContinuousValueEncode
     assertEquals(123, v2.maxValue(), 0);
     assertEquals(2 * 123, v2.norm(1), 0);
 
+    // v1 has one probe, v2 has two.  The first probe in v2 should be in the same
+    // place as the only probe in v1
     v1 = v2.minus(v1);
     assertEquals(123, v1.maxValue(), 0);
     assertEquals(123, v1.norm(1), 0);
@@ -62,6 +64,13 @@ public final class ContinuousValueEncode
     assertEquals(30, v1.get(10), 0);
     assertEquals(30, v1.get(18), 0);
 
+    v2 = new DenseVector(20);
+    v3 = new DenseVector(20);
+    enc.setProbes(6);
+    enc.addToVector("145", v2);
+    enc.addToVector((byte[]) null, 145, v3);
+    assertEquals(0, v2.minus(v3).norm(1), 0);
+
     try {
       enc.addToVector("foobar", v1);
       fail("Should have noticed bad numeric format");