You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by td...@apache.org on 2010/11/05 04:20:35 UTC
svn commit: r1031415 - in /mahout/trunk/core/src:
main/java/org/apache/mahout/vectorizer/encoders/
test/java/org/apache/mahout/vectorizer/encoders/
Author: tdunning
Date: Fri Nov 5 03:20:32 2010
New Revision: 1031415
URL: http://svn.apache.org/viewvc?rev=1031415&view=rev
Log:
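MAHOUT-539 - Fixed small bug in ConstantValueEncoder: CachingValueEncoder always cached probe locations using CONTINUOUS_VALUE_HASH_SEED; each subclass now supplies its own seed via getSeed()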
Added tests for ConstantValueEncoder
Added more testing for ContinuousValueEncoder
Allow ContinuousValueEncoder to accept a null original form, in which case the supplied weight alone is used as the value.
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java
- copied, changed from r1031414, mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java Fri Nov 5 03:20:32 2010
@@ -38,9 +38,11 @@ public abstract class CachingValueEncode
@Override
public void setProbes(int probes) {
super.setProbes(probes);
- cacheProbeLocations(CONTINUOUS_VALUE_HASH_SEED);
+ cacheProbeLocations(getSeed());
}
+ protected abstract int getSeed();
+
private void cacheProbeLocations(int seed) {
cachedProbes = new int[getProbes()];
for (int i = 0; i < getProbes(); i++) {
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java Fri Nov 5 03:20:32 2010
@@ -49,4 +49,9 @@ public class ConstantValueEncoder extend
public String asString(String originalForm) {
return getName();
}
+
+ @Override
+ protected int getSeed() {
+ return 0;
+ }
}
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java Fri Nov 5 03:20:32 2010
@@ -48,7 +48,11 @@ public class ContinuousValueEncoder exte
@Override
protected double getWeight(byte[] originalForm, double w) {
- return w * Double.parseDouble(new String(originalForm));
+ if (originalForm!=null) {
+ return w * Double.parseDouble(new String(originalForm));
+ } else {
+ return w;
+ }
}
/**
@@ -63,4 +67,9 @@ public class ContinuousValueEncoder exte
public String asString(String originalForm) {
return getName() + ':' + originalForm;
}
+
+ @Override
+ protected int getSeed() {
+ return CONTINUOUS_VALUE_HASH_SEED;
+ }
}
Copied: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java (from r1031414, mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java)
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java&r1=1031414&r2=1031415&rev=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java Fri Nov 5 03:20:32 2010
@@ -22,58 +22,53 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Vector;
import org.junit.Test;
-public final class ContinuousValueEncoderTest extends MahoutTestCase {
-
+public final class ConstantValueEncoderTest extends MahoutTestCase {
+
@Test
public void testAddToVector() {
- FeatureVectorEncoder enc = new ContinuousValueEncoder("foo");
+ FeatureVectorEncoder enc = new ConstantValueEncoder("foo");
Vector v1 = new DenseVector(20);
- enc.addToVector("-123", v1);
+ enc.addToVector((byte[]) null, -123, v1);
assertEquals(-123, v1.minValue(), 0);
assertEquals(0, v1.maxValue(), 0);
assertEquals(123, v1.norm(1), 0);
v1 = new DenseVector(20);
- enc.addToVector("123", v1);
+ enc.addToVector((byte[]) null, 123, v1);
assertEquals(123, v1.maxValue(), 0);
assertEquals(0, v1.minValue(), 0);
assertEquals(123, v1.norm(1), 0);
Vector v2 = new DenseVector(20);
enc.setProbes(2);
- enc.addToVector("123", v2);
+ enc.addToVector((byte[]) null, 123, v2);
assertEquals(123, v2.maxValue(), 0);
assertEquals(2 * 123, v2.norm(1), 0);
+ // v1 has one probe, v2 has two. The first probe in v2 should be in the same
+ // place as the only probe in v1
v1 = v2.minus(v1);
assertEquals(123, v1.maxValue(), 0);
assertEquals(123, v1.norm(1), 0);
Vector v3 = new DenseVector(20);
enc.setProbes(2);
- enc.addToVector("100", v3);
+ enc.addToVector((byte[]) null, 100, v3);
v1 = v2.minus(v3);
assertEquals(23, v1.maxValue(), 0);
assertEquals(2 * 23, v1.norm(1), 0);
- enc.addToVector("7", v1);
+ enc.addToVector((byte[]) null, 7, v1);
assertEquals(30, v1.maxValue(), 0);
assertEquals(2 * 30, v1.norm(1), 0);
+ assertEquals(30, v1.get(9), 0);
assertEquals(30, v1.get(10), 0);
- assertEquals(30, v1.get(18), 0);
-
- try {
- enc.addToVector("foobar", v1);
- fail("Should have noticed bad numeric format");
- } catch (NumberFormatException e) {
- assertEquals("For input string: \"foobar\"", e.getMessage());
- }
}
@Test
public void testAsString() {
- ContinuousValueEncoder enc = new ContinuousValueEncoder("foo");
- assertEquals("foo:123", enc.asString("123"));
+ ConstantValueEncoder enc = new ConstantValueEncoder("foo");
+ assertEquals("foo", enc.asString("123"));
}
}
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java Fri Nov 5 03:20:32 2010
@@ -45,6 +45,8 @@ public final class ContinuousValueEncode
assertEquals(123, v2.maxValue(), 0);
assertEquals(2 * 123, v2.norm(1), 0);
+ // v1 has one probe, v2 has two. The first probe in v2 should be in the same
+ // place as the only probe in v1
v1 = v2.minus(v1);
assertEquals(123, v1.maxValue(), 0);
assertEquals(123, v1.norm(1), 0);
@@ -62,6 +64,13 @@ public final class ContinuousValueEncode
assertEquals(30, v1.get(10), 0);
assertEquals(30, v1.get(18), 0);
+ v2 = new DenseVector(20);
+ v3 = new DenseVector(20);
+ enc.setProbes(6);
+ enc.addToVector("145", v2);
+ enc.addToVector((byte[]) null, 145, v3);
+ assertEquals(0, v2.minus(v3).norm(1), 0);
+
try {
enc.addToVector("foobar", v1);
fail("Should have noticed bad numeric format");