Posted to commits@mahout.apache.org by ss...@apache.org on 2013/07/29 09:03:03 UTC
svn commit: r1507954 - in /mahout/trunk/core/src: main/java/org/apache/mahout/clustering/spectral/eigencuts/ test/java/org/apache/mahout/clustering/canopy/ test/java/org/apache/mahout/common/
Author: ssc
Date: Mon Jul 29 07:03:02 2013
New Revision: 1507954
URL: http://svn.apache.org/r1507954
Log:
fixing bugs introduced by MAHOUT-1284
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java?rev=1507954&r1=1507953&r2=1507954&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java Mon Jul 29 07:03:02 2013
@@ -35,7 +35,9 @@ public class EigencutsSensitivityNode im
private int row;
private int column;
private double sensitivity;
-
+
+ public EigencutsSensitivityNode() {}
+
public EigencutsSensitivityNode(int i, int j, double s) {
row = i;
column = j;
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=1507954&r1=1507953&r2=1507954&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Mon Jul 29 07:03:02 2013
@@ -21,6 +21,7 @@ import java.util.Collection;
import java.util.List;
import java.util.Set;
+import com.google.common.collect.Iterables;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -258,17 +259,16 @@ public final class TestCanopyCreation ex
List<VectorWritable> points = getPointsWritable();
reducer.reduce(new Text("centroid"), points, context);
- Set<Text> keys = writer.getKeys();
- assertEquals("Number of centroids", 3, keys.size());
+ Iterable<Text> keys = writer.getKeysInInsertionOrder();
+ assertEquals("Number of centroids", 3, Iterables.size(keys));
int i = 0;
for (Text key : keys) {
List<ClusterWritable> data = writer.getValue(key);
ClusterWritable clusterWritable = data.get(0);
- Canopy canopy = (Canopy)clusterWritable.getValue();
- assertEquals(manhattanCentroids.get(i).asFormatString()
- + " is not equal to "
- + canopy.computeCentroid().asFormatString(), manhattanCentroids
- .get(i), canopy.computeCentroid());
+ Canopy canopy = (Canopy) clusterWritable.getValue();
+ assertEquals(manhattanCentroids.get(i).asFormatString() + " is not equal to "
+ + canopy.computeCentroid().asFormatString(),
+ manhattanCentroids.get(i), canopy.computeCentroid());
i++;
}
}
@@ -282,29 +282,27 @@ public final class TestCanopyCreation ex
public void testCanopyReducerEuclidean() throws Exception {
CanopyReducer reducer = new CanopyReducer();
Configuration conf = getConfiguration();
- conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY,
- "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
+ conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(3.1));
conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(2.1));
conf.set(CanopyConfigKeys.CF_KEY, "0");
DummyRecordWriter<Text, ClusterWritable> writer = new DummyRecordWriter<Text, ClusterWritable>();
- Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context = DummyRecordWriter
- .build(reducer, conf, writer, Text.class, VectorWritable.class);
+ Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context =
+ DummyRecordWriter.build(reducer, conf, writer, Text.class, VectorWritable.class);
reducer.setup(context);
List<VectorWritable> points = getPointsWritable();
reducer.reduce(new Text("centroid"), points, context);
- Set<Text> keys = writer.getKeys();
- assertEquals("Number of centroids", 3, keys.size());
+ Iterable<Text> keys = writer.getKeysInInsertionOrder();
+ assertEquals("Number of centroids", 3, Iterables.size(keys));
int i = 0;
for (Text key : keys) {
List<ClusterWritable> data = writer.getValue(key);
ClusterWritable clusterWritable = data.get(0);
- Canopy canopy = (Canopy)clusterWritable.getValue();
- assertEquals(euclideanCentroids.get(i).asFormatString()
- + " is not equal to "
- + canopy.computeCentroid().asFormatString(), euclideanCentroids
- .get(i), canopy.computeCentroid());
+ Canopy canopy = (Canopy) clusterWritable.getValue();
+ assertEquals(euclideanCentroids.get(i).asFormatString() + " is not equal to "
+ + canopy.computeCentroid().asFormatString(),
+ euclideanCentroids.get(i), canopy.computeCentroid());
i++;
}
}
@@ -333,29 +331,27 @@ public final class TestCanopyCreation ex
try {
Writable key = new Text();
ClusterWritable clusterWritable = new ClusterWritable();
- assertTrue("more to come", reader.next(key, clusterWritable));
+ assertTrue("more to come", reader.next(key, clusterWritable));
assertEquals("1st key", "C-0", key.toString());
List<Pair<Double,Double>> refCenters = Lists.newArrayList();
refCenters.add(new Pair<Double,Double>(1.5,1.5));
refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
- Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue() .getCenter().get(0),
- clusterWritable.getValue().getCenter().get(1));
+ Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue() .getCenter().get(0),
+ clusterWritable.getValue().getCenter().get(1));
assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
assertTrue("more to come", reader.next(key, clusterWritable));
assertEquals("2nd key", "C-1", key.toString());
c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
- clusterWritable.getValue().getCenter().get(1));
- assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
+ clusterWritable.getValue().getCenter().get(1));
+ assertTrue("center " + c + " not found", findAndRemove(c, refCenters, EPSILON));
assertFalse("more to come", reader.next(key, clusterWritable));
} finally {
Closeables.close(reader, true);
}
}
- static boolean findAndRemove(Pair<Double, Double> target,
- Collection<Pair<Double, Double>> list,
- double epsilon) {
+ static boolean findAndRemove(Pair<Double, Double> target, Collection<Pair<Double, Double>> list, double epsilon) {
for (Pair<Double,Double> curr : list) {
if ( (Math.abs(target.getFirst() - curr.getFirst()) < epsilon)
&& (Math.abs(target.getSecond() - curr.getSecond()) < epsilon) ) {
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1507954&r1=1507953&r2=1507954&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Mon Jul 29 07:03:02 2013
@@ -29,8 +29,6 @@ import org.apache.hadoop.mapreduce.Reduc
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@@ -45,47 +43,47 @@ import java.util.Set;
public final class DummyRecordWriter<K extends Writable, V extends Writable> extends RecordWriter<K, V> {
- private static final Logger log = LoggerFactory.getLogger(DummyRecordWriter.class);
-
+ private final List<K> keysInInsertionOrder = Lists.newArrayList();
private final Map<K, List<V>> data = Maps.newHashMap();
@Override
public void write(K key, V value) {
+
// if the user reuses the same writable class, we need to create a new one
// otherwise the Map content will be modified after the insert
try {
- if (!(key instanceof NullWritable)) {
- K newKey = (K) key.getClass().newInstance();
- cloneWritable(key, newKey);
- key = newKey;
+
+ K keyToUse = key instanceof NullWritable ? key : (K) cloneWritable(key);
+ V valueToUse = (V) cloneWritable(value);
+
+ keysInInsertionOrder.add(keyToUse);
+
+ List<V> points = data.get(key);
+ if (points == null) {
+ points = Lists.newArrayList();
+ data.put(keyToUse, points);
}
- V newValue = (V) value.getClass().newInstance();
- cloneWritable(value, newValue);
- value = newValue;
- } catch (InstantiationException e) {
- log.error(e.getMessage());
- } catch (IllegalAccessException e) {
- log.error(e.getMessage());
+ points.add(valueToUse);
+
} catch (IOException e) {
- log.error(e.getMessage());
+ throw new RuntimeException(e.getMessage(), e);
}
+ }
- List<V> points = data.get(key);
- if (points == null) {
- points = Lists.newArrayList();
- data.put(key, points);
+ private Writable cloneWritable(Writable original) throws IOException {
+
+ Writable clone;
+ try {
+ clone = original.getClass().asSubclass(Writable.class).newInstance();
+ } catch (Exception e) {
+ throw new RuntimeException("Unable to instantiate writable!", e);
}
- points.add(value);
- }
+ ByteArrayOutputStream bytes = new ByteArrayOutputStream();
- private void cloneWritable(Writable from, Writable to) throws IOException {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- DataOutputStream dos = new DataOutputStream(baos);
- from.write(dos);
- dos.close();
- ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
- DataInputStream dis = new DataInputStream(bais);
- to.readFields(dis);
+ original.write(new DataOutputStream(bytes));
+ clone.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
+
+ return clone;
}
@Override
@@ -104,6 +102,10 @@ public final class DummyRecordWriter<K e
return data.keySet();
}
+ public Iterable<K> getKeysInInsertionOrder() {
+ return keysInInsertionOrder;
+ }
+
public static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context build(Mapper<K1, V1, K2, V2> mapper,
Configuration configuration,
RecordWriter<K2, V2> output) {
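
For context on the DummyRecordWriter rewrite above: write() now clones every key and value before storing it (so callers may safely reuse Writable instances), clone failures are rethrown as RuntimeException instead of being logged, and the new getKeysInInsertionOrder() lets the canopy tests iterate centroids in the order the reducer emitted them instead of relying on HashMap key order. The following is a minimal usage sketch, not part of the commit; the key names and vector values are made up for illustration, and the usual imports (Text, VectorWritable, DenseVector, List, DummyRecordWriter) are assumed.

    // Illustrative sketch only: exercises the clone-on-write behaviour and the
    // insertion-ordered key view introduced by this commit.
    DummyRecordWriter<Text, VectorWritable> writer = new DummyRecordWriter<Text, VectorWritable>();

    // The same Text instance can be reused; write() stores a clone of each key and value.
    Text key = new Text();
    key.set("first");
    writer.write(key, new VectorWritable(new DenseVector(new double[] {1.0, 1.0})));
    key.set("second");
    writer.write(key, new VectorWritable(new DenseVector(new double[] {4.0, 4.0})));

    // Keys come back in write order, unlike the keySet()-backed getKeys().
    for (Text k : writer.getKeysInInsertionOrder()) {
      List<VectorWritable> values = writer.getValue(k);
      System.out.println(k + " -> " + values.get(0).get());
    }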