Posted to commits@mahout.apache.org by ss...@apache.org on 2013/07/29 09:03:03 UTC

svn commit: r1507954 - in /mahout/trunk/core/src: main/java/org/apache/mahout/clustering/spectral/eigencuts/ test/java/org/apache/mahout/clustering/canopy/ test/java/org/apache/mahout/common/

Author: ssc
Date: Mon Jul 29 07:03:02 2013
New Revision: 1507954

URL: http://svn.apache.org/r1507954
Log:
fixing bugs introduced by MAHOUT-1284

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java
    mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
    mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java?rev=1507954&r1=1507953&r2=1507954&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsSensitivityNode.java Mon Jul 29 07:03:02 2013
@@ -35,7 +35,9 @@ public class EigencutsSensitivityNode im
   private int row;
   private int column;
   private double sensitivity;
-  
+
+  public EigencutsSensitivityNode() {}
+
   public EigencutsSensitivityNode(int i, int j, double s) {
     row = i;
     column = j;

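For context on the constructor added above: Hadoop instantiates Writable
implementations reflectively (through a no-arg constructor) before calling
readFields(), which is also what DummyRecordWriter's cloning below relies on.
A minimal sketch of that round trip, using a hypothetical SimpleNode writable
in place of EigencutsSensitivityNode:

    import java.io.*;
    import org.apache.hadoop.io.Writable;

    // Hypothetical Writable; without the no-arg constructor, the reflective
    // newInstance() call below would throw InstantiationException.
    public class SimpleNode implements Writable {

      private int row;
      private double sensitivity;

      public SimpleNode() {} // required for reflective instantiation

      public SimpleNode(int row, double sensitivity) {
        this.row = row;
        this.sensitivity = sensitivity;
      }

      @Override
      public void write(DataOutput out) throws IOException {
        out.writeInt(row);
        out.writeDouble(sensitivity);
      }

      @Override
      public void readFields(DataInput in) throws IOException {
        row = in.readInt();
        sensitivity = in.readDouble();
      }

      public static void main(String[] args) throws Exception {
        SimpleNode original = new SimpleNode(1, 0.5);

        // serialize the original to a byte buffer
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // create a fresh instance reflectively and read the bytes back in,
        // mirroring what DummyRecordWriter.cloneWritable() does below
        SimpleNode clone = SimpleNode.class.newInstance();
        clone.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
      }
    }
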
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=1507954&r1=1507953&r2=1507954&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Mon Jul 29 07:03:02 2013
@@ -21,6 +21,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Set;
 
+import com.google.common.collect.Iterables;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -258,17 +259,16 @@ public final class TestCanopyCreation ex
 
     List<VectorWritable> points = getPointsWritable();
     reducer.reduce(new Text("centroid"), points, context);
-    Set<Text> keys = writer.getKeys();
-    assertEquals("Number of centroids", 3, keys.size());
+    Iterable<Text> keys = writer.getKeysInInsertionOrder();
+    assertEquals("Number of centroids", 3, Iterables.size(keys));
     int i = 0;
     for (Text key : keys) {
       List<ClusterWritable> data = writer.getValue(key);
       ClusterWritable clusterWritable = data.get(0);
-	  Canopy canopy = (Canopy)clusterWritable.getValue();
-	  assertEquals(manhattanCentroids.get(i).asFormatString()
-          + " is not equal to "
-          + canopy.computeCentroid().asFormatString(), manhattanCentroids
-          .get(i), canopy.computeCentroid());
+      Canopy canopy = (Canopy) clusterWritable.getValue();
+      assertEquals(manhattanCentroids.get(i).asFormatString() + " is not equal to "
+          + canopy.computeCentroid().asFormatString(),
+          manhattanCentroids.get(i), canopy.computeCentroid());
       i++;
     }
   }
@@ -282,29 +282,27 @@ public final class TestCanopyCreation ex
   public void testCanopyReducerEuclidean() throws Exception {
     CanopyReducer reducer = new CanopyReducer();
     Configuration conf = getConfiguration();
-    conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY,
-        "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
+    conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, "org.apache.mahout.common.distance.EuclideanDistanceMeasure");
     conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(3.1));
     conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(2.1));
     conf.set(CanopyConfigKeys.CF_KEY, "0");
     DummyRecordWriter<Text, ClusterWritable> writer = new DummyRecordWriter<Text, ClusterWritable>();
-    Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context = DummyRecordWriter
-        .build(reducer, conf, writer, Text.class, VectorWritable.class);
+    Reducer<Text, VectorWritable, Text, ClusterWritable>.Context context =
+        DummyRecordWriter.build(reducer, conf, writer, Text.class, VectorWritable.class);
     reducer.setup(context);
 
     List<VectorWritable> points = getPointsWritable();
     reducer.reduce(new Text("centroid"), points, context);
-    Set<Text> keys = writer.getKeys();
-    assertEquals("Number of centroids", 3, keys.size());
+    Iterable<Text> keys = writer.getKeysInInsertionOrder();
+    assertEquals("Number of centroids", 3, Iterables.size(keys));
     int i = 0;
     for (Text key : keys) {
       List<ClusterWritable> data = writer.getValue(key);
       ClusterWritable clusterWritable = data.get(0);
-      Canopy canopy = (Canopy)clusterWritable.getValue();
-      assertEquals(euclideanCentroids.get(i).asFormatString()
-          + " is not equal to "
-          + canopy.computeCentroid().asFormatString(), euclideanCentroids
-          .get(i), canopy.computeCentroid());
+      Canopy canopy = (Canopy) clusterWritable.getValue();
+      assertEquals(euclideanCentroids.get(i).asFormatString() + " is not equal to "
+          + canopy.computeCentroid().asFormatString(),
+          euclideanCentroids.get(i), canopy.computeCentroid());
       i++;
     }
   }
@@ -333,29 +331,27 @@ public final class TestCanopyCreation ex
     try {
       Writable key = new Text();
       ClusterWritable clusterWritable = new ClusterWritable();
-	  assertTrue("more to come", reader.next(key, clusterWritable));
+      assertTrue("more to come", reader.next(key, clusterWritable));
       assertEquals("1st key", "C-0", key.toString());
 
       List<Pair<Double,Double>> refCenters = Lists.newArrayList();
       refCenters.add(new Pair<Double,Double>(1.5,1.5));
       refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
-	  Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue() .getCenter().get(0),
-			clusterWritable.getValue().getCenter().get(1));
+      Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
+          clusterWritable.getValue().getCenter().get(1));
       assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
       assertTrue("more to come", reader.next(key, clusterWritable));
       assertEquals("2nd key", "C-1", key.toString());
       c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
-    		  clusterWritable.getValue().getCenter().get(1));
-      assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
+          clusterWritable.getValue().getCenter().get(1));
+      assertTrue("center " + c + " not found", findAndRemove(c, refCenters, EPSILON));
       assertFalse("more to come", reader.next(key, clusterWritable));
     } finally {
       Closeables.close(reader, true);
     }
   }
 
-  static boolean findAndRemove(Pair<Double, Double> target,
-                               Collection<Pair<Double, Double>> list,
-                               double epsilon) {
+  static boolean findAndRemove(Pair<Double, Double> target, Collection<Pair<Double, Double>> list, double epsilon) {
     for (Pair<Double,Double> curr : list) {
       if ( (Math.abs(target.getFirst() - curr.getFirst()) < epsilon) 
            && (Math.abs(target.getSecond() - curr.getSecond()) < epsilon) ) {

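For context on the getKeys() -> getKeysInInsertionOrder() change in the tests
above: DummyRecordWriter backs its data with a HashMap, whose keySet()
iteration order is unspecified, so matching key i against the i-th expected
centroid was fragile. A self-contained sketch (plain JDK, not Mahout code) of
the difference:

    import java.util.*;

    public class InsertionOrderDemo {
      public static void main(String[] args) {
        Map<String, Integer> data = new HashMap<String, Integer>();
        List<String> keysInInsertionOrder = new ArrayList<String>();

        for (String key : new String[] {"C-2", "C-0", "C-1"}) {
          data.put(key, key.length());
          keysInInsertionOrder.add(key);
        }

        // keySet() may iterate in any order; the side list preserves the
        // order in which the keys were first written
        System.out.println("keySet() order:  " + data.keySet());
        System.out.println("insertion order: " + keysInInsertionOrder);
      }
    }
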
Modified: mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java?rev=1507954&r1=1507953&r2=1507954&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/common/DummyRecordWriter.java Mon Jul 29 07:03:02 2013
@@ -29,8 +29,6 @@ import org.apache.hadoop.mapreduce.Reduc
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
@@ -45,47 +43,47 @@ import java.util.Set;
 
 public final class DummyRecordWriter<K extends Writable, V extends Writable> extends RecordWriter<K, V> {
 
-  private static final Logger log = LoggerFactory.getLogger(DummyRecordWriter.class);
-
+  private final List<K> keysInInsertionOrder = Lists.newArrayList();
   private final Map<K, List<V>> data = Maps.newHashMap();
 
   @Override
   public void write(K key, V value) {
+
     // if the user reuses the same writable class, we need to create a new one
     // otherwise the Map content will be modified after the insert
     try {
-      if (!(key instanceof NullWritable)) {
-        K newKey = (K) key.getClass().newInstance();
-        cloneWritable(key, newKey);
-        key = newKey;
+
+      K keyToUse = key instanceof NullWritable ? key : (K) cloneWritable(key);
+      V valueToUse = (V) cloneWritable(value);
+
+      List<V> points = data.get(keyToUse);
+      if (points == null) {
+        points = Lists.newArrayList();
+        data.put(keyToUse, points);
+        // record each distinct key once, in the order it first appears
+        keysInInsertionOrder.add(keyToUse);
       }
-      V newValue = (V) value.getClass().newInstance();
-      cloneWritable(value, newValue);
-      value = newValue;
-    } catch (InstantiationException e) {
-      log.error(e.getMessage());
-    } catch (IllegalAccessException e) {
-      log.error(e.getMessage());
+      points.add(valueToUse);
+
     } catch (IOException e) {
-      log.error(e.getMessage());
+      throw new RuntimeException(e.getMessage(), e);
     }
+  }
 
-    List<V> points = data.get(key);
-    if (points == null) {
-      points = Lists.newArrayList();
-      data.put(key, points);
+  private Writable cloneWritable(Writable original) throws IOException {
+
+    Writable clone;
+    try {
+      clone = original.getClass().asSubclass(Writable.class).newInstance();
+    } catch (Exception e) {
+      throw new RuntimeException("Unable to instantiate writable!", e);
     }
-    points.add(value);
-  }
+    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
 
-  private void cloneWritable(Writable from, Writable to) throws IOException {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    DataOutputStream dos = new DataOutputStream(baos);
-    from.write(dos);
-    dos.close();
-    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
-    DataInputStream dis = new DataInputStream(bais);
-    to.readFields(dis);
+    original.write(new DataOutputStream(bytes));
+    clone.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
+
+    return clone;
   }
 
   @Override
@@ -104,6 +102,10 @@ public final class DummyRecordWriter<K e
     return data.keySet();
   }
 
+  public Iterable<K> getKeysInInsertionOrder() {
+    return keysInInsertionOrder;
+  }
+
   public static <K1, V1, K2, V2> Mapper<K1, V1, K2, V2>.Context build(Mapper<K1, V1, K2, V2> mapper,
                                                                       Configuration configuration,
                                                                       RecordWriter<K2, V2> output) {
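
For reference, cloneWritable() above uses the standard clone-by-serialization
idiom for Writables: write the original to a byte buffer, create a fresh
instance reflectively, and read the bytes back in. A standalone sketch of the
same idiom (the CloneWritableDemo class name is made up for illustration):

    import java.io.*;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class CloneWritableDemo {

      static Writable cloneWritable(Writable original) throws IOException {
        Writable clone;
        try {
          // needs a no-arg constructor, as added to EigencutsSensitivityNode above
          clone = original.getClass().asSubclass(Writable.class).newInstance();
        } catch (Exception e) {
          throw new RuntimeException("Unable to instantiate writable!", e);
        }
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));
        clone.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        return clone;
      }

      public static void main(String[] args) throws IOException {
        Text original = new Text("centroid");
        Text clone = (Text) cloneWritable(original);

        original.set("mutated");     // mutating the original...
        System.out.println(clone);   // ...prints "centroid": the clone is independent
      }
    }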