You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@mahout.apache.org by Aleksander Sadecki <al...@pi.esisar.grenoble-inp.fr> on 2014/05/21 16:30:10 UTC

wrong value class: wt: 0.0 vec: null is not class org.apache.mahout.clustering.iterator.ClusterWritable

Hi, 

I wrote a simple piece of code: 

import java.io.BufferedReader; 
import java.io.FileReader; 
import java.io.IOException; 

import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.LongWritable; 
import org.apache.hadoop.io.SequenceFile; 
import org.apache.mahout.clustering.Cluster; 
import org.apache.mahout.clustering.canopy.CanopyDriver; 
import org.apache.mahout.clustering.classify.WeightedPropertyVectorWritable; 
import org.apache.mahout.common.distance.EuclideanDistanceMeasure; 
import org.apache.mahout.math.RandomAccessSparseVector; 
import org.apache.mahout.math.Vector; 
import org.apache.mahout.math.VectorWritable; 

public class Clustering { 

private final static String root = "C:\\root\\BI\\"; 
private final static String dataDir = root + "synthetic_control.data"; 
private final static String seqDir = root + "synthetic_control.seq"; 
private final static String outputDir = root + "output"; 
private final static String partMDir = outputDir + "\\" + "clusters-0-final" + "\\part-r-00000"; 

private final static String SEPARATOR = " "; 

private final static int NUMBER_OF_ELEMENTS = 2; 

private Configuration conf; 
private FileSystem fs; 

public Clustering() throws IOException { 
conf = new Configuration(); 
fs = FileSystem.get(conf); 
} 

public void convertToVectorFile() throws IOException { 

BufferedReader reader = new BufferedReader(new FileReader(dataDir)); 
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, 
new Path(seqDir), LongWritable.class, VectorWritable.class); 

String line; 
long counter = 0; 
while ((line = reader.readLine()) != null) { 
String[] c; 
c = line.split(SEPARATOR); 
double[] d = new double[c.length]; 

for (int i = 0; i &lt; NUMBER_OF_ELEMENTS; i++) { 
try { 
d[i] = Double.parseDouble(c[i]); 
} catch (Exception ex) { 
d[i] = 0; 
} 
} 

Vector vec = new RandomAccessSparseVector(c.length); 
vec.assign(d); 

VectorWritable writable = new VectorWritable(); 
writable.set(vec); 
writer.append(new LongWritable(counter++), writable); 

} 
writer.close(); 
} 

public void createClusters(double t1, double t2, 
double clusterClassificationThreshold, boolean runSequential) 
throws ClassNotFoundException, IOException, InterruptedException { 

EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure(); 
Path inputPath = new Path(seqDir); 
Path outputPath = new Path(outputDir); 

CanopyDriver.run(inputPath, outputPath, measure, t1, t2, runSequential, 
clusterClassificationThreshold, runSequential); 
} 

public void printClusters() throws IOException { 
SequenceFile.Reader readerSequence = new SequenceFile.Reader(fs, 
new Path(partMDir), conf); 

IntWritable key = new IntWritable(); 
WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable(); 

while (readerSequence.next(key, value)) { 
System.out.println(value.toString() + " belongs to cluster " 
+ key.toString()); 
} 

readerSequence.close(); 
} 
} 



My synthetic_control.data file looks like this: 

0.01 1.0 
0.1 0.9 
0.1 0.95 
12.0 13.0 
12.5 12.8 

When I run my code, it throws: 

java.io.IOException: wrong value class: wt: 0.0 vec: null is not class org.apache.mahout.clustering.iterator.ClusterWritable 
at org.apache.hadoop.io.SequenceFile$Reader.next(SequenceFile.java:1936) 
at com.my.package.bi.canopy.CanopyClustering.printClusters(CanopyClustering.java:129) 
at com.my.package.bi.BIManager.printClusters(BIManager.java:20) 
at com.my.package.bi.Main.main(Main.java:15) 

Eclipse prints the following log, in which everything looks fine: 

DEBUG CanopyClusterer - Created new Canopy:0 at center:[0.010, 1.000] 
DEBUG CanopyClusterer - Added point: [0.100, 0.900] to canopy: C-0 
DEBUG CanopyClusterer - Added point: [0.100, 0.950] to canopy: C-0 
DEBUG CanopyClusterer - Created new Canopy:1 at center:[12.000, 13.000] 
DEBUG CanopyClusterer - Added point: [12.500, 12.800] to canopy: C-1 
DEBUG CanopyDriver - Writing Canopy:C-0 center:[0.070, 0.950] numPoints:3 radius:[0.042, 0.041] 
DEBUG CanopyDriver - Writing Canopy:C-1 center:[12.250, 12.900] numPoints:2 radius:[0.250, 0.100] 

The exception comes from line 

while (readerSequence.next(key, value)) { 

in method printClusters() 

Where could the problem be? 

Thank you in advance