You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ro...@apache.org on 2010/05/02 18:05:16 UTC
svn commit: r940252 -
/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
Author: robinanil
Date: Sun May 2 16:05:16 2010
New Revision: 940252
URL: http://svn.apache.org/viewvc?rev=940252&view=rev
Log:
Serialize/Deserialize Benchmarks
Modified:
lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
Modified: lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=940252&r1=940251&r2=940252&view=diff
==============================================================================
--- lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java (original)
+++ lucene/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java Sun May 2 16:05:16 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.benchmark;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
@@ -35,6 +36,11 @@ import org.apache.commons.cli2.builder.D
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
import org.apache.mahout.common.CommandLineUtil;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.Summarizable;
@@ -43,12 +49,14 @@ import org.apache.mahout.common.commandl
import org.apache.mahout.common.distance.CosineDistanceMeasure;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
import org.apache.mahout.common.distance.TanimotoDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -67,6 +75,7 @@ public class VectorBenchmarks implements
private final int opsPerUnit;
private final Map<String,Integer> implType = new HashMap<String,Integer>();
private final Map<String,List<String[]>> statsMap = new HashMap<String,List<String[]>>();
+
public VectorBenchmarks(int cardinality, int sparsity, int numVectors, int loop, int opsPerUnit) {
Random r = RandomUtils.getRandom();
@@ -253,6 +262,105 @@ public class VectorBenchmarks implements
}
+ public void serializeBenchmark() throws IOException {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.get(conf);
+ SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
+ new Path("/tmp/dense-vector"), IntWritable.class, VectorWritable.class);
+
+ IntWritable one = new IntWritable(0);
+ VectorWritable vec = new VectorWritable();
+
+ TimingStatistics stats = new TimingStatistics();
+ for (int l = 0; l < loop; l++) {
+ for (int i = 0; i < numVectors; i++) {
+ TimingStatistics.Call call = stats.newCall();
+ vec.set(vectors[0][i]);
+ writer.append(one, vec);
+ call.end();
+ }
+ }
+ writer.close();
+ printStats(stats, "Serialize", "DenseVector");
+
+ writer = new SequenceFile.Writer(fs, conf,
+ new Path("/tmp/randsparse-vector"), IntWritable.class, VectorWritable.class);
+ stats = new TimingStatistics();
+ for (int l = 0; l < loop; l++) {
+ for (int i = 0; i < numVectors; i++) {
+ TimingStatistics.Call call = stats.newCall();
+ vec.set(vectors[1][i]);
+ writer.append(one, vec);
+ call.end();
+ }
+ }
+ writer.close();
+ printStats(stats, "Serialize", "RandSparseVector");
+
+ writer = new SequenceFile.Writer(fs, conf,
+ new Path("/tmp/seqsparse-vector"), IntWritable.class, VectorWritable.class);
+ stats = new TimingStatistics();
+ for (int l = 0; l < loop; l++) {
+ for (int i = 0; i < numVectors; i++) {
+ TimingStatistics.Call call = stats.newCall();
+ vec.set(vectors[2][i]);
+ writer.append(one, vec);
+ call.end();
+ }
+ }
+ writer.close();
+ printStats(stats, "Serialize", "SeqSparseVector");
+
+ }
+
+ public void deserializeBenchmark() throws IOException {
+ Configuration conf = new Configuration();
+ FileSystem fs = FileSystem.get(conf);
+
+ SequenceFile.Reader reader = new SequenceFile.Reader(fs,
+ new Path("/tmp/dense-vector"), conf);
+
+ IntWritable one = new IntWritable(0);
+ VectorWritable vec = new VectorWritable();
+ TimingStatistics stats = new TimingStatistics();
+ for (int l = 0; l < loop; l++) {
+ for (int i = 0; i < numVectors; i++) {
+ TimingStatistics.Call call = stats.newCall();
+ reader.next(one, vec);
+ call.end();
+ }
+ }
+ reader.close();
+ printStats(stats, "Deserialize", "DenseVector");
+
+ reader = new SequenceFile.Reader(fs,
+ new Path("/tmp/randsparse-vector"), conf);
+ stats = new TimingStatistics();
+ for (int l = 0; l < loop; l++) {
+ for (int i = 0; i < numVectors; i++) {
+ TimingStatistics.Call call = stats.newCall();
+ reader.next(one, vec);
+ call.end();
+ }
+ }
+ reader.close();
+ printStats(stats, "Deserialize", "RandSparseVector");
+
+ reader = new SequenceFile.Reader(fs,
+ new Path("/tmp/seqsparse-vector"), conf);
+ stats = new TimingStatistics();
+ for (int l = 0; l < loop; l++) {
+ for (int i = 0; i < numVectors; i++) {
+ TimingStatistics.Call call = stats.newCall();
+ reader.next(one, vec);
+ call.end();
+ }
+ }
+ reader.close();
+ printStats(stats, "Deserialize", "SeqSparseVector");
+
+ }
+
public void dotBenchmark() {
double result = 0;
TimingStatistics stats = new TimingStatistics();
@@ -528,7 +636,7 @@ public class VectorBenchmarks implements
}
- public static void main(String[] args) {
+ public static void main(String[] args) throws IOException {
DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
ArgumentBuilder abuilder = new ArgumentBuilder();
@@ -583,7 +691,7 @@ public class VectorBenchmarks implements
numVectors = Integer.parseInt((String) cmdLine.getValue(numVectorsOpt));
}
- int loop = 600;
+ int loop = 200;
if (cmdLine.hasOption(loopOpt)) {
loop = Integer.parseInt((String) cmdLine.getValue(loopOpt));
@@ -598,10 +706,12 @@ public class VectorBenchmarks implements
mark.incrementalCreateBenchmark();
mark.cloneBenchmark();
mark.dotBenchmark();
+ mark.serializeBenchmark();
+ mark.deserializeBenchmark();
mark.distanceMeasureBenchmark(new CosineDistanceMeasure());
mark.distanceMeasureBenchmark(new SquaredEuclideanDistanceMeasure());
mark.distanceMeasureBenchmark(new EuclideanDistanceMeasure());
- //mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure());
+ mark.distanceMeasureBenchmark(new ManhattanDistanceMeasure());
mark.distanceMeasureBenchmark(new TanimotoDistanceMeasure());
log.info("\n{}", mark.summarize());