You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2018/06/27 14:52:18 UTC

[50/51] [partial] mahout git commit: MAHOUT-2042 and MAHOUT-2045 Delete directories which were moved/no longer in use

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/ssvd.props
----------------------------------------------------------------------
diff --git a/community/mahout-mr/conf/ssvd.props b/community/mahout-mr/conf/ssvd.props
new file mode 100644
index 0000000..26a52c7
--- /dev/null
+++ b/community/mahout-mr/conf/ssvd.props
@@ -0,0 +1,14 @@
+#i|input =
+#o|output =
+#k|rank =
+#t|tempDir = 
+#p|oversampling = 
+#r|blockHeight = 
+#s|minSplitSize = 
+#U|computeU = 
+#uhs|uHalfSigma = 
+#V|computeV = 
+#vhs|vHalfSigma = 
+#t|reduceTasks = 
+#w|wide = 
+#q|powerIter =

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/svd.props
----------------------------------------------------------------------
diff --git a/community/mahout-mr/conf/svd.props b/community/mahout-mr/conf/svd.props
new file mode 100644
index 0000000..8c9a467
--- /dev/null
+++ b/community/mahout-mr/conf/svd.props
@@ -0,0 +1,6 @@
+#i|input =
+#o|output =
+#nr|numRows =
+#nc|numCols =
+#r|rank =
+#t|tempDir = 
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/trainlogistic.props
----------------------------------------------------------------------
diff --git a/community/mahout-mr/conf/trainlogistic.props b/community/mahout-mr/conf/trainlogistic.props
new file mode 100644
index 0000000..f474942
--- /dev/null
+++ b/community/mahout-mr/conf/trainlogistic.props
@@ -0,0 +1,2 @@
+#lambda|lambda =
+#passes|passes =

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/transpose.props
----------------------------------------------------------------------
diff --git a/community/mahout-mr/conf/transpose.props b/community/mahout-mr/conf/transpose.props
new file mode 100644
index 0000000..025f945
--- /dev/null
+++ b/community/mahout-mr/conf/transpose.props
@@ -0,0 +1,2 @@
+#i|input =
+#o|output =

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/conf/vectordump.props
----------------------------------------------------------------------
diff --git a/community/mahout-mr/conf/vectordump.props b/community/mahout-mr/conf/vectordump.props
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/community/mahout-mr/conf/vectordump.props
@@ -0,0 +1 @@
+

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/bin/prep_asf_mail_archives.sh
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/bin/prep_asf_mail_archives.sh b/community/mahout-mr/integration/bin/prep_asf_mail_archives.sh
new file mode 100755
index 0000000..77f5d13
--- /dev/null
+++ b/community/mahout-mr/integration/bin/prep_asf_mail_archives.sh
@@ -0,0 +1,106 @@
+#!/bin/bash
+# 
+# Performs the setup procedures for clustering the ASF mail archives
+# described in Taming Text.
+# 
+# Required Command-line Parameters:
+#
+#   $1 - Path to this script's working directory, you will need about
+#        22GB of free space to run this script.
+#
+#   $2 - Path to where the ASF Public Archive data is, untarred.
+#        If you are running Hadoop and the files are in HDFS, then
+#        this will need to be an HDFS path.   Default is $1/input
+#   $3 - Path to where this script saves the SequenceFile output.
+#        If you are running Hadoop and you want the sequence files
+#        saved to your HDFS then you need to set this value to an 
+#        HDFS path and make sure you set HADOOP_HOME so Mahout can
+#        find Hadoop.  Default is $1/sequence-files
+#
+#
+# Required Environment Variables:
+#
+#   MAHOUT_HOME   
+#          Root directory of your Mahout distribution
+#
+#   HADOOP_HOME
+#          Only needed if you want to send output to HDFS
+#
+# Example:
+#   ./prep_asf_mail_archives.sh /mnt/asf-mail-archives /mnt/asf-archives/asf-mail-archives-7-18-2011 /mnt/asf-mail-archives/output
+#
+#   This reads the already-extracted mail archives from the input path and
+#   runs the Mahout org.apache.mahout.text.SequenceFilesFromMailArchives job
+#   to create Hadoop SequenceFiles in /mnt/asf-mail-archives/output
+#
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+if [ -z "$MAHOUT_HOME" ]; then
+  echo "Error: MAHOUT_HOME is not set." >&2
+  exit 1
+fi
+
+# Usage text goes through printf: bash's builtin echo would print "\n" and "\t" literally.
+if [ -z "$1" ]; then
+  printf 'Error: Please pass the path to your prep directory, such as /mnt/asf-mail-archives.\n\n\tUsage: %s workingDir inputPath outputPath\n' "$0" >&2
+  exit 1
+fi
+
+# Location where this script saves files
+PREP_DIR=$1
+
+# Input directory: second argument, or $PREP_DIR/input when omitted
+if [ -n "$2" ]; then
+  SEQFILE_INPUT_DIR=$2
+else
+  SEQFILE_INPUT_DIR=$PREP_DIR/input
+fi
+
+# Change this to an HDFS path if you are running Hadoop
+if [ -n "$3" ]; then
+  SEQFILE_OUTPUT_DIR=$3
+else
+  SEQFILE_OUTPUT_DIR=$PREP_DIR/sequence-files
+fi
+
+# If output sent to HDFS, clear MAHOUT_LOCAL and make sure HADOOP_HOME is set
+if [[ "$SEQFILE_OUTPUT_DIR" = hdfs://* ]]; then
+  export MAHOUT_LOCAL=
+  if [ -z "$HADOOP_HOME" ]; then
+    echo "Error: HADOOP_HOME must be set if you want to send output to HDFS." >&2
+    exit 1
+  fi
+else
+  export MAHOUT_LOCAL=$PREP_DIR
+fi
+
+echo "Running $0 with:
+  PREP_DIR = $PREP_DIR
+  SEQFILE_INPUT_DIR = $SEQFILE_INPUT_DIR
+  SEQFILE_OUTPUT_DIR = $SEQFILE_OUTPUT_DIR
+  MAHOUT_LOCAL = $MAHOUT_LOCAL
+  HADOOP_HOME = $HADOOP_HOME"
+
+# MAHOUT_LOCAL was exported above: it is $PREP_DIR unless the output directory
+# is an hdfs:// path, in which case it is empty.
+
+# convert the extracted gz files into Hadoop SequenceFiles
+echo "Converting extracted directories to SequenceFiles ..."
+"$MAHOUT_HOME/bin/mahout" org.apache.mahout.text.SequenceFilesFromMailArchives \
+--input "$SEQFILE_INPUT_DIR" --output "$SEQFILE_OUTPUT_DIR" --subject --body \
+-c UTF-8 -chunk 1024 -prefix asf_archives

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/pom.xml
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/pom.xml b/community/mahout-mr/integration/pom.xml
new file mode 100644
index 0000000..cb0c19a
--- /dev/null
+++ b/community/mahout-mr/integration/pom.xml
@@ -0,0 +1,198 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache.mahout</groupId>
+    <artifactId>mahout</artifactId>
+    <version>0.13.1-SNAPSHOT</version>
+    <relativePath>../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>mahout-integration</artifactId>
+  <name>Mahout Integration</name>
+  <description>Optional components of Mahout which generally support interaction with third party systems,
+    formats, APIs, etc.</description>
+
+  <packaging>jar</packaging>
+
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-remote-resources-plugin</artifactId>
+        <configuration>
+          <appendedResourcesDirectory>../community/mahout-mr/src/appended-resources</appendedResourcesDirectory>
+          <resourceBundles>
+            <resourceBundle>org.apache:apache-jar-resource-bundle:1.4</resourceBundle>
+          </resourceBundles>
+          <supplementalModels>
+            <supplementalModel>supplemental-models.xml</supplementalModel>
+          </supplementalModels>
+        </configuration>
+      </plugin>
+
+      <plugin>
+        <artifactId>maven-javadoc-plugin</artifactId>
+      </plugin>
+
+      <plugin>
+        <artifactId>maven-source-plugin</artifactId>
+      </plugin>
+
+    </plugins>
+
+  </build>
+
+  <dependencies>
+
+    <!-- own modules -->
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mahout-hdfs</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mahout-mr</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mahout-hdfs</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mahout-mr</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mahout-math</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>mahout-math</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+
+    <!-- 3rd party -->
+
+    <dependency>
+      <groupId>commons-dbcp</groupId>
+      <artifactId>commons-dbcp</artifactId>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-pool</groupId>
+      <artifactId>commons-pool</artifactId>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.solr</groupId>
+      <artifactId>solr-commons-csv</artifactId>
+      <version>3.5.0</version>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-benchmark</artifactId>
+      <optional>true</optional>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-analyzers-common</artifactId>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mongodb</groupId>
+      <artifactId>mongo-java-driver</artifactId>
+      <version>2.11.2</version>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
+      <groupId>org.mongodb</groupId>
+      <artifactId>bson</artifactId>
+      <version>2.11.2</version>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hbase</groupId>
+      <artifactId>hbase-client</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.hectorclient</groupId>
+      <artifactId>hector-core</artifactId>
+      <version>1.1-4</version>
+      <optional>true</optional>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-jcl</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>com.carrotsearch.randomizedtesting</groupId>
+      <artifactId>randomizedtesting-runner</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.easymock</groupId>
+      <artifactId>easymock</artifactId>
+      <scope>test</scope>
+    </dependency>
+
+  </dependencies>
+
+</project>

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java
new file mode 100644
index 0000000..549cf2c
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/BenchmarkRunner.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.TimingStatistics;
+import org.apache.mahout.math.Vector;
+
+import com.google.common.base.Function;
+
+/** Drives a benchmark function in a loop, timing every call, until the time budget is spent. */
+public final class BenchmarkRunner {
+  private static final int BUCKET_SIZE = 10000;
+  private static final Random R = RandomUtils.getRandom();
+  private final long maxTimeNanos;
+  private final long leadTimeNanos;
+
+  /** Creates a runner; both times are given in milliseconds and stored as nanoseconds. */
+  public BenchmarkRunner(long leadTimeMs, long maxTimeMs) {
+    maxTimeNanos = TimeUnit.MILLISECONDS.toNanos(maxTimeMs);
+    leadTimeNanos = TimeUnit.MILLISECONDS.toNanos(leadTimeMs);
+  }
+
+  public abstract static class BenchmarkFn implements Function<Integer, Boolean> {
+    protected int randIndex() {
+      return BenchmarkRunner.randIndex();
+    }
+
+    protected boolean randBool() {
+      return BenchmarkRunner.randBool();
+    }
+
+    /** Adds a random data dependency so that JVM does not remove dead code. */
+    protected boolean depends(Vector v) {
+      return randIndex() < v.getNumNondefaultElements();
+    }
+  }
+
+  public abstract static class BenchmarkFnD implements Function<Integer, Double> {
+    protected int randIndex() {
+      return BenchmarkRunner.randIndex();
+    }
+
+    protected boolean randBool() {
+      return BenchmarkRunner.randBool();
+    }
+
+    /** Adds a random data dependency so that JVM does not remove dead code. */
+    protected boolean depends(Vector v) {
+      return randIndex() < v.getNumNondefaultElements();
+    }
+  }
+
+  private static int randIndex() {
+    return R.nextInt(BUCKET_SIZE);
+  }
+
+  private static boolean randBool() {
+    return R.nextBoolean();
+  }
+
+  /**
+   * Calls {@code function} with random indices, timing each call, until Call.end returns true.
+   */
+  public TimingStatistics benchmark(BenchmarkFn function) {
+    TimingStatistics stats = new TimingStatistics();
+    boolean result = false;
+    TimingStatistics.Call call;
+    do {
+      int i = randIndex();
+      call = stats.newCall(leadTimeNanos);
+      result = result ^ function.apply(i);
+    } while (!call.end(maxTimeNanos));
+    // print result, as benchmarkD does, to prevent hotspot from eliminating deadcode
+    System.err.println("Result = " + result);
+    return stats;
+  }
+
+  /** Same as {@link #benchmark(BenchmarkFn)} for double-valued functions; results are summed. */
+  public TimingStatistics benchmarkD(BenchmarkFnD function) {
+    TimingStatistics stats = new TimingStatistics();
+    double result = 0;
+    TimingStatistics.Call call;
+    do {
+      int i = randIndex();
+      call = stats.newCall(leadTimeNanos);
+      result += function.apply(i);
+    } while (!call.end(maxTimeNanos));
+    // print result to prevent hotspot from eliminating deadcode
+    System.err.println("Result = " + result);
+    return stats;
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java
new file mode 100644
index 0000000..5e6ab4d
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/CloneBenchmark.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+
+/**
+ * Benchmarks {@code clone()} on the three vector rows of a {@link VectorBenchmarks}, reported
+ * under the DENSE_VECTOR, RAND_SPARSE_VECTOR and SEQ_SPARSE_VECTOR labels.
+ */
+public class CloneBenchmark {
+  public static final String CLONE = "Clone";
+  private final VectorBenchmarks mark;
+
+  public CloneBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs the clone benchmark for rows 0, 1 and 2 of {@code mark.vectors} and prints the stats. */
+  public void benchmark() {
+    benchmarkClone(0, DENSE_VECTOR);
+    benchmarkClone(1, RAND_SPARSE_VECTOR);
+    benchmarkClone(2, SEQ_SPARSE_VECTOR);
+  }
+
+  /**
+   * Times replacing vectors of one row with their own clones and prints the resulting stats.
+   *
+   * @param row first index into {@code mark.vectors}
+   * @param vectorLabel label handed to {@code printStats} after {@link #CLONE}
+   */
+  private void benchmarkClone(final int row, String vectorLabel) {
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        // The clone is stored back into the array, so the copy is not a discarded temporary.
+        mark.vectors[row][mark.vIndex(i)] = mark.vectors[row][mark.vIndex(i)].clone();
+
+        return depends(mark.vectors[row][mark.vIndex(i)]);
+      }
+    }), CLONE, vectorLabel);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
new file mode 100644
index 0000000..b1c2ded
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/ClosestCentroidBenchmark.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.TimingStatistics;
+import org.apache.mahout.common.distance.DistanceMeasure;
+import org.apache.mahout.math.SparseMatrix;
+import org.apache.mahout.math.Vector;
+
+/** Times nearest-centroid search with and without pruning by centroid-to-centroid distances. */
+public class ClosestCentroidBenchmark {
+  private final VectorBenchmarks mark;
+
+  public ClosestCentroidBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs both searches over {@code mark.vectors[1]}; prints timings and distance-call counts. */
+  public void benchmark(DistanceMeasure measure) throws IOException {
+    SparseMatrix clusterDistances = new SparseMatrix(mark.numClusters, mark.numClusters);
+    for (int i = 0; i < mark.numClusters; i++) {
+      for (int j = 0; j < mark.numClusters; j++) {
+        clusterDistances.setQuick(i, j,
+            i == j ? Double.POSITIVE_INFINITY : measure.distance(mark.clusters[i], mark.clusters[j]));
+      }
+    }
+
+    long distanceCalculations = 0;
+    TimingStatistics stats = new TimingStatistics();
+    for (int l = 0; l < mark.loop; l++) {
+      TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+      for (int i = 0; i < mark.numVectors; i++) {
+        Vector vector = mark.vectors[1][mark.vIndex(i)];
+        double minDistance = Double.MAX_VALUE;
+        for (int k = 0; k < mark.numClusters; k++) {
+          double distance = measure.distance(vector, mark.clusters[k]);
+          distanceCalculations++;
+          if (distance < minDistance) {
+            minDistance = distance;
+          }
+        }
+      }
+      if (call.end(mark.maxTimeUsec)) {
+        break;
+      }
+    }
+    mark.printStats(stats, measure.getClass().getName(), "Closest C w/o Elkan's trick", "distanceCalculations = "
+        + distanceCalculations);
+
+    distanceCalculations = 0;
+    stats = new TimingStatistics();
+    Random rand = RandomUtils.getRandom();
+    for (int l = 0; l < mark.loop; l++) {
+      TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+      for (int i = 0; i < mark.numVectors; i++) {
+        Vector vector = mark.vectors[1][mark.vIndex(i)];
+        int closestCentroid = rand.nextInt(mark.numClusters);
+        double dist = measure.distance(vector, mark.clusters[closestCentroid]);
+        distanceCalculations++;
+        for (int k = 0; k < mark.numClusters; k++) {
+          // Elkan: k may be closer only if d(k, best) < 2 * dist; adopt it only when it really is.
+          if (closestCentroid != k && clusterDistances.getQuick(k, closestCentroid) < 2 * dist) {
+            double candidate = measure.distance(vector, mark.clusters[k]);
+            distanceCalculations++;
+            if (candidate < dist) {
+              dist = candidate;
+              closestCentroid = k;
+            }
+          }
+        }
+      }
+      if (call.end(mark.maxTimeUsec)) {
+        break;
+      }
+    }
+    mark.printStats(stats, measure.getClass().getName(), "Closest C w/ Elkan's trick", "distanceCalculations = "
+        + distanceCalculations);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java
new file mode 100644
index 0000000..25d0ad7
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DistanceBenchmark.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFnD;
+import org.apache.mahout.common.distance.DistanceMeasure;
+
+/**
+ * Benchmarks a {@link DistanceMeasure} on every ordered pairing of the three vector rows held
+ * by a {@link VectorBenchmarks}: the three same-row pairs first, then the six mixed pairs.
+ */
+public class DistanceBenchmark {
+  // Rows of mark.vectors, named after the labels the same-row benchmarks print for them.
+  private static final int DENSE = 0;
+  private static final int RAND_SPARSE = 1;
+  private static final int SEQ_SPARSE = 2;
+
+  private final VectorBenchmarks mark;
+
+  public DistanceBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /**
+   * Times {@code measure.distance} for all nine row pairings and prints one stats entry each,
+   * keyed by the measure's class name and the pairing's label.
+   */
+  public void benchmark(final DistanceMeasure measure) {
+    benchmarkPair(measure, DENSE, DENSE, DENSE_VECTOR);
+    benchmarkPair(measure, RAND_SPARSE, RAND_SPARSE, RAND_SPARSE_VECTOR);
+    benchmarkPair(measure, SEQ_SPARSE, SEQ_SPARSE, SEQ_SPARSE_VECTOR);
+
+    benchmarkPair(measure, DENSE, RAND_SPARSE, DENSE_FN_RAND);
+    benchmarkPair(measure, DENSE, SEQ_SPARSE, DENSE_FN_SEQ);
+    benchmarkPair(measure, RAND_SPARSE, DENSE, RAND_FN_DENSE);
+    benchmarkPair(measure, RAND_SPARSE, SEQ_SPARSE, RAND_FN_SEQ);
+    benchmarkPair(measure, SEQ_SPARSE, DENSE, SEQ_FN_DENSE);
+    benchmarkPair(measure, SEQ_SPARSE, RAND_SPARSE, SEQ_FN_RAND);
+  }
+
+  /**
+   * Times the distance between a vector of one row and a randomly chosen vector of another.
+   *
+   * @param measure distance measure under test
+   * @param leftRow row of {@code mark.vectors} supplying the first argument
+   * @param rightRow row of {@code mark.vectors} supplying the second, randomly indexed argument
+   * @param label pairing label handed to {@code printStats}
+   */
+  private void benchmarkPair(final DistanceMeasure measure, final int leftRow, final int rightRow,
+      String label) {
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return measure.distance(mark.vectors[leftRow][mark.vIndex(i)],
+            mark.vectors[rightRow][mark.vIndex(randIndex())]);
+      }
+    }), measure.getClass().getName(), label);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java
new file mode 100644
index 0000000..fc7f911
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/DotBenchmark.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFnD;
+
+public class DotBenchmark {
+  private static final String DOT_PRODUCT = "DotProduct";
+  private static final String NORM1 = "Norm1";
+  private static final String NORM2 = "Norm2";
+  private static final String LOG_NORMALIZE = "LogNormalize";
+  private final VectorBenchmarks mark;
+
+  public DotBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  public void benchmark() {
+    benchmarkDot();
+    benchmarkNorm1();
+    benchmarkNorm2();
+    benchmarkLogNormalize();
+  }
+
+  private void benchmarkLogNormalize() {
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        return depends(mark.vectors[0][mark.vIndex(i)].logNormalize());
+      }
+    }), LOG_NORMALIZE, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        return depends(mark.vectors[1][mark.vIndex(i)].logNormalize());
+      }
+    }), LOG_NORMALIZE, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        return depends(mark.vectors[2][mark.vIndex(i)].logNormalize());
+      }
+    }), LOG_NORMALIZE, SEQ_SPARSE_VECTOR);
+  }
+
+  private void benchmarkNorm1() {
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].norm(1);
+      }
+    }), NORM1, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].norm(1);
+      }
+    }), NORM1, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].norm(1);
+      }
+    }), NORM1, SEQ_SPARSE_VECTOR);
+  }
+
+  private void benchmarkNorm2() {
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].norm(2);
+      }
+    }), NORM2, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].norm(2);
+      }
+    }), NORM2, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].norm(2);
+      }
+    }), NORM2, SEQ_SPARSE_VECTOR);
+  }
+
+  private void benchmarkDot() {
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, DENSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, RAND_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, SEQ_SPARSE_VECTOR);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, DENSE_FN_RAND);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[0][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, DENSE_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, RAND_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[1][mark.vIndex(i)].dot(mark.vectors[2][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, RAND_FN_SEQ);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[0][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, SEQ_FN_DENSE);
+
+    mark.printStats(mark.getRunner().benchmarkD(new BenchmarkFnD() {
+      @Override
+      public Double apply(Integer i) {
+        return mark.vectors[2][mark.vIndex(i)].dot(mark.vectors[1][mark.vIndex(randIndex())]);
+      }
+    }), DOT_PRODUCT, SEQ_FN_RAND);
+  }
+
+  public static void main(String[] args) {
+    VectorBenchmarks mark = new VectorBenchmarks(1000000, 100, 1000, 10, 1);
+    mark.createData();
+    new DotBenchmark(mark).benchmarkNorm2();
+    System.out.println(mark);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java
new file mode 100644
index 0000000..82fb693
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/MinusBenchmark.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Times {@code Vector.minus} for all nine ordered pairings of the three vector sets held by a
+ * {@link VectorBenchmarks} instance and reports each timing through {@code printStats}.
+ *
+ * <p>In every case the receiver is the vector at {@code vIndex(i)} of the first set and the
+ * argument is the vector at {@code vIndex(randIndex())} of the second set. The difference is
+ * handed to {@code depends}, whose result is what the function returns. Both {@code randIndex()}
+ * and {@code depends(...)} are inherited from {@code BenchmarkFn}, which is why anonymous
+ * subclasses are used.
+ */
+public class MinusBenchmark {
+
+  private static final String MINUS = "Minus";
+  private final VectorBenchmarks mark;
+
+  /**
+   * @param mark supplies the vectors, the runner and the statistics sink used by the benchmark
+   */
+  public MinusBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs the nine subtraction benchmarks, same-set pairings first, then the mixed pairings. */
+  public void benchmark() {
+    // Set 0 minus set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, DENSE_VECTOR);
+
+    // Set 1 minus set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, RAND_SPARSE_VECTOR);
+
+    // Set 2 minus set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, SEQ_SPARSE_VECTOR);
+
+    // Set 0 minus set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, DENSE_FN_RAND);
+
+    // Set 0 minus set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, DENSE_FN_SEQ);
+
+    // Set 1 minus set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, RAND_FN_DENSE);
+
+    // Set 1 minus set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, RAND_FN_SEQ);
+
+    // Set 2 minus set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, SEQ_FN_DENSE);
+
+    // Set 2 minus set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.minus(rhs));
+      }
+    }), MINUS, SEQ_FN_RAND);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java
new file mode 100644
index 0000000..bd76e94
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/PlusBenchmark.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Times {@code Vector.plus} for all nine ordered pairings of the three vector sets held by a
+ * {@link VectorBenchmarks} instance and reports each timing through {@code printStats}.
+ *
+ * <p>In every case the receiver is the vector at {@code vIndex(i)} of the first set and the
+ * argument is the vector at {@code vIndex(randIndex())} of the second set. The sum is handed to
+ * {@code depends}, whose result is what the function returns. Both {@code randIndex()} and
+ * {@code depends(...)} are inherited from {@code BenchmarkFn}, which is why anonymous subclasses
+ * are used.
+ */
+public class PlusBenchmark {
+
+  private static final String PLUS = "Plus";
+  private final VectorBenchmarks mark;
+
+  /**
+   * @param mark supplies the vectors, the runner and the statistics sink used by the benchmark
+   */
+  public PlusBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs the nine addition benchmarks, same-set pairings first, then the mixed pairings. */
+  public void benchmark() {
+    // Set 0 plus set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, DENSE_VECTOR);
+
+    // Set 1 plus set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, RAND_SPARSE_VECTOR);
+
+    // Set 2 plus set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, SEQ_SPARSE_VECTOR);
+
+    // Set 0 plus set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, DENSE_FN_RAND);
+
+    // Set 0 plus set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, DENSE_FN_SEQ);
+
+    // Set 1 plus set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, RAND_FN_DENSE);
+
+    // Set 1 plus set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, RAND_FN_SEQ);
+
+    // Set 2 plus set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, SEQ_FN_DENSE);
+
+    // Set 2 plus set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.plus(rhs));
+      }
+    }), PLUS, SEQ_FN_RAND);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
new file mode 100644
index 0000000..cd403c2
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/SerializationBenchmark.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.common.TimingStatistics;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
+import org.apache.mahout.math.VectorWritable;
+
+import java.io.IOException;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+/**
+ * Benchmarks writing the three vector sets of a {@link VectorBenchmarks} instance to Hadoop
+ * sequence files and reading those files back, reporting timings through {@code printStats}.
+ *
+ * <p>The deserialization benchmark reads the files produced by the serialization benchmark, so
+ * {@link #serializeBenchmark()} has to run first (as {@link #benchmark()} does).
+ */
+public class SerializationBenchmark {
+  public static final String SERIALIZE = "Serialize";
+  public static final String DESERIALIZE = "Deserialize";
+
+  // Sequence file locations shared by the write and the read benchmark.
+  private static final String DENSE_PATH = "/tmp/dense-vector";
+  private static final String RAND_SPARSE_PATH = "/tmp/randsparse-vector";
+  private static final String SEQ_SPARSE_PATH = "/tmp/seqsparse-vector";
+
+  private final VectorBenchmarks mark;
+
+  /**
+   * @param mark supplies the vectors, the loop and timing limits, and the statistics sink
+   */
+  public SerializationBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /**
+   * Runs the serialization benchmark followed by the deserialization benchmark.
+   *
+   * @throws IOException if a sequence file cannot be written or read
+   */
+  public void benchmark() throws IOException {
+    serializeBenchmark();
+    deserializeBenchmark();
+  }
+
+  /**
+   * Writes each of the three vector sets to its own sequence file and reports the per-append
+   * timings under {@link #SERIALIZE}.
+   *
+   * @throws IOException if the file system cannot be obtained or a file cannot be written
+   */
+  public void serializeBenchmark() throws IOException {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+
+    doSerializeBenchmark(fs, conf, 0, DENSE_VECTOR, DENSE_PATH);
+    doSerializeBenchmark(fs, conf, 1, RAND_SPARSE_VECTOR, RAND_SPARSE_PATH);
+    doSerializeBenchmark(fs, conf, 2, SEQ_SPARSE_VECTOR, SEQ_SPARSE_PATH);
+  }
+
+  /**
+   * Reads back each of the three sequence files and reports the per-record timings under
+   * {@link #DESERIALIZE}.
+   *
+   * @throws IOException if a file cannot be opened or read
+   */
+  public void deserializeBenchmark() throws IOException {
+    doDeserializeBenchmark(DENSE_VECTOR, DENSE_PATH);
+    doDeserializeBenchmark(RAND_SPARSE_VECTOR, RAND_SPARSE_PATH);
+    doDeserializeBenchmark(SEQ_SPARSE_VECTOR, SEQ_SPARSE_PATH);
+  }
+
+  /**
+   * Appends up to {@code mark.loop} vectors of one vector set to a sequence file, timing each
+   * append, then reports the statistics.
+   *
+   * @param fs file system the sequence file is created on
+   * @param conf Hadoop configuration handed to the writer
+   * @param type first index into {@code mark.vectors}, selecting the vector set to write
+   * @param name label the statistics are reported under
+   * @param pathString location of the sequence file to write
+   * @throws IOException if the file cannot be created, written or closed
+   */
+  private void doSerializeBenchmark(FileSystem fs, Configuration conf, int type, String name,
+                                    String pathString) throws IOException {
+    // Every record is written with the same key; only the vector value varies.
+    Writable key = new IntWritable(0);
+    VectorWritable value = new VectorWritable();
+    TimingStatistics stats = new TimingStatistics();
+
+    try (SequenceFile.Writer writer =
+             new SequenceFile.Writer(fs, conf, new Path(pathString),
+                 IntWritable.class, VectorWritable.class)) {
+      for (int i = 0; i < mark.loop; i++) {
+        TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+        value.set(mark.vectors[type][mark.vIndex(i)]);
+        writer.append(key, value);
+        // end(...) returning true stops the run before mark.loop iterations are reached.
+        if (call.end(mark.maxTimeUsec)) {
+          break;
+        }
+      }
+    }
+    mark.printStats(stats, SERIALIZE, name);
+  }
+
+  /**
+   * Iterates over every value of one sequence file, timing each record, then reports the
+   * statistics.
+   *
+   * @param name label the statistics are reported under
+   * @param pathString location of the sequence file to read
+   * @throws IOException if the file cannot be opened, read or closed
+   */
+  private void doDeserializeBenchmark(String name, String pathString) throws IOException {
+    TimingStatistics stats = new TimingStatistics();
+    // The first call is opened before the iterator is constructed; each later call is opened
+    // right after the previous one ends, so a timed window spans both hasNext() and next().
+    TimingStatistics.Call call = stats.newCall(mark.leadTimeUsec);
+    // try-with-resources closes the iterator even when hasNext()/next() throws.
+    try (SequenceFileValueIterator<Writable> iterator =
+             new SequenceFileValueIterator<>(new Path(pathString), true, new Configuration())) {
+      while (iterator.hasNext()) {
+        iterator.next();
+        call.end();
+        call = stats.newCall(mark.leadTimeUsec);
+      }
+    }
+    mark.printStats(stats, DESERIALIZE, name);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java
new file mode 100644
index 0000000..bf81228
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/TimesBenchmark.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.DENSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_FN_SEQ;
+import static org.apache.mahout.benchmark.VectorBenchmarks.RAND_SPARSE_VECTOR;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_DENSE;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_FN_RAND;
+import static org.apache.mahout.benchmark.VectorBenchmarks.SEQ_SPARSE_VECTOR;
+
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Times {@code Vector.times(Vector)} for all nine ordered pairings of the three vector sets held
+ * by a {@link VectorBenchmarks} instance and reports each timing through {@code printStats}.
+ *
+ * <p>In every case the receiver is the vector at {@code vIndex(i)} of the first set and the
+ * argument is the vector at {@code vIndex(randIndex())} of the second set. The product is handed
+ * to {@code depends}, whose result is what the function returns. Both {@code randIndex()} and
+ * {@code depends(...)} are inherited from {@code BenchmarkFn}, which is why anonymous subclasses
+ * are used.
+ */
+public class TimesBenchmark {
+
+  private static final String TIMES = "Times";
+  private final VectorBenchmarks mark;
+
+  /**
+   * @param mark supplies the vectors, the runner and the statistics sink used by the benchmark
+   */
+  public TimesBenchmark(VectorBenchmarks mark) {
+    this.mark = mark;
+  }
+
+  /** Runs the nine multiplication benchmarks, same-set pairings first, then the mixed pairings. */
+  public void benchmark() {
+    // Set 0 times set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, DENSE_VECTOR);
+
+    // Set 1 times set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, RAND_SPARSE_VECTOR);
+
+    // Set 2 times set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, SEQ_SPARSE_VECTOR);
+
+    // Set 0 times set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, DENSE_FN_RAND);
+
+    // Set 0 times set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[0][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, DENSE_FN_SEQ);
+
+    // Set 1 times set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, RAND_FN_DENSE);
+
+    // Set 1 times set 2.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[1][mark.vIndex(i)];
+        Vector rhs = mark.vectors[2][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, RAND_FN_SEQ);
+
+    // Set 2 times set 0.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[0][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, SEQ_FN_DENSE);
+
+    // Set 2 times set 1.
+    mark.printStats(mark.getRunner().benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        Vector lhs = mark.vectors[2][mark.vIndex(i)];
+        Vector rhs = mark.vectors[1][mark.vIndex(randIndex())];
+        return depends(lhs.times(rhs));
+      }
+    }), TIMES, SEQ_FN_RAND);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/e0573de3/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
----------------------------------------------------------------------
diff --git a/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
new file mode 100644
index 0000000..a076322
--- /dev/null
+++ b/community/mahout-mr/integration/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
@@ -0,0 +1,497 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.benchmark;
+
+import org.apache.commons.cli2.CommandLine;
+import org.apache.commons.cli2.Group;
+import org.apache.commons.cli2.Option;
+import org.apache.commons.cli2.OptionException;
+import org.apache.commons.cli2.builder.ArgumentBuilder;
+import org.apache.commons.cli2.builder.DefaultOptionBuilder;
+import org.apache.commons.cli2.builder.GroupBuilder;
+import org.apache.commons.cli2.commandline.Parser;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.mahout.benchmark.BenchmarkRunner.BenchmarkFn;
+import org.apache.mahout.common.CommandLineUtil;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.common.TimingStatistics;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
+import org.apache.mahout.common.distance.ChebyshevDistanceMeasure;
+import org.apache.mahout.common.distance.CosineDistanceMeasure;
+import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.common.distance.ManhattanDistanceMeasure;
+import org.apache.mahout.common.distance.MinkowskiDistanceMeasure;
+import org.apache.mahout.common.distance.SquaredEuclideanDistanceMeasure;
+import org.apache.mahout.common.distance.TanimotoDistanceMeasure;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.RandomAccessSparseVector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
+import org.apache.mahout.math.Vector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+
+public class VectorBenchmarks {
+  // Time budgets in milliseconds. Both are handed to the BenchmarkRunner constructor and are
+  // also converted to nanoseconds for maxTimeUsec / leadTimeUsec below.
+  // NOTE(review): presumably MAX is the measured-run budget and LEAD the warm-up budget -- confirm
+  // against BenchmarkRunner / TimingStatistics.
+  private static final int MAX_TIME_MS = 5000;
+  private static final int LEAD_TIME_MS = 15000;
+  // Implementation / benchmark labels passed to printStats().
+  public static final String CLUSTERS = "Clusters";
+  public static final String CREATE_INCREMENTALLY = "Create (incrementally)";
+  public static final String CREATE_COPY = "Create (copy)";
+
+  public static final String DENSE_FN_SEQ = "Dense.fn(Seq)";
+  public static final String RAND_FN_DENSE = "Rand.fn(Dense)";
+  public static final String SEQ_FN_RAND = "Seq.fn(Rand)";
+  public static final String RAND_FN_SEQ = "Rand.fn(Seq)";
+  public static final String SEQ_FN_DENSE = "Seq.fn(Dense)";
+  public static final String DENSE_FN_RAND = "Dense.fn(Rand)";
+  public static final String SEQ_SPARSE_VECTOR = "SeqSparseVector";
+  public static final String RAND_SPARSE_VECTOR = "RandSparseVector";
+  public static final String DENSE_VECTOR = "DenseVector";
+
+  private static final Logger log = LoggerFactory.getLogger(VectorBenchmarks.class);
+  // Splits the combined statistics string built in printStats() into one token per line/tab.
+  private static final Pattern TAB_NEWLINE_PATTERN = Pattern.compile("[\n\t]");
+  // Placeholder stored in statsMap for implementation ids that have no result for a benchmark.
+  private static final String[] EMPTY = new String[0];
+  // Formats the reported rates with at most two fraction digits.
+  private static final DecimalFormat DF = new DecimalFormat("#.##");
+
+  /* Package-private state below is read directly by the sibling *Benchmark classes. */
+  // vectors[0] holds DenseVectors, vectors[1] RandomAccessSparseVectors and
+  // vectors[2] SequentialAccessSparseVectors (see createData()); each row has numVectors slots.
+  final Vector[][] vectors;
+  final Vector[] clusters;
+  final int cardinality;
+  final int numNonZeros;
+  final int numVectors;
+  final int numClusters;
+  // Upper bound of the incrementalCreateBenchmark() loops, which normally end earlier via break.
+  final int loop = Integer.MAX_VALUE;
+  // Stored from the constructor argument; not read anywhere inside this class.
+  final int opsPerUnit;
+  // NOTE: despite the "Usec" suffix both values are NANOSECONDS (TimeUnit.MILLISECONDS.toNanos).
+  final long maxTimeUsec;
+  final long leadTimeUsec;
+
+  // The three lists below are filled in lock-step by setUpVectors(): entry k of each list
+  // describes the same random vector (the vector itself, its indexes, its values).
+  private final List<Vector> randomVectors = new ArrayList<>();
+  private final List<int[]> randomVectorIndices = new ArrayList<>();
+  private final List<double[]> randomVectorValues = new ArrayList<>();
+  // Implementation name -> id, assigned in first-seen order by printStats().
+  private final Map<String, Integer> implType = new HashMap<>();
+  // Benchmark name -> tokenized statistics, indexed by implementation id (EMPTY where missing).
+  private final Map<String, List<String[]>> statsMap = new HashMap<>();
+  private final BenchmarkRunner runner;
+  // Random source used by setUpVectors() to generate indexes and values.
+  private final Random r = RandomUtils.getRandom();
+
+  /**
+   * Creates the benchmark harness and pre-generates the random source vectors.
+   * The {@code vectors} and {@code clusters} arrays are only allocated here; they are
+   * populated later by {@link #createData()}.
+   *
+   * @param cardinality dimension of every generated vector
+   * @param numNonZeros number of non-zero entries placed in every generated vector
+   * @param numVectors  number of random source vectors, and of slots per vector implementation
+   * @param numClusters number of cluster slots; {@code 0} disables the cluster benchmarks
+   * @param opsPerUnit  stored in {@code opsPerUnit} for use by the individual benchmarks
+   * @throws IllegalArgumentException if {@code numVectors} is not positive or
+   *         {@code numClusters} is negative
+   */
+  public VectorBenchmarks(int cardinality, int numNonZeros, int numVectors, int numClusters,
+      int opsPerUnit) {
+    // vIndex() computes i % numVectors, so a zero count would only surface later as an
+    // ArithmeticException in the middle of a benchmark; reject it up front instead.
+    if (numVectors <= 0) {
+      throw new IllegalArgumentException("numVectors must be positive, got " + numVectors);
+    }
+    // A negative count would otherwise fail below with a bare NegativeArraySizeException.
+    if (numClusters < 0) {
+      throw new IllegalArgumentException("numClusters must not be negative, got " + numClusters);
+    }
+
+    runner = new BenchmarkRunner(LEAD_TIME_MS, MAX_TIME_MS);
+    // Both fields hold nanoseconds even though their names end in "Usec".
+    maxTimeUsec = TimeUnit.MILLISECONDS.toNanos(MAX_TIME_MS);
+    leadTimeUsec = TimeUnit.MILLISECONDS.toNanos(LEAD_TIME_MS);
+
+    this.cardinality = cardinality;
+    this.numNonZeros = numNonZeros;
+    this.numVectors = numVectors;
+    this.numClusters = numClusters;
+    this.opsPerUnit = opsPerUnit;
+
+    setUpVectors(cardinality, numNonZeros, numVectors);
+
+    vectors = new Vector[3][numVectors];
+    clusters = new Vector[numClusters];
+  }
+
+  /**
+   * Generates {@code numVectors} random sparse vectors and records, for each of them, the
+   * vector itself plus the indexes and values that were set (in generation order).
+   *
+   * @param cardinality dimension of every vector
+   * @param numNonZeros number of distinct non-zero entries per vector
+   * @param numVectors  number of vectors to generate
+   * @throws IllegalArgumentException if {@code numNonZeros} is negative or larger than
+   *         {@code cardinality}
+   */
+  private void setUpVectors(int cardinality, int numNonZeros, int numVectors) {
+    // The sampling loop below needs numNonZeros DISTINCT indexes out of [0, cardinality);
+    // asking for more than exist would make it spin forever.
+    if (numNonZeros < 0 || numNonZeros > cardinality) {
+      throw new IllegalArgumentException("numNonZeros (" + numNonZeros
+          + ") must be between 0 and the vector cardinality (" + cardinality + ")");
+    }
+    for (int i = 0; i < numVectors; i++) {
+      Vector v = new SequentialAccessSparseVector(cardinality, numNonZeros); // sparsity!
+      BitSet featureSpace = new BitSet(cardinality);
+      int[] indexes = new int[numNonZeros];
+      double[] values = new double[numNonZeros];
+      int j = 0;
+      // Rejection sampling: retry until an unused index with a non-zero value is drawn.
+      while (j < numNonZeros) {
+        double value = r.nextGaussian();
+        int index = r.nextInt(cardinality);
+        if (!featureSpace.get(index) && value != 0) {
+          featureSpace.set(index);
+          indexes[j] = index;
+          values[j++] = value;
+          v.set(index, value);
+        }
+      }
+      randomVectorIndices.add(indexes);
+      randomVectorValues.add(values);
+      randomVectors.add(v);
+    }
+  }
+
+  void printStats(TimingStatistics stats, String benchmarkName, String implName, String content) {
+    printStats(stats, benchmarkName, implName, content, 1);
+  }
+
+  void printStats(TimingStatistics stats, String benchmarkName, String implName) {
+    printStats(stats, benchmarkName, implName, "", 1);
+  }
+
+  /**
+   * Logs one benchmark result and stores its tokenized form in {@code statsMap} under
+   * {@code benchmarkName}, at the list position given by the implementation's id.
+   * Unknown implementation names get the next free id; gaps in the list are filled
+   * with {@code EMPTY}.
+   */
+  private void printStats(TimingStatistics stats, String benchmarkName, String implName,
+      String content, int multiplier) {
+    float speed = multiplier * stats.getNCalls() * (numNonZeros * 1000.0f * 12 / stats.getSumTime());
+    float opsPerSec = stats.getNCalls() * 1000000000.0f / stats.getSumTime();
+    String summary = stats.toString();
+    String opsText = DF.format(opsPerSec);
+    String speedText = DF.format(speed);
+    log.info("{} {} \n{} {} \nOps    = {} Units/sec\nIOps   = {} MBytes/sec", benchmarkName,
+        implName, content, summary, opsText, speedText);
+
+    // Look up the implementation's id, registering the name on first sight.
+    Integer implId = implType.get(implName);
+    if (implId == null) {
+      implId = implType.size();
+      implType.put(implName, implId);
+    }
+
+    // Fetch (or create) the per-benchmark result list.
+    List<String[]> implStats = statsMap.get(benchmarkName);
+    if (implStats == null) {
+      implStats = new ArrayList<>();
+      statsMap.put(benchmarkName, implStats);
+    }
+
+    // Grow the list with placeholders so that position implId exists.
+    for (int missing = implStats.size(); missing <= implId; missing++) {
+      implStats.add(EMPTY);
+    }
+
+    String row = summary + "\tSpeed  = " + opsText + " /sec\tRate   = " + speedText + " MB/s";
+    implStats.set(implId, TAB_NEWLINE_PATTERN.split(row));
+  }
+
+  public void createData() {
+    for (int i = 0; i < Math.max(numVectors, numClusters); ++i) {
+      vectors[0][vIndex(i)] = new DenseVector(randomVectors.get(vIndex(i)));
+      vectors[1][vIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+      vectors[2][vIndex(i)] = new SequentialAccessSparseVector(randomVectors.get(vIndex(i)));
+      if (numClusters > 0) {
+        clusters[cIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+      }
+    }
+  }
+
+  /**
+   * Benchmarks copy-construction of each vector implementation from the pre-generated
+   * random vectors and records every result under {@code CREATE_COPY}.
+   * Each run overwrites the corresponding slot of {@code vectors} (or {@code clusters}).
+   * The cluster variant only runs when {@code numClusters > 0}.
+   */
+  public void createBenchmark() {
+    // Dense copy of the source vector.
+    printStats(runner.benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        vectors[0][vIndex(i)] = new DenseVector(randomVectors.get(vIndex(i)));
+        // NOTE(review): depends() is defined on BenchmarkFn, outside this file -- presumably it
+        // consumes the result so the work cannot be optimized away; confirm.
+        return depends(vectors[0][vIndex(i)]);
+      }
+    }), CREATE_COPY, DENSE_VECTOR);
+
+    // Random-access sparse copy of the source vector.
+    printStats(runner.benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        vectors[1][vIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+        return depends(vectors[1][vIndex(i)]);
+      }
+    }), CREATE_COPY, RAND_SPARSE_VECTOR);
+
+    // Sequential-access sparse copy of the source vector.
+    printStats(runner.benchmark(new BenchmarkFn() {
+      @Override
+      public Boolean apply(Integer i) {
+        vectors[2][vIndex(i)] = new SequentialAccessSparseVector(randomVectors.get(vIndex(i)));
+        return depends(vectors[2][vIndex(i)]);
+      }
+    }), CREATE_COPY, SEQ_SPARSE_VECTOR);
+
+    if (numClusters > 0) {
+      // Cluster slots wrap with cIndex() while the source vectors wrap with vIndex().
+      printStats(runner.benchmark(new BenchmarkFn() {
+        @Override
+        public Boolean apply(Integer i) {
+          clusters[cIndex(i)] = new RandomAccessSparseVector(randomVectors.get(vIndex(i)));
+          return depends(clusters[cIndex(i)]);
+        }
+      }), CREATE_COPY, CLUSTERS);
+    }
+  }
+
+  /**
+   * Sets the recorded entries of random vector {@code randomIndex} on {@code v}, one element at
+   * a time and in shuffled order, timing only the element assignments.
+   *
+   * @param stats       statistics object that receives the timed call
+   * @param randomIndex position of the source data in the pre-generated index/value lists
+   * @param v           vector to populate
+   * @param useSetQuick {@code true} to assign with {@code setQuick}, {@code false} for {@code set}
+   * @return the value of {@code call.end(maxTimeUsec)}; callers stop looping once it is true
+   */
+  private boolean buildVectorIncrementally(TimingStatistics stats, int randomIndex, Vector v, boolean useSetQuick) {
+    int[] indexes = randomVectorIndices.get(randomIndex);
+    double[] values = randomVectorValues.get(randomIndex);
+
+    // Untimed set-up: a random visiting order over the entry positions, as a primitive array.
+    List<Integer> shuffled = new ArrayList<>();
+    for (int pos = 0; pos < indexes.length; pos++) {
+      shuffled.add(pos);
+    }
+    Collections.shuffle(shuffled);
+    int[] permutation = new int[shuffled.size()];
+    int next = 0;
+    for (int pos : shuffled) {
+      permutation[next++] = pos;
+    }
+
+    // Timed region starts here.
+    TimingStatistics.Call call = stats.newCall(leadTimeUsec);
+    if (useSetQuick) {
+      for (int entry : permutation) {
+        v.setQuick(indexes[entry], values[entry]);
+      }
+    } else {
+      for (int entry : permutation) {
+        v.set(indexes[entry], values[entry]);
+      }
+    }
+    return call.end(maxTimeUsec);
+  }
+
+  public void incrementalCreateBenchmark() {
+    TimingStatistics stats = new TimingStatistics();
+    for (int i = 0; i < loop; i++) {
+      vectors[0][vIndex(i)] = new DenseVector(cardinality);
+      if (buildVectorIncrementally(stats, vIndex(i), vectors[0][vIndex(i)], false)) {
+        break;
+      }
+    }
+    printStats(stats, CREATE_INCREMENTALLY, DENSE_VECTOR);
+
+    stats = new TimingStatistics();
+    for (int i = 0; i < loop; i++) {
+      vectors[1][vIndex(i)] = new RandomAccessSparseVector(cardinality);
+      if (buildVectorIncrementally(stats, vIndex(i), vectors[1][vIndex(i)], false)) {
+        break;
+      }
+    }
+    printStats(stats, CREATE_INCREMENTALLY, RAND_SPARSE_VECTOR);
+
+    stats = new TimingStatistics();
+    for (int i = 0; i < loop; i++) {
+      vectors[2][vIndex(i)] = new SequentialAccessSparseVector(cardinality);
+      if (buildVectorIncrementally(stats, vIndex(i), vectors[2][vIndex(i)], false)) {
+        break;
+      }
+    }
+    printStats(stats, CREATE_INCREMENTALLY, SEQ_SPARSE_VECTOR);
+
+    if (numClusters > 0) {
+      stats = new TimingStatistics();
+      for (int i = 0; i < loop; i++) {
+        clusters[cIndex(i)] = new RandomAccessSparseVector(cardinality);
+        if (buildVectorIncrementally(stats, vIndex(i), clusters[cIndex(i)], false)) {
+          break;
+        }
+      }
+      printStats(stats, CREATE_INCREMENTALLY, CLUSTERS);
+    }
+  }
+
+  /** Maps an arbitrary iteration counter onto a vector slot: {@code i % numVectors}. */
+  public int vIndex(int i) {
+    final int slot = i % numVectors;
+    return slot;
+  }
+
+  /** Maps an arbitrary iteration counter onto a cluster slot: {@code i % numClusters}. */
+  public int cIndex(int i) {
+    final int slot = i % numClusters;
+    return slot;
+  }
+
+  public static void main(String[] args) throws IOException {
+    DefaultOptionBuilder obuilder = new DefaultOptionBuilder();
+    ArgumentBuilder abuilder = new ArgumentBuilder();
+    GroupBuilder gbuilder = new GroupBuilder();
+
+    Option vectorSizeOpt = obuilder
+        .withLongName("vectorSize")
+        .withRequired(false)
+        .withArgument(abuilder.withName("vs").withDefault(1000000).create())
+        .withDescription("Cardinality of the vector. Default: 1000000").withShortName("vs").create();
+    Option numNonZeroOpt = obuilder
+        .withLongName("numNonZero")
+        .withRequired(false)
+        .withArgument(abuilder.withName("nz").withDefault(1000).create())
+        .withDescription("Size of the vector. Default: 1000").withShortName("nz").create();
+    Option numVectorsOpt = obuilder
+        .withLongName("numVectors")
+        .withRequired(false)
+        .withArgument(abuilder.withName("nv").withDefault(25).create())
+        .withDescription("Number of Vectors to create. Default: 25").withShortName("nv").create();
+    Option numClustersOpt = obuilder
+        .withLongName("numClusters")
+        .withRequired(false)
+        .withArgument(abuilder.withName("nc").withDefault(0).create())
+        .withDescription("Number of clusters to create. Set to non zero to run cluster benchmark. Default: 0")
+        .withShortName("nc").create();
+    Option numOpsOpt = obuilder
+        .withLongName("numOps")
+        .withRequired(false)
+        .withArgument(abuilder.withName("numOps").withDefault(10).create())
+        .withDescription(
+            "Number of operations to do per timer. "
+                + "E.g In distance measure, the distance is calculated numOps times"
+                + " and the total time is measured. Default: 10").withShortName("no").create();
+
+    Option helpOpt = DefaultOptionCreator.helpOption();
+
+    Group group = gbuilder.withName("Options").withOption(vectorSizeOpt).withOption(numNonZeroOpt)
+        .withOption(numVectorsOpt).withOption(numOpsOpt).withOption(numClustersOpt).withOption(helpOpt).create();
+
+    try {
+      Parser parser = new Parser();
+      parser.setGroup(group);
+      CommandLine cmdLine = parser.parse(args);
+
+      if (cmdLine.hasOption(helpOpt)) {
+        CommandLineUtil.printHelpWithGenericOptions(group);
+        return;
+      }
+
+      int cardinality = 1000000;
+      if (cmdLine.hasOption(vectorSizeOpt)) {
+        cardinality = Integer.parseInt((String) cmdLine.getValue(vectorSizeOpt));
+
+      }
+
+      int numClusters = 0;
+      if (cmdLine.hasOption(numClustersOpt)) {
+        numClusters = Integer.parseInt((String) cmdLine.getValue(numClustersOpt));
+      }
+
+      int numNonZero = 1000;
+      if (cmdLine.hasOption(numNonZeroOpt)) {
+        numNonZero = Integer.parseInt((String) cmdLine.getValue(numNonZeroOpt));
+      }
+
+      int numVectors = 25;
+      if (cmdLine.hasOption(numVectorsOpt)) {
+        numVectors = Integer.parseInt((String) cmdLine.getValue(numVectorsOpt));
+
+      }
+
+      int numOps = 10;
+      if (cmdLine.hasOption(numOpsOpt)) {
+        numOps = Integer.parseInt((String) cmdLine.getValue(numOpsOpt));
+
+      }
+      VectorBenchmarks mark = new VectorBenchmarks(cardinality, numNonZero, numVectors, numClusters, numOps);
+      runBenchmark(mark);
+
+      // log.info("\n{}", mark);
+      log.info("\n{}", mark.asCsvString());
+    } catch (OptionException e) {
+      CommandLineUtil.printHelp(group);
+    }
+  }
+
+  /**
+   * Runs the full benchmark suite against {@code mark}: data set-up, creation benchmarks,
+   * the vector operation benchmarks, the distance benchmarks and, when clusters are
+   * configured, the closest-centroid benchmarks.
+   */
+  private static void runBenchmark(VectorBenchmarks mark) throws IOException {
+    // The data must exist before any benchmark touches mark.vectors / mark.clusters.
+    mark.createData();
+    mark.createBenchmark();
+
+    // Incremental creation is too slow for large cardinalities, so it is skipped there.
+    boolean smallEnough = mark.cardinality < 200000;
+    if (smallEnough) {
+      mark.incrementalCreateBenchmark();
+    }
+
+    CloneBenchmark cloneBenchmark = new CloneBenchmark(mark);
+    cloneBenchmark.benchmark();
+    DotBenchmark dotBenchmark = new DotBenchmark(mark);
+    dotBenchmark.benchmark();
+    PlusBenchmark plusBenchmark = new PlusBenchmark(mark);
+    plusBenchmark.benchmark();
+    MinusBenchmark minusBenchmark = new MinusBenchmark(mark);
+    minusBenchmark.benchmark();
+    TimesBenchmark timesBenchmark = new TimesBenchmark(mark);
+    timesBenchmark.benchmark();
+    SerializationBenchmark serializationBenchmark = new SerializationBenchmark(mark);
+    serializationBenchmark.benchmark();
+
+    DistanceBenchmark pairwise = new DistanceBenchmark(mark);
+    pairwise.benchmark(new CosineDistanceMeasure());
+    pairwise.benchmark(new SquaredEuclideanDistanceMeasure());
+    pairwise.benchmark(new EuclideanDistanceMeasure());
+    pairwise.benchmark(new ManhattanDistanceMeasure());
+    pairwise.benchmark(new TanimotoDistanceMeasure());
+    pairwise.benchmark(new ChebyshevDistanceMeasure());
+    pairwise.benchmark(new MinkowskiDistanceMeasure());
+
+    if (mark.numClusters <= 0) {
+      return;
+    }
+
+    ClosestCentroidBenchmark nearest = new ClosestCentroidBenchmark(mark);
+    nearest.benchmark(new CosineDistanceMeasure());
+    nearest.benchmark(new SquaredEuclideanDistanceMeasure());
+    nearest.benchmark(new EuclideanDistanceMeasure());
+    nearest.benchmark(new ManhattanDistanceMeasure());
+    nearest.benchmark(new TanimotoDistanceMeasure());
+    nearest.benchmark(new ChebyshevDistanceMeasure());
+    nearest.benchmark(new MinkowskiDistanceMeasure());
+  }
+
+  /**
+   * Renders the collected results as CSV lines of the form
+   * {@code benchmarkName,implementationName,value}, sorted by benchmark name and followed
+   * by one blank line.
+   *
+   * @return the CSV text; benchmarks/implementations without a result are omitted
+   */
+  private String asCsvString() {
+    List<String> keys = new ArrayList<>(statsMap.keySet());
+    Collections.sort(keys);
+    // Invert implType so implementation names can be looked up by id.
+    Map<Integer,String> implMap = new HashMap<>();
+    for (Entry<String,Integer> e : implType.entrySet()) {
+      implMap.put(e.getValue(), e.getKey());
+    }
+
+    StringBuilder sb = new StringBuilder(1000);
+    for (String benchmarkName : keys) {
+      List<String[]> implStats = statsMap.get(benchmarkName);
+      // The list position IS the implementation id, so iterate by index: a skipped placeholder
+      // must still advance the id, otherwise later rows get the wrong implementation name.
+      for (int implId = 0; implId < implStats.size(); implId++) {
+        String[] stats = implStats.get(implId);
+        if (stats.length < 8) {
+          // EMPTY placeholder (or an unexpectedly short row): nothing to report.
+          continue;
+        }
+        sb.append(benchmarkName).append(',');
+        sb.append(implMap.get(implId)).append(',');
+        // Token 7 is expected to be the "Speed  = <ops> /sec" entry appended by printStats();
+        // its position depends on the layout of TimingStatistics.toString() -- TODO confirm.
+        sb.append(stats[7].trim().split("=|/")[1].trim());
+        sb.append('\n');
+      }
+    }
+    sb.append('\n');
+    return sb.toString();
+  }
+
+  /**
+   * Renders the collected results as a fixed-width table: a header row with the implementation
+   * names in id order, then one group of rows per benchmark (sorted by name) where each column
+   * holds that implementation's statistics tokens.
+   */
+  @Override
+  public String toString() {
+    int pad = 24;
+    StringBuilder sb = new StringBuilder(1000);
+
+    // Header: implementation names ordered by id, each cut to the column width.
+    sb.append(StringUtils.rightPad("BenchMarks", pad));
+    for (int column = 0; column < implType.size(); column++) {
+      for (Entry<String,Integer> impl : implType.entrySet()) {
+        if (impl.getValue() == column) {
+          String header = StringUtils.rightPad(impl.getKey(), pad);
+          sb.append(header.substring(0, pad));
+          break;
+        }
+      }
+    }
+    sb.append('\n');
+
+    List<String> benchmarkNames = new ArrayList<>(statsMap.keySet());
+    Collections.sort(benchmarkNames);
+    for (String benchmarkName : benchmarkNames) {
+      List<String[]> columns = statsMap.get(benchmarkName);
+
+      // The tallest column decides how many rows this benchmark needs.
+      int rows = 0;
+      for (String[] column : columns) {
+        rows = Math.max(rows, column.length);
+      }
+
+      for (int row = 0; row < rows; row++) {
+        // rows > 0 implies at least one column, so the label cell is emitted exactly once per
+        // row: the benchmark name on the first row, blank padding on the others.
+        String label = row == 0 ? benchmarkName : "";
+        sb.append(StringUtils.rightPad(label, pad));
+        for (String[] column : columns) {
+          String cell = column.length > row ? column[row] : "";
+          sb.append(StringUtils.rightPad(cell, pad));
+        }
+        sb.append('\n');
+      }
+      sb.append('\n');
+    }
+    return sb.toString();
+  }
+
+  /** Returns the {@link BenchmarkRunner} created by the constructor. */
+  public BenchmarkRunner getRunner() {
+    return this.runner;
+  }
+}