You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2016/04/15 10:17:06 UTC

[1/2] spark git commit: [SPARK-14549][ML] Copy the Vector and Matrix classes from mllib to ml in mllib-local

Repository: spark
Updated Branches:
  refs/heads/master a9324a06e -> 96534aa47


http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/SparkMLFunSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/SparkMLFunSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/SparkMLFunSuite.scala
new file mode 100644
index 0000000..cb3b56b
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/SparkMLFunSuite.scala
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml
+
+// scalastyle:off
+import org.scalatest.{BeforeAndAfterAll, FunSuite}
+
+/**
+ * Base abstract class for all unit tests in Spark for handling common functionality.
+ */
+private[spark] abstract class SparkMLFunSuite
+  extends FunSuite
+  with BeforeAndAfterAll {
+  // scalastyle:on
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala
new file mode 100644
index 0000000..8a9f497
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BLASSuite.scala
@@ -0,0 +1,408 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import org.apache.spark.ml.SparkMLFunSuite
+import org.apache.spark.ml.linalg.BLAS._
+import org.apache.spark.ml.util.TestingUtils._
+
+class BLASSuite extends SparkMLFunSuite {
+
+  test("copy") {
+    val sx = Vectors.sparse(4, Array(0, 2), Array(1.0, -2.0))
+    val dx = Vectors.dense(1.0, 0.0, -2.0, 0.0)
+    val sy = Vectors.sparse(4, Array(0, 1, 3), Array(2.0, 1.0, 1.0))
+    val dy = Array(2.0, 1.0, 0.0, 1.0)
+
+    val dy1 = Vectors.dense(dy.clone())
+    copy(sx, dy1)
+    assert(dy1 ~== dx absTol 1e-15)
+
+    val dy2 = Vectors.dense(dy.clone())
+    copy(dx, dy2)
+    assert(dy2 ~== dx absTol 1e-15)
+
+    intercept[IllegalArgumentException] {
+      copy(sx, sy)
+    }
+
+    intercept[IllegalArgumentException] {
+      copy(dx, sy)
+    }
+
+    withClue("vector sizes must match") {
+      intercept[Exception] {
+        copy(sx, Vectors.dense(0.0, 1.0, 2.0))
+      }
+    }
+  }
+
+  test("scal") {
+    val a = 0.1
+    val sx = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
+    val dx = Vectors.dense(1.0, 0.0, -2.0)
+
+    scal(a, sx)
+    assert(sx ~== Vectors.sparse(3, Array(0, 2), Array(0.1, -0.2)) absTol 1e-15)
+
+    scal(a, dx)
+    assert(dx ~== Vectors.dense(0.1, 0.0, -0.2) absTol 1e-15)
+  }
+
+  test("axpy") {
+    val alpha = 0.1
+    val sx = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
+    val dx = Vectors.dense(1.0, 0.0, -2.0)
+    val dy = Array(2.0, 1.0, 0.0)
+    val expected = Vectors.dense(2.1, 1.0, -0.2)
+
+    val dy1 = Vectors.dense(dy.clone())
+    axpy(alpha, sx, dy1)
+    assert(dy1 ~== expected absTol 1e-15)
+
+    val dy2 = Vectors.dense(dy.clone())
+    axpy(alpha, dx, dy2)
+    assert(dy2 ~== expected absTol 1e-15)
+
+    val sy = Vectors.sparse(4, Array(0, 1), Array(2.0, 1.0))
+
+    intercept[IllegalArgumentException] {
+      axpy(alpha, sx, sy)
+    }
+
+    intercept[IllegalArgumentException] {
+      axpy(alpha, dx, sy)
+    }
+
+    withClue("vector sizes must match") {
+      intercept[Exception] {
+        axpy(alpha, sx, Vectors.dense(1.0, 2.0))
+      }
+    }
+  }
+
+  test("dot") {
+    val sx = Vectors.sparse(3, Array(0, 2), Array(1.0, -2.0))
+    val dx = Vectors.dense(1.0, 0.0, -2.0)
+    val sy = Vectors.sparse(3, Array(0, 1), Array(2.0, 1.0))
+    val dy = Vectors.dense(2.0, 1.0, 0.0)
+
+    assert(dot(sx, sy) ~== 2.0 absTol 1e-15)
+    assert(dot(sy, sx) ~== 2.0 absTol 1e-15)
+    assert(dot(sx, dy) ~== 2.0 absTol 1e-15)
+    assert(dot(dy, sx) ~== 2.0 absTol 1e-15)
+    assert(dot(dx, dy) ~== 2.0 absTol 1e-15)
+    assert(dot(dy, dx) ~== 2.0 absTol 1e-15)
+
+    assert(dot(sx, sx) ~== 5.0 absTol 1e-15)
+    assert(dot(dx, dx) ~== 5.0 absTol 1e-15)
+    assert(dot(sx, dx) ~== 5.0 absTol 1e-15)
+    assert(dot(dx, sx) ~== 5.0 absTol 1e-15)
+
+    val sx1 = Vectors.sparse(10, Array(0, 3, 5, 7, 8), Array(1.0, 2.0, 3.0, 4.0, 5.0))
+    val sx2 = Vectors.sparse(10, Array(1, 3, 6, 7, 9), Array(1.0, 2.0, 3.0, 4.0, 5.0))
+    assert(dot(sx1, sx2) ~== 20.0 absTol 1e-15)
+    assert(dot(sx2, sx1) ~== 20.0 absTol 1e-15)
+
+    withClue("vector sizes must match") {
+      intercept[Exception] {
+        dot(sx, Vectors.dense(2.0, 1.0))
+      }
+    }
+  }
+
+  test("spr") {
+    // test dense vector
+    val alpha = 0.1
+    val x = new DenseVector(Array(1.0, 2, 2.1, 4))
+    val U = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
+    val expected = new DenseVector(Array(1.1, 2.2, 2.4, 3.21, 3.42, 3.441, 4.4, 4.8, 4.84, 5.6))
+
+    spr(alpha, x, U)
+    assert(U ~== expected absTol 1e-9)
+
+    val matrix33 = new DenseVector(Array(1.0, 2, 3, 4, 5))
+    withClue("Size of vector must match the rank of matrix") {
+      intercept[Exception] {
+        spr(alpha, x, matrix33)
+      }
+    }
+
+    // test sparse vector
+    val sv = new SparseVector(4, Array(0, 3), Array(1.0, 2))
+    val U2 = new DenseVector(Array(1.0, 2, 2, 3, 3, 3, 4, 4, 4, 4))
+    spr(0.1, sv, U2)
+    val expectedSparse = new DenseVector(Array(1.1, 2.0, 2.0, 3.0, 3.0, 3.0, 4.2, 4.0, 4.0, 4.4))
+    assert(U2 ~== expectedSparse absTol 1e-15)
+  }
+
+  test("syr") {
+    val dA = new DenseMatrix(4, 4,
+      Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8))
+    val x = new DenseVector(Array(0.0, 2.7, 3.5, 2.1))
+    val alpha = 0.15
+
+    val expected = new DenseMatrix(4, 4,
+      Array(0.0, 1.2, 2.2, 3.1, 1.2, 4.2935, 6.7175, 5.4505, 2.2, 6.7175, 3.6375, 4.1025, 3.1,
+        5.4505, 4.1025, 1.4615))
+
+    syr(alpha, x, dA)
+
+    assert(dA ~== expected absTol 1e-15)
+
+    val dB =
+      new DenseMatrix(3, 4, Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0))
+
+    withClue("Matrix A must be a symmetric Matrix") {
+      intercept[Exception] {
+        syr(alpha, x, dB)
+      }
+    }
+
+    val dC =
+      new DenseMatrix(3, 3, Array(0.0, 1.2, 2.2, 1.2, 3.2, 5.3, 2.2, 5.3, 1.8))
+
+    withClue("Size of vector must match the rank of matrix") {
+      intercept[Exception] {
+        syr(alpha, x, dC)
+      }
+    }
+
+    val y = new DenseVector(Array(0.0, 2.7, 3.5, 2.1, 1.5))
+
+    withClue("Size of vector must match the rank of matrix") {
+      intercept[Exception] {
+        syr(alpha, y, dA)
+      }
+    }
+
+    val xSparse = new SparseVector(4, Array(0, 2, 3), Array(1.0, 3.0, 4.0))
+    val dD = new DenseMatrix(4, 4,
+      Array(0.0, 1.2, 2.2, 3.1, 1.2, 3.2, 5.3, 4.6, 2.2, 5.3, 1.8, 3.0, 3.1, 4.6, 3.0, 0.8))
+    syr(0.1, xSparse, dD)
+    val expectedSparse = new DenseMatrix(4, 4,
+      Array(0.1, 1.2, 2.5, 3.5, 1.2, 3.2, 5.3, 4.6, 2.5, 5.3, 2.7, 4.2, 3.5, 4.6, 4.2, 2.4))
+    assert(dD ~== expectedSparse absTol 1e-15)
+  }
+
+  test("gemm") {
+    val dA =
+      new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
+    val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))
+
+    val B = new DenseMatrix(3, 2, Array(1.0, 0.0, 0.0, 0.0, 2.0, 1.0))
+    val expected = new DenseMatrix(4, 2, Array(0.0, 1.0, 0.0, 0.0, 4.0, 0.0, 2.0, 3.0))
+    val BTman = new DenseMatrix(2, 3, Array(1.0, 0.0, 0.0, 2.0, 0.0, 1.0))
+    val BT = B.transpose
+
+    assert(dA.multiply(B) ~== expected absTol 1e-15)
+    assert(sA.multiply(B) ~== expected absTol 1e-15)
+
+    val C1 = new DenseMatrix(4, 2, Array(1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 0.0))
+    val C2 = C1.copy
+    val C3 = C1.copy
+    val C4 = C1.copy
+    val C5 = C1.copy
+    val C6 = C1.copy
+    val C7 = C1.copy
+    val C8 = C1.copy
+    val C9 = C1.copy
+    val C10 = C1.copy
+    val C11 = C1.copy
+    val C12 = C1.copy
+    val C13 = C1.copy
+    val C14 = C1.copy
+    val C15 = C1.copy
+    val C16 = C1.copy
+    val C17 = C1.copy
+    val expected2 = new DenseMatrix(4, 2, Array(2.0, 1.0, 4.0, 2.0, 4.0, 0.0, 4.0, 3.0))
+    val expected3 = new DenseMatrix(4, 2, Array(2.0, 2.0, 4.0, 2.0, 8.0, 0.0, 6.0, 6.0))
+    val expected4 = new DenseMatrix(4, 2, Array(5.0, 0.0, 10.0, 5.0, 0.0, 0.0, 5.0, 0.0))
+    val expected5 = C1.copy
+
+    gemm(1.0, dA, B, 2.0, C1)
+    gemm(1.0, sA, B, 2.0, C2)
+    gemm(2.0, dA, B, 2.0, C3)
+    gemm(2.0, sA, B, 2.0, C4)
+    assert(C1 ~== expected2 absTol 1e-15)
+    assert(C2 ~== expected2 absTol 1e-15)
+    assert(C3 ~== expected3 absTol 1e-15)
+    assert(C4 ~== expected3 absTol 1e-15)
+    gemm(1.0, dA, B, 0.0, C17)
+    assert(C17 ~== expected absTol 1e-15)
+    gemm(1.0, sA, B, 0.0, C17)
+    assert(C17 ~== expected absTol 1e-15)
+
+    withClue("columns of A don't match the rows of B") {
+      intercept[Exception] {
+        gemm(1.0, dA.transpose, B, 2.0, C1)
+      }
+    }
+
+    val dATman =
+      new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
+    val sATman =
+      new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))
+
+    val dATT = dATman.transpose
+    val sATT = sATman.transpose
+    val BTT = BTman.transpose.asInstanceOf[DenseMatrix]
+
+    assert(dATT.multiply(B) ~== expected absTol 1e-15)
+    assert(sATT.multiply(B) ~== expected absTol 1e-15)
+    assert(dATT.multiply(BTT) ~== expected absTol 1e-15)
+    assert(sATT.multiply(BTT) ~== expected absTol 1e-15)
+
+    gemm(1.0, dATT, BTT, 2.0, C5)
+    gemm(1.0, sATT, BTT, 2.0, C6)
+    gemm(2.0, dATT, BTT, 2.0, C7)
+    gemm(2.0, sATT, BTT, 2.0, C8)
+    gemm(1.0, dA, BTT, 2.0, C9)
+    gemm(1.0, sA, BTT, 2.0, C10)
+    gemm(2.0, dA, BTT, 2.0, C11)
+    gemm(2.0, sA, BTT, 2.0, C12)
+    assert(C5 ~== expected2 absTol 1e-15)
+    assert(C6 ~== expected2 absTol 1e-15)
+    assert(C7 ~== expected3 absTol 1e-15)
+    assert(C8 ~== expected3 absTol 1e-15)
+    assert(C9 ~== expected2 absTol 1e-15)
+    assert(C10 ~== expected2 absTol 1e-15)
+    assert(C11 ~== expected3 absTol 1e-15)
+    assert(C12 ~== expected3 absTol 1e-15)
+
+    gemm(0, dA, B, 5, C13)
+    gemm(0, sA, B, 5, C14)
+    gemm(0, dA, B, 1, C15)
+    gemm(0, sA, B, 1, C16)
+    assert(C13 ~== expected4 absTol 1e-15)
+    assert(C14 ~== expected4 absTol 1e-15)
+    assert(C15 ~== expected5 absTol 1e-15)
+    assert(C16 ~== expected5 absTol 1e-15)
+
+  }
+
+  test("gemv") {
+
+    val dA =
+      new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
+    val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))
+
+    val dA2 =
+      new DenseMatrix(4, 3, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0), true)
+    val sA2 =
+      new SparseMatrix(4, 3, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0),
+        true)
+
+    val dx = new DenseVector(Array(1.0, 2.0, 3.0))
+    val sx = dx.toSparse
+    val expected = new DenseVector(Array(4.0, 1.0, 2.0, 9.0))
+
+    assert(dA.multiply(dx) ~== expected absTol 1e-15)
+    assert(sA.multiply(dx) ~== expected absTol 1e-15)
+    assert(dA.multiply(sx) ~== expected absTol 1e-15)
+    assert(sA.multiply(sx) ~== expected absTol 1e-15)
+
+    val y1 = new DenseVector(Array(1.0, 3.0, 1.0, 0.0))
+    val y2 = y1.copy
+    val y3 = y1.copy
+    val y4 = y1.copy
+    val y5 = y1.copy
+    val y6 = y1.copy
+    val y7 = y1.copy
+    val y8 = y1.copy
+    val y9 = y1.copy
+    val y10 = y1.copy
+    val y11 = y1.copy
+    val y12 = y1.copy
+    val y13 = y1.copy
+    val y14 = y1.copy
+    val y15 = y1.copy
+    val y16 = y1.copy
+
+    val expected2 = new DenseVector(Array(6.0, 7.0, 4.0, 9.0))
+    val expected3 = new DenseVector(Array(10.0, 8.0, 6.0, 18.0))
+
+    gemv(1.0, dA, dx, 2.0, y1)
+    gemv(1.0, sA, dx, 2.0, y2)
+    gemv(1.0, dA, sx, 2.0, y3)
+    gemv(1.0, sA, sx, 2.0, y4)
+
+    gemv(1.0, dA2, dx, 2.0, y5)
+    gemv(1.0, sA2, dx, 2.0, y6)
+    gemv(1.0, dA2, sx, 2.0, y7)
+    gemv(1.0, sA2, sx, 2.0, y8)
+
+    gemv(2.0, dA, dx, 2.0, y9)
+    gemv(2.0, sA, dx, 2.0, y10)
+    gemv(2.0, dA, sx, 2.0, y11)
+    gemv(2.0, sA, sx, 2.0, y12)
+
+    gemv(2.0, dA2, dx, 2.0, y13)
+    gemv(2.0, sA2, dx, 2.0, y14)
+    gemv(2.0, dA2, sx, 2.0, y15)
+    gemv(2.0, sA2, sx, 2.0, y16)
+
+    assert(y1 ~== expected2 absTol 1e-15)
+    assert(y2 ~== expected2 absTol 1e-15)
+    assert(y3 ~== expected2 absTol 1e-15)
+    assert(y4 ~== expected2 absTol 1e-15)
+
+    assert(y5 ~== expected2 absTol 1e-15)
+    assert(y6 ~== expected2 absTol 1e-15)
+    assert(y7 ~== expected2 absTol 1e-15)
+    assert(y8 ~== expected2 absTol 1e-15)
+
+    assert(y9 ~== expected3 absTol 1e-15)
+    assert(y10 ~== expected3 absTol 1e-15)
+    assert(y11 ~== expected3 absTol 1e-15)
+    assert(y12 ~== expected3 absTol 1e-15)
+
+    assert(y13 ~== expected3 absTol 1e-15)
+    assert(y14 ~== expected3 absTol 1e-15)
+    assert(y15 ~== expected3 absTol 1e-15)
+    assert(y16 ~== expected3 absTol 1e-15)
+
+    withClue("columns of A don't match the rows of B") {
+      intercept[Exception] {
+        gemv(1.0, dA.transpose, dx, 2.0, y1)
+      }
+      intercept[Exception] {
+        gemv(1.0, sA.transpose, dx, 2.0, y1)
+      }
+      intercept[Exception] {
+        gemv(1.0, dA.transpose, sx, 2.0, y1)
+      }
+      intercept[Exception] {
+        gemv(1.0, sA.transpose, sx, 2.0, y1)
+      }
+    }
+
+    val dAT =
+      new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
+    val sAT =
+      new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))
+
+    val dATT = dAT.transpose
+    val sATT = sAT.transpose
+
+    assert(dATT.multiply(dx) ~== expected absTol 1e-15)
+    assert(sATT.multiply(dx) ~== expected absTol 1e-15)
+    assert(dATT.multiply(sx) ~== expected absTol 1e-15)
+    assert(sATT.multiply(sx) ~== expected absTol 1e-15)
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala
new file mode 100644
index 0000000..70a21e4
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeMatrixConversionSuite.scala
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM}
+
+import org.apache.spark.ml.SparkMLFunSuite
+
+class BreezeMatrixConversionSuite extends SparkMLFunSuite {
+  test("dense matrix to breeze") {
+    val mat = Matrices.dense(3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
+    val breeze = mat.toBreeze.asInstanceOf[BDM[Double]]
+    assert(breeze.rows === mat.numRows)
+    assert(breeze.cols === mat.numCols)
+    assert(breeze.data.eq(mat.asInstanceOf[DenseMatrix].values), "should not copy data")
+  }
+
+  test("dense breeze matrix to matrix") {
+    val breeze = new BDM[Double](3, 2, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
+    val mat = Matrices.fromBreeze(breeze).asInstanceOf[DenseMatrix]
+    assert(mat.numRows === breeze.rows)
+    assert(mat.numCols === breeze.cols)
+    assert(mat.values.eq(breeze.data), "should not copy data")
+    // transposed matrix
+    val matTransposed = Matrices.fromBreeze(breeze.t).asInstanceOf[DenseMatrix]
+    assert(matTransposed.numRows === breeze.cols)
+    assert(matTransposed.numCols === breeze.rows)
+    assert(matTransposed.values.eq(breeze.data), "should not copy data")
+  }
+
+  test("sparse matrix to breeze") {
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(1, 2, 1, 2)
+    val mat = Matrices.sparse(3, 2, colPtrs, rowIndices, values)
+    val breeze = mat.toBreeze.asInstanceOf[BSM[Double]]
+    assert(breeze.rows === mat.numRows)
+    assert(breeze.cols === mat.numCols)
+    assert(breeze.data.eq(mat.asInstanceOf[SparseMatrix].values), "should not copy data")
+  }
+
+  test("sparse breeze matrix to sparse matrix") {
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(1, 2, 1, 2)
+    val breeze = new BSM[Double](values, 3, 2, colPtrs, rowIndices)
+    val mat = Matrices.fromBreeze(breeze).asInstanceOf[SparseMatrix]
+    assert(mat.numRows === breeze.rows)
+    assert(mat.numCols === breeze.cols)
+    assert(mat.values.eq(breeze.data), "should not copy data")
+    val matTransposed = Matrices.fromBreeze(breeze.t).asInstanceOf[SparseMatrix]
+    assert(matTransposed.numRows === breeze.cols)
+    assert(matTransposed.numCols === breeze.rows)
+    assert(!matTransposed.values.eq(breeze.data), "has to copy data")
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala
new file mode 100644
index 0000000..00c9ee7
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/BreezeVectorConversionSuite.scala
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import breeze.linalg.{DenseVector => BDV, SparseVector => BSV}
+
+import org.apache.spark.ml.SparkMLFunSuite
+
+/**
+ * Test Breeze vector conversions.
+ */
+class BreezeVectorConversionSuite extends SparkMLFunSuite {
+
+  val arr = Array(0.1, 0.2, 0.3, 0.4)
+  val n = 20
+  val indices = Array(0, 3, 5, 10, 13)
+  val values = Array(0.1, 0.5, 0.3, -0.8, -1.0)
+
+  test("dense to breeze") {
+    val vec = Vectors.dense(arr)
+    assert(vec.toBreeze === new BDV[Double](arr))
+  }
+
+  test("sparse to breeze") {
+    val vec = Vectors.sparse(n, indices, values)
+    assert(vec.toBreeze === new BSV[Double](indices, values, n))
+  }
+
+  test("dense breeze to vector") {
+    val breeze = new BDV[Double](arr)
+    val vec = Vectors.fromBreeze(breeze).asInstanceOf[DenseVector]
+    assert(vec.size === arr.length)
+    assert(vec.values.eq(arr), "should not copy data")
+  }
+
+  test("sparse breeze to vector") {
+    val breeze = new BSV[Double](indices, values, n)
+    val vec = Vectors.fromBreeze(breeze).asInstanceOf[SparseVector]
+    assert(vec.size === n)
+    assert(vec.indices.eq(indices), "should not copy data")
+    assert(vec.values.eq(values), "should not copy data")
+  }
+
+  test("sparse breeze with partially-used arrays to vector") {
+    val activeSize = 3
+    val breeze = new BSV[Double](indices, values, activeSize, n)
+    val vec = Vectors.fromBreeze(breeze).asInstanceOf[SparseVector]
+    assert(vec.size === n)
+    assert(vec.indices === indices.slice(0, activeSize))
+    assert(vec.values === values.slice(0, activeSize))
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala
new file mode 100644
index 0000000..5c69c5e
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala
@@ -0,0 +1,511 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import java.util.Random
+
+import breeze.linalg.{CSCMatrix, Matrix => BM}
+import org.mockito.Mockito.when
+import org.scalatest.mock.MockitoSugar._
+import scala.collection.mutable.{Map => MutableMap}
+
+import org.apache.spark.ml.SparkMLFunSuite
+import org.apache.spark.ml.util.TestingUtils._
+
+class MatricesSuite extends SparkMLFunSuite {
+  test("dense matrix construction") {
+    val m = 3
+    val n = 2
+    val values = Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0)
+    val mat = Matrices.dense(m, n, values).asInstanceOf[DenseMatrix]
+    assert(mat.numRows === m)
+    assert(mat.numCols === n)
+    assert(mat.values.eq(values), "should not copy data")
+  }
+
+  test("dense matrix construction with wrong dimension") {
+    intercept[RuntimeException] {
+      Matrices.dense(3, 2, Array(0.0, 1.0, 2.0))
+    }
+  }
+
+  test("sparse matrix construction") {
+    val m = 3
+    val n = 4
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 2, 4, 4)
+    val rowIndices = Array(1, 2, 1, 2)
+    val mat = Matrices.sparse(m, n, colPtrs, rowIndices, values).asInstanceOf[SparseMatrix]
+    assert(mat.numRows === m)
+    assert(mat.numCols === n)
+    assert(mat.values.eq(values), "should not copy data")
+    assert(mat.colPtrs.eq(colPtrs), "should not copy data")
+    assert(mat.rowIndices.eq(rowIndices), "should not copy data")
+
+    val entries: Array[(Int, Int, Double)] = Array((2, 2, 3.0), (1, 0, 1.0), (2, 0, 2.0),
+        (1, 2, 2.0), (2, 2, 2.0), (1, 2, 2.0), (0, 0, 0.0))
+
+    val mat2 = SparseMatrix.fromCOO(m, n, entries)
+    assert(mat.toBreeze === mat2.toBreeze)
+    assert(mat2.values.length == 4)
+  }
+
+  test("sparse matrix construction with wrong number of elements") {
+    intercept[IllegalArgumentException] {
+      Matrices.sparse(3, 2, Array(0, 1), Array(1, 2, 1), Array(0.0, 1.0, 2.0))
+    }
+
+    intercept[IllegalArgumentException] {
+      Matrices.sparse(3, 2, Array(0, 1, 2), Array(1, 2), Array(0.0, 1.0, 2.0))
+    }
+  }
+
+  test("index in matrices incorrect input") {
+    val sm = Matrices.sparse(3, 2, Array(0, 2, 3), Array(1, 2, 1), Array(0.0, 1.0, 2.0))
+    val dm = Matrices.dense(3, 2, Array(0.0, 2.3, 1.4, 3.2, 1.0, 9.1))
+    Array(sm, dm).foreach { mat =>
+      intercept[IllegalArgumentException] { mat.index(4, 1) }
+      intercept[IllegalArgumentException] { mat.index(1, 4) }
+      intercept[IllegalArgumentException] { mat.index(-1, 2) }
+      intercept[IllegalArgumentException] { mat.index(1, -2) }
+    }
+  }
+
+  test("equals") {
+    val dm1 = Matrices.dense(2, 2, Array(0.0, 1.0, 2.0, 3.0))
+    assert(dm1 === dm1)
+    assert(dm1 !== dm1.transpose)
+
+    val dm2 = Matrices.dense(2, 2, Array(0.0, 2.0, 1.0, 3.0))
+    assert(dm1 === dm2.transpose)
+
+    val sm1 = dm1.asInstanceOf[DenseMatrix].toSparse
+    assert(sm1 === sm1)
+    assert(sm1 === dm1)
+    assert(sm1 !== sm1.transpose)
+
+    val sm2 = dm2.asInstanceOf[DenseMatrix].toSparse
+    assert(sm1 === sm2.transpose)
+    assert(sm1 === dm2.transpose)
+  }
+
+  test("matrix copies are deep copies") {
+    val m = 3
+    val n = 2
+
+    val denseMat = Matrices.dense(m, n, Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0))
+    val denseCopy = denseMat.copy
+
+    assert(!denseMat.toArray.eq(denseCopy.toArray))
+
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(1, 2, 1, 2)
+    val sparseMat = Matrices.sparse(m, n, colPtrs, rowIndices, values)
+    val sparseCopy = sparseMat.copy
+
+    assert(!sparseMat.toArray.eq(sparseCopy.toArray))
+  }
+
+  test("matrix indexing and updating") {
+    val m = 3
+    val n = 2
+    val allValues = Array(0.0, 1.0, 2.0, 3.0, 4.0, 0.0)
+
+    val denseMat = new DenseMatrix(m, n, allValues)
+
+    assert(denseMat(0, 1) === 3.0)
+    assert(denseMat(0, 1) === denseMat.values(3))
+    assert(denseMat(0, 1) === denseMat(3))
+    assert(denseMat(0, 0) === 0.0)
+
+    denseMat.update(0, 0, 10.0)
+    assert(denseMat(0, 0) === 10.0)
+    assert(denseMat.values(0) === 10.0)
+
+    val sparseValues = Array(1.0, 2.0, 3.0, 4.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(1, 2, 0, 1)
+    val sparseMat = new SparseMatrix(m, n, colPtrs, rowIndices, sparseValues)
+
+    assert(sparseMat(0, 1) === 3.0)
+    assert(sparseMat(0, 1) === sparseMat.values(2))
+    assert(sparseMat(0, 0) === 0.0)
+
+    intercept[NoSuchElementException] {
+      sparseMat.update(0, 0, 10.0)
+    }
+
+    intercept[NoSuchElementException] {
+      sparseMat.update(2, 1, 10.0)
+    }
+
+    sparseMat.update(0, 1, 10.0)
+    assert(sparseMat(0, 1) === 10.0)
+    assert(sparseMat.values(2) === 10.0)
+  }
+
+  test("toSparse, toDense") {
+    val m = 3
+    val n = 2
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(0, 1, 1, 2)
+
+    val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values)
+    val deMat1 = new DenseMatrix(m, n, allValues)
+
+    val spMat2 = deMat1.toSparse
+    val deMat2 = spMat1.toDense
+
+    assert(spMat1.toBreeze === spMat2.toBreeze)
+    assert(deMat1.toBreeze === deMat2.toBreeze)
+  }
+
+  test("map, update") {
+    val m = 3
+    val n = 2
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(0, 1, 1, 2)
+
+    val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values)
+    val deMat1 = new DenseMatrix(m, n, allValues)
+    val deMat2 = deMat1.map(_ * 2)
+    val spMat2 = spMat1.map(_ * 2)
+    deMat1.update(_ * 2)
+    spMat1.update(_ * 2)
+
+    assert(spMat1.toArray === spMat2.toArray)
+    assert(deMat1.toArray === deMat2.toArray)
+  }
+
+  test("transpose") {
+    val dA =
+      new DenseMatrix(4, 3, Array(0.0, 1.0, 0.0, 0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0))
+    val sA = new SparseMatrix(4, 3, Array(0, 1, 3, 4), Array(1, 0, 2, 3), Array(1.0, 2.0, 1.0, 3.0))
+
+    val dAT = dA.transpose.asInstanceOf[DenseMatrix]
+    val sAT = sA.transpose.asInstanceOf[SparseMatrix]
+    val dATexpected =
+      new DenseMatrix(3, 4, Array(0.0, 2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0))
+    val sATexpected =
+      new SparseMatrix(3, 4, Array(0, 1, 2, 3, 4), Array(1, 0, 1, 2), Array(2.0, 1.0, 1.0, 3.0))
+
+    assert(dAT.toBreeze === dATexpected.toBreeze)
+    assert(sAT.toBreeze === sATexpected.toBreeze)
+    assert(dA(1, 0) === dAT(0, 1))
+    assert(dA(2, 1) === dAT(1, 2))
+    assert(sA(1, 0) === sAT(0, 1))
+    assert(sA(2, 1) === sAT(1, 2))
+
+    assert(!dA.toArray.eq(dAT.toArray), "has to have a new array")
+    assert(dA.values.eq(dAT.transpose.asInstanceOf[DenseMatrix].values), "should not copy array")
+
+    assert(dAT.toSparse.toBreeze === sATexpected.toBreeze)
+    assert(sAT.toDense.toBreeze === dATexpected.toBreeze)
+  }
+
+  test("foreachActive") {
+    val m = 3
+    val n = 2
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(0, 1, 1, 2)
+
+    val sp = new SparseMatrix(m, n, colPtrs, rowIndices, values)
+    val dn = new DenseMatrix(m, n, allValues)
+
+    val dnMap = MutableMap[(Int, Int), Double]()
+    dn.foreachActive { (i, j, value) =>
+      dnMap.put((i, j), value)
+    }
+    assert(dnMap.size === 6)
+    assert(dnMap(0, 0) === 1.0)
+    assert(dnMap(1, 0) === 2.0)
+    assert(dnMap(2, 0) === 0.0)
+    assert(dnMap(0, 1) === 0.0)
+    assert(dnMap(1, 1) === 4.0)
+    assert(dnMap(2, 1) === 5.0)
+
+    val spMap = MutableMap[(Int, Int), Double]()
+    sp.foreachActive { (i, j, value) =>
+      spMap.put((i, j), value)
+    }
+    assert(spMap.size === 4)
+    assert(spMap(0, 0) === 1.0)
+    assert(spMap(1, 0) === 2.0)
+    assert(spMap(1, 1) === 4.0)
+    assert(spMap(2, 1) === 5.0)
+  }
+
+  test("horzcat, vertcat, eye, speye") {
+    val m = 3
+    val n = 2
+    val values = Array(1.0, 2.0, 4.0, 5.0)
+    val allValues = Array(1.0, 2.0, 0.0, 0.0, 4.0, 5.0)
+    val colPtrs = Array(0, 2, 4)
+    val rowIndices = Array(0, 1, 1, 2)
+    // transposed versions
+    val allValuesT = Array(1.0, 0.0, 2.0, 4.0, 0.0, 5.0)
+    val colPtrsT = Array(0, 1, 3, 4)
+    val rowIndicesT = Array(0, 0, 1, 1)
+
+    val spMat1 = new SparseMatrix(m, n, colPtrs, rowIndices, values)
+    val deMat1 = new DenseMatrix(m, n, allValues)
+    val spMat1T = new SparseMatrix(n, m, colPtrsT, rowIndicesT, values)
+    val deMat1T = new DenseMatrix(n, m, allValuesT)
+
+    // should equal spMat1 & deMat1 respectively
+    val spMat1TT = spMat1T.transpose
+    val deMat1TT = deMat1T.transpose
+
+    val deMat2 = Matrices.eye(3)
+    val spMat2 = Matrices.speye(3)
+    val deMat3 = Matrices.eye(2)
+    val spMat3 = Matrices.speye(2)
+
+    val spHorz = Matrices.horzcat(Array(spMat1, spMat2))
+    val spHorz2 = Matrices.horzcat(Array(spMat1, deMat2))
+    val spHorz3 = Matrices.horzcat(Array(deMat1, spMat2))
+    val deHorz1 = Matrices.horzcat(Array(deMat1, deMat2))
+    val deHorz2 = Matrices.horzcat(Array[Matrix]())
+
+    assert(deHorz1.numRows === 3)
+    assert(spHorz2.numRows === 3)
+    assert(spHorz3.numRows === 3)
+    assert(spHorz.numRows === 3)
+    assert(deHorz1.numCols === 5)
+    assert(spHorz2.numCols === 5)
+    assert(spHorz3.numCols === 5)
+    assert(spHorz.numCols === 5)
+    assert(deHorz2.numRows === 0)
+    assert(deHorz2.numCols === 0)
+    assert(deHorz2.toArray.length === 0)
+
+    assert(deHorz1 ~== spHorz2.asInstanceOf[SparseMatrix].toDense absTol 1e-15)
+    assert(spHorz2 ~== spHorz3 absTol 1e-15)
+    assert(spHorz(0, 0) === 1.0)
+    assert(spHorz(2, 1) === 5.0)
+    assert(spHorz(0, 2) === 1.0)
+    assert(spHorz(1, 2) === 0.0)
+    assert(spHorz(1, 3) === 1.0)
+    assert(spHorz(2, 4) === 1.0)
+    assert(spHorz(1, 4) === 0.0)
+    assert(deHorz1(0, 0) === 1.0)
+    assert(deHorz1(2, 1) === 5.0)
+    assert(deHorz1(0, 2) === 1.0)
+    assert(deHorz1(1, 2) == 0.0)
+    assert(deHorz1(1, 3) === 1.0)
+    assert(deHorz1(2, 4) === 1.0)
+    assert(deHorz1(1, 4) === 0.0)
+
+    // containing transposed matrices
+    val spHorzT = Matrices.horzcat(Array(spMat1TT, spMat2))
+    val spHorz2T = Matrices.horzcat(Array(spMat1TT, deMat2))
+    val spHorz3T = Matrices.horzcat(Array(deMat1TT, spMat2))
+    val deHorz1T = Matrices.horzcat(Array(deMat1TT, deMat2))
+
+    assert(deHorz1T ~== deHorz1 absTol 1e-15)
+    assert(spHorzT ~== spHorz absTol 1e-15)
+    assert(spHorz2T ~== spHorz2 absTol 1e-15)
+    assert(spHorz3T ~== spHorz3 absTol 1e-15)
+
+    intercept[IllegalArgumentException] {
+      Matrices.horzcat(Array(spMat1, spMat3))
+    }
+
+    intercept[IllegalArgumentException] {
+      Matrices.horzcat(Array(deMat1, spMat3))
+    }
+
+    val spVert = Matrices.vertcat(Array(spMat1, spMat3))
+    val deVert1 = Matrices.vertcat(Array(deMat1, deMat3))
+    val spVert2 = Matrices.vertcat(Array(spMat1, deMat3))
+    val spVert3 = Matrices.vertcat(Array(deMat1, spMat3))
+    val deVert2 = Matrices.vertcat(Array[Matrix]())
+
+    assert(deVert1.numRows === 5)
+    assert(spVert2.numRows === 5)
+    assert(spVert3.numRows === 5)
+    assert(spVert.numRows === 5)
+    assert(deVert1.numCols === 2)
+    assert(spVert2.numCols === 2)
+    assert(spVert3.numCols === 2)
+    assert(spVert.numCols === 2)
+    assert(deVert2.numRows === 0)
+    assert(deVert2.numCols === 0)
+    assert(deVert2.toArray.length === 0)
+
+    assert(deVert1 ~== spVert2.asInstanceOf[SparseMatrix].toDense absTol 1e-15)
+    assert(spVert2 ~== spVert3 absTol 1e-15)
+    assert(spVert(0, 0) === 1.0)
+    assert(spVert(2, 1) === 5.0)
+    assert(spVert(3, 0) === 1.0)
+    assert(spVert(3, 1) === 0.0)
+    assert(spVert(4, 1) === 1.0)
+    assert(deVert1(0, 0) === 1.0)
+    assert(deVert1(2, 1) === 5.0)
+    assert(deVert1(3, 0) === 1.0)
+    assert(deVert1(3, 1) === 0.0)
+    assert(deVert1(4, 1) === 1.0)
+
+    // containing transposed matrices
+    val spVertT = Matrices.vertcat(Array(spMat1TT, spMat3))
+    val deVert1T = Matrices.vertcat(Array(deMat1TT, deMat3))
+    val spVert2T = Matrices.vertcat(Array(spMat1TT, deMat3))
+    val spVert3T = Matrices.vertcat(Array(deMat1TT, spMat3))
+
+    assert(deVert1T ~== deVert1 absTol 1e-15)
+    assert(spVertT ~== spVert absTol 1e-15)
+    assert(spVert2T ~== spVert2 absTol 1e-15)
+    assert(spVert3T ~== spVert3 absTol 1e-15)
+
+    intercept[IllegalArgumentException] {
+      Matrices.vertcat(Array(spMat1, spMat2))
+    }
+
+    intercept[IllegalArgumentException] {
+      Matrices.vertcat(Array(deMat1, spMat2))
+    }
+  }
+
+  test("zeros") {
+    val mat = Matrices.zeros(2, 3).asInstanceOf[DenseMatrix]
+    assert(mat.numRows === 2)
+    assert(mat.numCols === 3)
+    assert(mat.values.forall(_ == 0.0))
+  }
+
+  test("ones") {
+    val mat = Matrices.ones(2, 3).asInstanceOf[DenseMatrix]
+    assert(mat.numRows === 2)
+    assert(mat.numCols === 3)
+    assert(mat.values.forall(_ == 1.0))
+  }
+
+  test("eye") {
+    val mat = Matrices.eye(2).asInstanceOf[DenseMatrix]
+    assert(mat.numCols === 2)
+    assert(mat.numCols === 2)
+    assert(mat.values.toSeq === Seq(1.0, 0.0, 0.0, 1.0))
+  }
+
+  test("rand") {
+    val rng = mock[Random]
+    when(rng.nextDouble()).thenReturn(1.0, 2.0, 3.0, 4.0)
+    val mat = Matrices.rand(2, 2, rng).asInstanceOf[DenseMatrix]
+    assert(mat.numRows === 2)
+    assert(mat.numCols === 2)
+    assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
+  }
+
+  test("randn") {
+    val rng = mock[Random]
+    when(rng.nextGaussian()).thenReturn(1.0, 2.0, 3.0, 4.0)
+    val mat = Matrices.randn(2, 2, rng).asInstanceOf[DenseMatrix]
+    assert(mat.numRows === 2)
+    assert(mat.numCols === 2)
+    assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
+  }
+
+  test("diag") {
+    val mat = Matrices.diag(Vectors.dense(1.0, 2.0)).asInstanceOf[DenseMatrix]
+    assert(mat.numRows === 2)
+    assert(mat.numCols === 2)
+    assert(mat.values.toSeq === Seq(1.0, 0.0, 0.0, 2.0))
+  }
+
+  test("sprand") {
+    val rng = mock[Random]
+    when(rng.nextInt(4)).thenReturn(0, 1, 1, 3, 2, 2, 0, 1, 3, 0)
+    when(rng.nextDouble()).thenReturn(1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
+    val mat = SparseMatrix.sprand(4, 4, 0.25, rng)
+    assert(mat.numRows === 4)
+    assert(mat.numCols === 4)
+    assert(mat.rowIndices.toSeq === Seq(3, 0, 2, 1))
+    assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
+    val mat2 = SparseMatrix.sprand(2, 3, 1.0, rng)
+    assert(mat2.rowIndices.toSeq === Seq(0, 1, 0, 1, 0, 1))
+    assert(mat2.colPtrs.toSeq === Seq(0, 2, 4, 6))
+  }
+
+  test("sprandn") {
+    val rng = mock[Random]
+    when(rng.nextInt(4)).thenReturn(0, 1, 1, 3, 2, 2, 0, 1, 3, 0)
+    when(rng.nextGaussian()).thenReturn(1.0, 2.0, 3.0, 4.0)
+    val mat = SparseMatrix.sprandn(4, 4, 0.25, rng)
+    assert(mat.numRows === 4)
+    assert(mat.numCols === 4)
+    assert(mat.rowIndices.toSeq === Seq(3, 0, 2, 1))
+    assert(mat.values.toSeq === Seq(1.0, 2.0, 3.0, 4.0))
+  }
+
+  test("toString") {
+    val empty = Matrices.ones(0, 0)
+    empty.toString(0, 0)
+
+    val mat = Matrices.rand(5, 10, new Random())
+    mat.toString(-1, -5)
+    mat.toString(0, 0)
+    mat.toString(Int.MinValue, Int.MinValue)
+    mat.toString(Int.MaxValue, Int.MaxValue)
+    var lines = mat.toString(6, 50).lines.toArray
+    assert(lines.size == 5 && lines.forall(_.size <= 50))
+
+    lines = mat.toString(5, 100).lines.toArray
+    assert(lines.size == 5 && lines.forall(_.size <= 100))
+  }
+
+  test("numNonzeros and numActives") {
+    val dm1 = Matrices.dense(3, 2, Array(0, 0, -1, 1, 0, 1))
+    assert(dm1.numNonzeros === 3)
+    assert(dm1.numActives === 6)
+
+    val sm1 = Matrices.sparse(3, 2, Array(0, 2, 3), Array(0, 2, 1), Array(0.0, -1.2, 0.0))
+    assert(sm1.numNonzeros === 1)
+    assert(sm1.numActives === 3)
+  }
+
+  test("fromBreeze with sparse matrix") {
+    // colPtr.last does NOT always equal to values.length in breeze SCSMatrix and
+    // invocation of compact() may be necessary. Refer to SPARK-11507
+    val bm1: BM[Double] = new CSCMatrix[Double](
+      Array(1.0, 1, 1), 3, 3, Array(0, 1, 2, 3), Array(0, 1, 2))
+    val bm2: BM[Double] = new CSCMatrix[Double](
+      Array(1.0, 2, 2, 4), 3, 3, Array(0, 0, 2, 4), Array(1, 2, 1, 2))
+    val sum = bm1 + bm2
+    Matrices.fromBreeze(sum)
+  }
+
+  test("row/col iterator") {
+    val dm = new DenseMatrix(3, 2, Array(0, 1, 2, 3, 4, 0))
+    val sm = dm.toSparse
+    val rows = Seq(Vectors.dense(0, 3), Vectors.dense(1, 4), Vectors.dense(2, 0))
+    val cols = Seq(Vectors.dense(0, 1, 2), Vectors.dense(3, 4, 0))
+    for (m <- Seq(dm, sm)) {
+      assert(m.rowIter.toSeq === rows)
+      assert(m.colIter.toSeq === cols)
+      assert(m.transpose.rowIter.toSeq === cols)
+      assert(m.transpose.colIter.toSeq === rows)
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
new file mode 100644
index 0000000..504be36
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala
@@ -0,0 +1,358 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import scala.util.Random
+
+import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM}
+import org.json4s.jackson.JsonMethods.{parse => parseJson}
+
+import org.apache.spark.ml.SparkMLFunSuite
+import org.apache.spark.ml.util.TestingUtils._
+
+class VectorsSuite extends SparkMLFunSuite {
+
+  val arr = Array(0.1, 0.0, 0.3, 0.4)
+  val n = 4
+  val indices = Array(0, 2, 3)
+  val values = Array(0.1, 0.3, 0.4)
+
+  test("dense vector construction with varargs") {
+    val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
+    assert(vec.size === arr.length)
+    assert(vec.values.eq(arr))
+  }
+
+  test("dense vector construction from a double array") {
+   val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
+    assert(vec.size === arr.length)
+    assert(vec.values.eq(arr))
+  }
+
+  test("sparse vector construction") {
+    val vec = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
+    assert(vec.size === n)
+    assert(vec.indices.eq(indices))
+    assert(vec.values.eq(values))
+  }
+
+  test("sparse vector construction with unordered elements") {
+    val vec = Vectors.sparse(n, indices.zip(values).reverse).asInstanceOf[SparseVector]
+    assert(vec.size === n)
+    assert(vec.indices === indices)
+    assert(vec.values === values)
+  }
+
+  test("sparse vector construction with mismatched indices/values array") {
+    intercept[IllegalArgumentException] {
+      Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0, 7.0, 9.0))
+    }
+    intercept[IllegalArgumentException] {
+      Vectors.sparse(4, Array(1, 2, 3), Array(3.0, 5.0))
+    }
+  }
+
+  test("sparse vector construction with too many indices vs size") {
+    intercept[IllegalArgumentException] {
+      Vectors.sparse(3, Array(1, 2, 3, 4), Array(3.0, 5.0, 7.0, 9.0))
+    }
+  }
+
+  test("dense to array") {
+    val vec = Vectors.dense(arr).asInstanceOf[DenseVector]
+    assert(vec.toArray.eq(arr))
+  }
+
+  test("dense argmax") {
+    val vec = Vectors.dense(Array.empty[Double]).asInstanceOf[DenseVector]
+    assert(vec.argmax === -1)
+
+    val vec2 = Vectors.dense(arr).asInstanceOf[DenseVector]
+    assert(vec2.argmax === 3)
+
+    val vec3 = Vectors.dense(Array(-1.0, 0.0, -2.0, 1.0)).asInstanceOf[DenseVector]
+    assert(vec3.argmax === 3)
+  }
+
+  test("sparse to array") {
+    val vec = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
+    assert(vec.toArray === arr)
+  }
+
+  test("sparse argmax") {
+    val vec = Vectors.sparse(0, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector]
+    assert(vec.argmax === -1)
+
+    val vec2 = Vectors.sparse(n, indices, values).asInstanceOf[SparseVector]
+    assert(vec2.argmax === 3)
+
+    val vec3 = Vectors.sparse(5, Array(2, 3, 4), Array(1.0, 0.0, -.7))
+    assert(vec3.argmax === 2)
+
+    // check for case that sparse vector is created with
+    // only negative values {0.0, 0.0,-1.0, -0.7, 0.0}
+    val vec4 = Vectors.sparse(5, Array(2, 3), Array(-1.0, -.7))
+    assert(vec4.argmax === 0)
+
+    val vec5 = Vectors.sparse(11, Array(0, 3, 10), Array(-1.0, -.7, 0.0))
+    assert(vec5.argmax === 1)
+
+    val vec6 = Vectors.sparse(11, Array(0, 1, 2), Array(-1.0, -.7, 0.0))
+    assert(vec6.argmax === 2)
+
+    val vec7 = Vectors.sparse(5, Array(0, 1, 3), Array(-1.0, 0.0, -.7))
+    assert(vec7.argmax === 1)
+
+    val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0))
+    assert(vec8.argmax === 0)
+  }
+
+  test("vector equals") {
+    val dv1 = Vectors.dense(arr.clone())
+    val dv2 = Vectors.dense(arr.clone())
+    val sv1 = Vectors.sparse(n, indices.clone(), values.clone())
+    val sv2 = Vectors.sparse(n, indices.clone(), values.clone())
+
+    val vectors = Seq(dv1, dv2, sv1, sv2)
+
+    for (v <- vectors; u <- vectors) {
+      assert(v === u)
+      assert(v.## === u.##)
+    }
+
+    val another = Vectors.dense(0.1, 0.2, 0.3, 0.4)
+
+    for (v <- vectors) {
+      assert(v != another)
+      assert(v.## != another.##)
+    }
+  }
+
+  test("vectors equals with explicit 0") {
+    val dv1 = Vectors.dense(Array(0, 0.9, 0, 0.8, 0))
+    val sv1 = Vectors.sparse(5, Array(1, 3), Array(0.9, 0.8))
+    val sv2 = Vectors.sparse(5, Array(0, 1, 2, 3, 4), Array(0, 0.9, 0, 0.8, 0))
+
+    val vectors = Seq(dv1, sv1, sv2)
+    for (v <- vectors; u <- vectors) {
+      assert(v === u)
+      assert(v.## === u.##)
+    }
+
+    val another = Vectors.sparse(5, Array(0, 1, 3), Array(0, 0.9, 0.2))
+    for (v <- vectors) {
+      assert(v != another)
+      assert(v.## != another.##)
+    }
+  }
+
+  test("indexing dense vectors") {
+    val vec = Vectors.dense(1.0, 2.0, 3.0, 4.0)
+    assert(vec(0) === 1.0)
+    assert(vec(3) === 4.0)
+  }
+
+  test("indexing sparse vectors") {
+    val vec = Vectors.sparse(7, Array(0, 2, 4, 6), Array(1.0, 2.0, 3.0, 4.0))
+    assert(vec(0) === 1.0)
+    assert(vec(1) === 0.0)
+    assert(vec(2) === 2.0)
+    assert(vec(3) === 0.0)
+    assert(vec(6) === 4.0)
+    val vec2 = Vectors.sparse(8, Array(0, 2, 4, 6), Array(1.0, 2.0, 3.0, 4.0))
+    assert(vec2(6) === 4.0)
+    assert(vec2(7) === 0.0)
+  }
+
+  test("zeros") {
+    assert(Vectors.zeros(3) === Vectors.dense(0.0, 0.0, 0.0))
+  }
+
+  test("Vector.copy") {
+    val sv = Vectors.sparse(4, Array(0, 2), Array(1.0, 2.0))
+    val svCopy = sv.copy
+    (sv, svCopy) match {
+      case (sv: SparseVector, svCopy: SparseVector) =>
+        assert(sv.size === svCopy.size)
+        assert(sv.indices === svCopy.indices)
+        assert(sv.values === svCopy.values)
+        assert(!sv.indices.eq(svCopy.indices))
+        assert(!sv.values.eq(svCopy.values))
+      case _ =>
+        throw new RuntimeException(s"copy returned ${svCopy.getClass} on ${sv.getClass}.")
+    }
+
+    val dv = Vectors.dense(1.0, 0.0, 2.0)
+    val dvCopy = dv.copy
+    (dv, dvCopy) match {
+      case (dv: DenseVector, dvCopy: DenseVector) =>
+        assert(dv.size === dvCopy.size)
+        assert(dv.values === dvCopy.values)
+        assert(!dv.values.eq(dvCopy.values))
+      case _ =>
+        throw new RuntimeException(s"copy returned ${dvCopy.getClass} on ${dv.getClass}.")
+    }
+  }
+
+  test("fromBreeze") {
+    val x = BDM.zeros[Double](10, 10)
+    val v = Vectors.fromBreeze(x(::, 0))
+    assert(v.size === x.rows)
+  }
+
+  test("sqdist") {
+    val random = new Random()
+    for (m <- 1 until 1000 by 100) {
+      val nnz = random.nextInt(m)
+
+      val indices1 = random.shuffle(0 to m - 1).slice(0, nnz).sorted.toArray
+      val values1 = Array.fill(nnz)(random.nextDouble)
+      val sparseVector1 = Vectors.sparse(m, indices1, values1)
+
+      val indices2 = random.shuffle(0 to m - 1).slice(0, nnz).sorted.toArray
+      val values2 = Array.fill(nnz)(random.nextDouble)
+      val sparseVector2 = Vectors.sparse(m, indices2, values2)
+
+      val denseVector1 = Vectors.dense(sparseVector1.toArray)
+      val denseVector2 = Vectors.dense(sparseVector2.toArray)
+
+      val squaredDist = breezeSquaredDistance(sparseVector1.toBreeze, sparseVector2.toBreeze)
+
+      // SparseVector vs. SparseVector
+      assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
+      // DenseVector  vs. SparseVector
+      assert(Vectors.sqdist(denseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
+      // DenseVector  vs. DenseVector
+      assert(Vectors.sqdist(denseVector1, denseVector2) ~== squaredDist relTol 1E-8)
+    }
+  }
+
+  test("foreachActive") {
+    val dv = Vectors.dense(0.0, 1.2, 3.1, 0.0)
+    val sv = Vectors.sparse(4, Seq((1, 1.2), (2, 3.1), (3, 0.0)))
+
+    val dvMap = scala.collection.mutable.Map[Int, Double]()
+    dv.foreachActive { (index, value) =>
+      dvMap.put(index, value)
+    }
+    assert(dvMap.size === 4)
+    assert(dvMap.get(0) === Some(0.0))
+    assert(dvMap.get(1) === Some(1.2))
+    assert(dvMap.get(2) === Some(3.1))
+    assert(dvMap.get(3) === Some(0.0))
+
+    val svMap = scala.collection.mutable.Map[Int, Double]()
+    sv.foreachActive { (index, value) =>
+      svMap.put(index, value)
+    }
+    assert(svMap.size === 3)
+    assert(svMap.get(1) === Some(1.2))
+    assert(svMap.get(2) === Some(3.1))
+    assert(svMap.get(3) === Some(0.0))
+  }
+
+  test("vector p-norm") {
+    val dv = Vectors.dense(0.0, -1.2, 3.1, 0.0, -4.5, 1.9)
+    val sv = Vectors.sparse(6, Seq((1, -1.2), (2, 3.1), (3, 0.0), (4, -4.5), (5, 1.9)))
+
+    assert(Vectors.norm(dv, 1.0) ~== dv.toArray.foldLeft(0.0)((a, v) =>
+      a + math.abs(v)) relTol 1E-8)
+    assert(Vectors.norm(sv, 1.0) ~== sv.toArray.foldLeft(0.0)((a, v) =>
+      a + math.abs(v)) relTol 1E-8)
+
+    assert(Vectors.norm(dv, 2.0) ~== math.sqrt(dv.toArray.foldLeft(0.0)((a, v) =>
+      a + v * v)) relTol 1E-8)
+    assert(Vectors.norm(sv, 2.0) ~== math.sqrt(sv.toArray.foldLeft(0.0)((a, v) =>
+      a + v * v)) relTol 1E-8)
+
+    assert(Vectors.norm(dv, Double.PositiveInfinity) ~== dv.toArray.map(math.abs).max relTol 1E-8)
+    assert(Vectors.norm(sv, Double.PositiveInfinity) ~== sv.toArray.map(math.abs).max relTol 1E-8)
+
+    assert(Vectors.norm(dv, 3.7) ~== math.pow(dv.toArray.foldLeft(0.0)((a, v) =>
+      a + math.pow(math.abs(v), 3.7)), 1.0 / 3.7) relTol 1E-8)
+    assert(Vectors.norm(sv, 3.7) ~== math.pow(sv.toArray.foldLeft(0.0)((a, v) =>
+      a + math.pow(math.abs(v), 3.7)), 1.0 / 3.7) relTol 1E-8)
+  }
+
+  test("Vector numActive and numNonzeros") {
+    val dv = Vectors.dense(0.0, 2.0, 3.0, 0.0)
+    assert(dv.numActives === 4)
+    assert(dv.numNonzeros === 2)
+
+    val sv = Vectors.sparse(4, Array(0, 1, 2), Array(0.0, 2.0, 3.0))
+    assert(sv.numActives === 3)
+    assert(sv.numNonzeros === 2)
+  }
+
+  test("Vector toSparse and toDense") {
+    val dv0 = Vectors.dense(0.0, 2.0, 3.0, 0.0)
+    assert(dv0.toDense === dv0)
+    val dv0s = dv0.toSparse
+    assert(dv0s.numActives === 2)
+    assert(dv0s === dv0)
+
+    val sv0 = Vectors.sparse(4, Array(0, 1, 2), Array(0.0, 2.0, 3.0))
+    assert(sv0.toDense === sv0)
+    val sv0s = sv0.toSparse
+    assert(sv0s.numActives === 2)
+    assert(sv0s === sv0)
+  }
+
+  test("Vector.compressed") {
+    val dv0 = Vectors.dense(1.0, 2.0, 3.0, 0.0)
+    val dv0c = dv0.compressed.asInstanceOf[DenseVector]
+    assert(dv0c === dv0)
+
+    val dv1 = Vectors.dense(0.0, 2.0, 0.0, 0.0)
+    val dv1c = dv1.compressed.asInstanceOf[SparseVector]
+    assert(dv1 === dv1c)
+    assert(dv1c.numActives === 1)
+
+    val sv0 = Vectors.sparse(4, Array(1, 2), Array(2.0, 0.0))
+    val sv0c = sv0.compressed.asInstanceOf[SparseVector]
+    assert(sv0 === sv0c)
+    assert(sv0c.numActives === 1)
+
+    val sv1 = Vectors.sparse(4, Array(0, 1, 2), Array(1.0, 2.0, 3.0))
+    val sv1c = sv1.compressed.asInstanceOf[DenseVector]
+    assert(sv1 === sv1c)
+  }
+
+  test("SparseVector.slice") {
+    val v = new SparseVector(5, Array(1, 2, 4), Array(1.1, 2.2, 4.4))
+    assert(v.slice(Array(0, 2)) === new SparseVector(2, Array(1), Array(2.2)))
+    assert(v.slice(Array(2, 0)) === new SparseVector(2, Array(0), Array(2.2)))
+    assert(v.slice(Array(2, 0, 3, 4)) === new SparseVector(4, Array(0, 3), Array(2.2, 4.4)))
+  }
+
+  test("toJson/fromJson") {
+    val sv0 = Vectors.sparse(0, Array.empty, Array.empty)
+    val sv1 = Vectors.sparse(1, Array.empty, Array.empty)
+    val sv2 = Vectors.sparse(2, Array(1), Array(2.0))
+    val dv0 = Vectors.dense(Array.empty[Double])
+    val dv1 = Vectors.dense(1.0)
+    val dv2 = Vectors.dense(0.0, 2.0)
+    for (v <- Seq(sv0, sv1, sv2, dv0, dv1, dv2)) {
+      val json = v.toJson
+      parseJson(json) // `json` should be a valid JSON string
+      val u = Vectors.fromJson(json)
+      assert(u.getClass === v.getClass, "toJson/fromJson should preserve vector types.")
+      assert(u === v, "toJson/fromJson should preserve vector values.")
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtils.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtils.scala b/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtils.scala
new file mode 100644
index 0000000..2bebaa35
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtils.scala
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.util
+
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.ml.linalg.{Matrix, Vector}
+
+object TestingUtils {
+
+  val ABS_TOL_MSG = " using absolute tolerance"
+  val REL_TOL_MSG = " using relative tolerance"
+
+  /**
+   * Private helper function for comparing two values using relative tolerance.
+   * Note that if x or y is extremely close to zero, i.e., smaller than Double.MinPositiveValue,
+   * the relative tolerance is meaningless, so the exception will be raised to warn users.
+   */
+  private def RelativeErrorComparison(x: Double, y: Double, eps: Double): Boolean = {
+    val absX = math.abs(x)
+    val absY = math.abs(y)
+    val diff = math.abs(x - y)
+    if (x == y) {
+      true
+    } else if (absX < Double.MinPositiveValue || absY < Double.MinPositiveValue) {
+      throw new TestFailedException(
+        s"$x or $y is extremely close to zero, so the relative tolerance is meaningless.", 0)
+    } else {
+      diff < eps * math.min(absX, absY)
+    }
+  }
+
+  /**
+   * Private helper function for comparing two values using absolute tolerance.
+   */
+  private def AbsoluteErrorComparison(x: Double, y: Double, eps: Double): Boolean = {
+    math.abs(x - y) < eps
+  }
+
+  case class CompareDoubleRightSide(
+    fun: (Double, Double, Double) => Boolean, y: Double, eps: Double, method: String)
+
+  /**
+   * Implicit class for comparing two double values using relative tolerance or absolute tolerance.
+   */
+  implicit class DoubleWithAlmostEquals(val x: Double) {
+
+    /**
+     * When the difference of two values are within eps, returns true; otherwise, returns false.
+     */
+    def ~=(r: CompareDoubleRightSide): Boolean = r.fun(x, r.y, r.eps)
+
+    /**
+     * When the difference of two values are within eps, returns false; otherwise, returns true.
+     */
+    def !~=(r: CompareDoubleRightSide): Boolean = !r.fun(x, r.y, r.eps)
+
+    /**
+     * Throws exception when the difference of two values are NOT within eps;
+     * otherwise, returns true.
+     */
+    def ~==(r: CompareDoubleRightSide): Boolean = {
+      if (!r.fun(x, r.y, r.eps)) {
+        throw new TestFailedException(
+          s"Expected $x and ${r.y} to be within ${r.eps}${r.method}.", 0)
+      }
+      true
+    }
+
+    /**
+     * Throws exception when the difference of two values are within eps; otherwise, returns true.
+     */
+    def !~==(r: CompareDoubleRightSide): Boolean = {
+      if (r.fun(x, r.y, r.eps)) {
+        throw new TestFailedException(
+          s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method}.", 0)
+      }
+      true
+    }
+
+    /**
+     * Comparison using absolute tolerance.
+     */
+    def absTol(eps: Double): CompareDoubleRightSide =
+      CompareDoubleRightSide(AbsoluteErrorComparison, x, eps, ABS_TOL_MSG)
+
+    /**
+     * Comparison using relative tolerance.
+     */
+    def relTol(eps: Double): CompareDoubleRightSide =
+      CompareDoubleRightSide(RelativeErrorComparison, x, eps, REL_TOL_MSG)
+
+    override def toString: String = x.toString
+  }
+
+  case class CompareVectorRightSide(
+    fun: (Vector, Vector, Double) => Boolean, y: Vector, eps: Double, method: String)
+
+  /**
+   * Implicit class for comparing two vectors using relative tolerance or absolute tolerance.
+   */
+  implicit class VectorWithAlmostEquals(val x: Vector) {
+
+    /**
+     * When the difference of two vectors are within eps, returns true; otherwise, returns false.
+     */
+    def ~=(r: CompareVectorRightSide): Boolean = r.fun(x, r.y, r.eps)
+
+    /**
+     * When the difference of two vectors are within eps, returns false; otherwise, returns true.
+     */
+    def !~=(r: CompareVectorRightSide): Boolean = !r.fun(x, r.y, r.eps)
+
+    /**
+     * Throws exception when the difference of two vectors are NOT within eps;
+     * otherwise, returns true.
+     */
+    def ~==(r: CompareVectorRightSide): Boolean = {
+      if (!r.fun(x, r.y, r.eps)) {
+        throw new TestFailedException(
+          s"Expected $x and ${r.y} to be within ${r.eps}${r.method} for all elements.", 0)
+      }
+      true
+    }
+
+    /**
+     * Throws exception when the difference of two vectors are within eps; otherwise, returns true.
+     */
+    def !~==(r: CompareVectorRightSide): Boolean = {
+      if (r.fun(x, r.y, r.eps)) {
+        throw new TestFailedException(
+          s"Did not expect $x and ${r.y} to be within ${r.eps}${r.method} for all elements.", 0)
+      }
+      true
+    }
+
+    /**
+     * Comparison using absolute tolerance.
+     */
+    def absTol(eps: Double): CompareVectorRightSide = CompareVectorRightSide(
+      (x: Vector, y: Vector, eps: Double) => {
+        x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 absTol eps)
+      }, x, eps, ABS_TOL_MSG)
+
+    /**
+     * Comparison using relative tolerance. Note that comparing against sparse vector
+     * with elements having value of zero will raise exception because it involves with
+     * comparing against zero.
+     */
+    def relTol(eps: Double): CompareVectorRightSide = CompareVectorRightSide(
+      (x: Vector, y: Vector, eps: Double) => {
+        x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
+      }, x, eps, REL_TOL_MSG)
+
+    override def toString: String = x.toString
+  }
+
+  case class CompareMatrixRightSide(
+     fun: (Matrix, Matrix, Double) => Boolean, y: Matrix, eps: Double, method: String)
+
+  /**
+   * Implicit class for comparing two matrices using relative tolerance or absolute tolerance.
+   */
+  implicit class MatrixWithAlmostEquals(val x: Matrix) {
+
+    /**
+     * When the difference of two matrices are within eps, returns true; otherwise, returns false.
+     */
+    def ~=(r: CompareMatrixRightSide): Boolean = r.fun(x, r.y, r.eps)
+
+    /**
+     * When the difference of two matrices are within eps, returns false; otherwise, returns true.
+     */
+    def !~=(r: CompareMatrixRightSide): Boolean = !r.fun(x, r.y, r.eps)
+
+    /**
+     * Throws exception when the difference of two matrices are NOT within eps;
+     * otherwise, returns true.
+     */
+    def ~==(r: CompareMatrixRightSide): Boolean = {
+      if (!r.fun(x, r.y, r.eps)) {
+        throw new TestFailedException(
+          s"Expected \n$x\n and \n${r.y}\n to be within ${r.eps}${r.method} for all elements.", 0)
+      }
+      true
+    }
+
+    /**
+     * Throws exception when the difference of two matrices are within eps; otherwise, returns true.
+     */
+    def !~==(r: CompareMatrixRightSide): Boolean = {
+      if (r.fun(x, r.y, r.eps)) {
+        throw new TestFailedException(
+          s"Did not expect \n$x\n and \n${r.y}\n to be within " +
+            "${r.eps}${r.method} for all elements.", 0)
+      }
+      true
+    }
+
+    /**
+     * Comparison using absolute tolerance.
+     */
+    def absTol(eps: Double): CompareMatrixRightSide = CompareMatrixRightSide(
+      (x: Matrix, y: Matrix, eps: Double) => {
+        x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 absTol eps)
+      }, x, eps, ABS_TOL_MSG)
+
+    /**
+     * Comparison using relative tolerance. Note that comparing against sparse vector
+     * with elements having value of zero will raise exception because it involves with
+     * comparing against zero.
+     */
+    def relTol(eps: Double): CompareMatrixRightSide = CompareMatrixRightSide(
+      (x: Matrix, y: Matrix, eps: Double) => {
+        x.toArray.zip(y.toArray).forall(x => x._1 ~= x._2 relTol eps)
+      }, x, eps, REL_TOL_MSG)
+
+    override def toString: String = x.toString
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtilsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtilsSuite.scala
new file mode 100644
index 0000000..e374165
--- /dev/null
+++ b/mllib-local/src/test/scala/org/apache/spark/ml/util/TestingUtilsSuite.scala
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.util
+
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.ml.SparkMLFunSuite
+import org.apache.spark.ml.linalg.Vectors
+import org.apache.spark.ml.util.TestingUtils._
+
+class TestingUtilsSuite extends SparkMLFunSuite {
+
+  test("Comparing doubles using relative error.") {
+
+    assert(23.1 ~== 23.52 relTol 0.02)
+    assert(23.1 ~== 22.74 relTol 0.02)
+    assert(23.1 ~= 23.52 relTol 0.02)
+    assert(23.1 ~= 22.74 relTol 0.02)
+    assert(!(23.1 !~= 23.52 relTol 0.02))
+    assert(!(23.1 !~= 22.74 relTol 0.02))
+
+    // Should throw exception with message when test fails.
+    intercept[TestFailedException](23.1 !~== 23.52 relTol 0.02)
+    intercept[TestFailedException](23.1 !~== 22.74 relTol 0.02)
+    intercept[TestFailedException](23.1 ~== 23.63 relTol 0.02)
+    intercept[TestFailedException](23.1 ~== 22.34 relTol 0.02)
+
+    assert(23.1 !~== 23.63 relTol 0.02)
+    assert(23.1 !~== 22.34 relTol 0.02)
+    assert(23.1 !~= 23.63 relTol 0.02)
+    assert(23.1 !~= 22.34 relTol 0.02)
+    assert(!(23.1 ~= 23.63 relTol 0.02))
+    assert(!(23.1 ~= 22.34 relTol 0.02))
+
+    // Comparing against zero should fail the test and throw exception with message
+    // saying that the relative error is meaningless in this situation.
+    intercept[TestFailedException](0.1 ~== 0.0 relTol 0.032)
+    intercept[TestFailedException](0.1 ~= 0.0 relTol 0.032)
+    intercept[TestFailedException](0.1 !~== 0.0 relTol 0.032)
+    intercept[TestFailedException](0.1 !~= 0.0 relTol 0.032)
+    intercept[TestFailedException](0.0 ~== 0.1 relTol 0.032)
+    intercept[TestFailedException](0.0 ~= 0.1 relTol 0.032)
+    intercept[TestFailedException](0.0 !~== 0.1 relTol 0.032)
+    intercept[TestFailedException](0.0 !~= 0.1 relTol 0.032)
+
+    // Comparisons of numbers very close to zero.
+    assert(10 * Double.MinPositiveValue ~== 9.5 * Double.MinPositiveValue relTol 0.01)
+    assert(10 * Double.MinPositiveValue !~== 11 * Double.MinPositiveValue relTol 0.01)
+
+    assert(-Double.MinPositiveValue ~== 1.18 * -Double.MinPositiveValue relTol 0.012)
+    assert(-Double.MinPositiveValue ~== 1.38 * -Double.MinPositiveValue relTol 0.012)
+  }
+
+  test("Comparing doubles using absolute error.") {
+
+    assert(17.8 ~== 17.99 absTol 0.2)
+    assert(17.8 ~== 17.61 absTol 0.2)
+    assert(17.8 ~= 17.99 absTol 0.2)
+    assert(17.8 ~= 17.61 absTol 0.2)
+    assert(!(17.8 !~= 17.99 absTol 0.2))
+    assert(!(17.8 !~= 17.61 absTol 0.2))
+
+    // Should throw exception with message when test fails.
+    intercept[TestFailedException](17.8 !~== 17.99 absTol 0.2)
+    intercept[TestFailedException](17.8 !~== 17.61 absTol 0.2)
+    intercept[TestFailedException](17.8 ~== 18.01 absTol 0.2)
+    intercept[TestFailedException](17.8 ~== 17.59 absTol 0.2)
+
+    assert(17.8 !~== 18.01 absTol 0.2)
+    assert(17.8 !~== 17.59 absTol 0.2)
+    assert(17.8 !~= 18.01 absTol 0.2)
+    assert(17.8 !~= 17.59 absTol 0.2)
+    assert(!(17.8 ~= 18.01 absTol 0.2))
+    assert(!(17.8 ~= 17.59 absTol 0.2))
+
+    // Comparisons of numbers very close to zero, and both side of zeros
+    assert(
+      Double.MinPositiveValue ~== 4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+    assert(
+      Double.MinPositiveValue !~== 6 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+
+    assert(
+      -Double.MinPositiveValue ~== 3 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+    assert(
+      Double.MinPositiveValue !~== -4 * Double.MinPositiveValue absTol 5 * Double.MinPositiveValue)
+  }
+
+  test("Comparing vectors using relative error.") {
+
+    // Comparisons of two dense vectors
+    assert(Vectors.dense(Array(3.1, 3.5)) ~== Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
+    assert(Vectors.dense(Array(3.1, 3.5)) !~== Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
+    assert(Vectors.dense(Array(3.1, 3.5)) ~= Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
+    assert(Vectors.dense(Array(3.1, 3.5)) !~= Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
+    assert(!(Vectors.dense(Array(3.1, 3.5)) !~= Vectors.dense(Array(3.130, 3.534)) relTol 0.01))
+    assert(!(Vectors.dense(Array(3.1, 3.5)) ~= Vectors.dense(Array(3.135, 3.534)) relTol 0.01))
+
+    // Should throw exception with message when test fails.
+    intercept[TestFailedException](
+      Vectors.dense(Array(3.1, 3.5)) !~== Vectors.dense(Array(3.130, 3.534)) relTol 0.01)
+
+    intercept[TestFailedException](
+      Vectors.dense(Array(3.1, 3.5)) ~== Vectors.dense(Array(3.135, 3.534)) relTol 0.01)
+
+    // Comparing against zero should fail the test and throw exception with message
+    // saying that the relative error is meaningless in this situation.
+    intercept[TestFailedException](
+      Vectors.dense(Array(3.1, 0.01)) ~== Vectors.dense(Array(3.13, 0.0)) relTol 0.01)
+
+    intercept[TestFailedException](
+      Vectors.dense(Array(3.1, 0.01)) ~== Vectors.sparse(2, Array(0), Array(3.13)) relTol 0.01)
+
+    // Comparisons of two sparse vectors
+    assert(Vectors.dense(Array(3.1, 3.5)) ~==
+      Vectors.sparse(2, Array(0, 1), Array(3.130, 3.534)) relTol 0.01)
+
+    assert(Vectors.dense(Array(3.1, 3.5)) !~==
+      Vectors.sparse(2, Array(0, 1), Array(3.135, 3.534)) relTol 0.01)
+  }
+
+  test("Comparing vectors using absolute error.") {
+
+    // Comparisons of two dense vectors
+    assert(Vectors.dense(Array(3.1, 3.5, 0.0)) ~==
+      Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)
+
+    assert(Vectors.dense(Array(3.1, 3.5, 0.0)) !~==
+      Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6)
+
+    assert(Vectors.dense(Array(3.1, 3.5, 0.0)) ~=
+      Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)
+
+    assert(Vectors.dense(Array(3.1, 3.5, 0.0)) !~=
+      Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6)
+
+    assert(!(Vectors.dense(Array(3.1, 3.5, 0.0)) !~=
+      Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6))
+
+    assert(!(Vectors.dense(Array(3.1, 3.5, 0.0)) ~=
+      Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6))
+
+    // Should throw exception with message when test fails.
+    intercept[TestFailedException](Vectors.dense(Array(3.1, 3.5, 0.0)) !~==
+      Vectors.dense(Array(3.1 + 1E-8, 3.5 + 2E-7, 1E-8)) absTol 1E-6)
+
+    intercept[TestFailedException](Vectors.dense(Array(3.1, 3.5, 0.0)) ~==
+      Vectors.dense(Array(3.1 + 1E-5, 3.5 + 2E-7, 1 + 1E-3)) absTol 1E-6)
+
+    // Comparisons of two sparse vectors
+    assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) ~==
+      Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-8, 2.4 + 1E-7)) absTol 1E-6)
+
+    assert(Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-8, 2.4 + 1E-7)) ~==
+      Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) absTol 1E-6)
+
+    assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) !~==
+      Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-3, 2.4)) absTol 1E-6)
+
+    assert(Vectors.sparse(3, Array(0, 2), Array(3.1 + 1E-3, 2.4)) !~==
+      Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) absTol 1E-6)
+
+    // Comparisons of a dense vector and a sparse vector
+    assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) ~==
+      Vectors.dense(Array(3.1 + 1E-8, 0, 2.4 + 1E-7)) absTol 1E-6)
+
+    assert(Vectors.dense(Array(3.1 + 1E-8, 0, 2.4 + 1E-7)) ~==
+      Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) absTol 1E-6)
+
+    assert(Vectors.sparse(3, Array(0, 2), Array(3.1, 2.4)) !~==
+      Vectors.dense(Array(3.1, 1E-3, 2.4)) absTol 1E-6)
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
index e542f21..0c6aabf 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
@@ -182,8 +182,8 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
     for (folds <- 2 to 10) {
       for (seed <- 1 to 5) {
         val foldedRdds = kFold(data, folds, seed)
-        assert(foldedRdds.size === folds)
-        foldedRdds.map { case (training, validation) =>
+        assert(foldedRdds.length === folds)
+        foldedRdds.foreach { case (training, validation) =>
           val result = validation.union(training).collect().sorted
           val validationSize = validation.collect().size.toFloat
           assert(validationSize > 0, "empty validation data")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org


[2/2] spark git commit: [SPARK-14549][ML] Copy the Vector and Matrix classes from mllib to ml in mllib-local

Posted by me...@apache.org.
[SPARK-14549][ML] Copy the Vector and Matrix classes from mllib to ml in mllib-local

## What changes were proposed in this pull request?

This task will copy the Vector and Matrix classes from mllib to ml package in mllib-local jar. The UDTs and `since` annotation in ml vector and matrix will be removed from now. UDTs will be achieved by #SPARK-14487, and `since` will be replaced by /*  since 1.2.0 */

The BLAS implementation will be copied, and some of the test utilities will be copies as well.

Summary of changes:

1. In mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/BLAS.scala
  - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/BLAS.scala
  - logDebug("gemm: alpha is equal to 0 and beta is equal to 1. Returning C.") is removed in ml version.
2. In  mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/Matrices.scala
  - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Matrices.scala
  - `Since` was removed, and we'll use standard `/* Since /*` Java doc. Will be in another PR.
  - `UDT` related code was removed, and will use `SPARK-13944` https://github.com/apache/spark/pull/12259  to replace the annotation.
3. In mllib-local/src/main/scala/org/apache/spark/**ml**/linalg/Vectors.scala
  - Copied from mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Vectors.scala
  - `Since` was removed.
  - `UDT` related code was removed.
  - In `def parseNumeric`, it was throwing `throw new SparkException(s"Cannot parse $other.")`, and now it's throwing `throw new IllegalArgumentException(s"Cannot parse $other.")`
4. In mllib/src/main/scala/org/apache/spark/**mllib**/linalg/Vectors.scala
  - For consistency with ML version of vector, `def parseNumeric` is now throwing `throw new IllegalArgumentException(s"Cannot parse $other.")`
5. mllib/src/main/scala/org/apache/spark/**mllib**/util/NumericParser.scala is moved to mllib-local/src/main/scala/org/apache/spark/**ml**/util/NumericParser.scala
  - All the `throw new SparkException` were replaced by `throw new IllegalArgumentException`

## How was this patch tested?

unit tests

Author: DB Tsai <db...@netflix.com>

Closes #12317 from dbtsai/dbtsai-ml-vector.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96534aa4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96534aa4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96534aa4

Branch: refs/heads/master
Commit: 96534aa47c39e0ec40bc38be566455d11e21adb2
Parents: a9324a0
Author: DB Tsai <db...@netflix.com>
Authored: Fri Apr 15 01:17:03 2016 -0700
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Fri Apr 15 01:17:03 2016 -0700

----------------------------------------------------------------------
 mllib-local/pom.xml                             |    4 +
 .../org/apache/spark/ml/DummyTesting.scala      |   23 -
 .../scala/org/apache/spark/ml/linalg/BLAS.scala |  723 ++++++++++++
 .../org/apache/spark/ml/linalg/Matrices.scala   | 1026 ++++++++++++++++++
 .../org/apache/spark/ml/linalg/Vectors.scala    |  736 +++++++++++++
 .../org/apache/spark/ml/DummyTestingSuite.scala |   28 -
 .../org/apache/spark/ml/SparkMLFunSuite.scala   |   30 +
 .../org/apache/spark/ml/linalg/BLASSuite.scala  |  408 +++++++
 .../ml/linalg/BreezeMatrixConversionSuite.scala |   71 ++
 .../ml/linalg/BreezeVectorConversionSuite.scala |   67 ++
 .../apache/spark/ml/linalg/MatricesSuite.scala  |  511 +++++++++
 .../apache/spark/ml/linalg/VectorsSuite.scala   |  358 ++++++
 .../org/apache/spark/ml/util/TestingUtils.scala |  236 ++++
 .../spark/ml/util/TestingUtilsSuite.scala       |  187 ++++
 .../apache/spark/mllib/util/MLUtilsSuite.scala  |    4 +-
 15 files changed, 4359 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/pom.xml
----------------------------------------------------------------------
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index 68f15dd..60b615a 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -49,6 +49,10 @@
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>org.json4s</groupId>
+      <artifactId>json4s-jackson_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
       <groupId>org.mockito</groupId>
       <artifactId>mockito-core</artifactId>
       <scope>test</scope>

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala b/mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala
deleted file mode 100644
index 6b3268c..0000000
--- a/mllib-local/src/main/scala/org/apache/spark/ml/DummyTesting.scala
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.ml
-
-// This is a private class testing if the new build works. To be removed soon.
-private[ml] object DummyTesting {
-  private[ml] def add10(input: Double): Double = input + 10
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
new file mode 100644
index 0000000..41b0c6c
--- /dev/null
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala
@@ -0,0 +1,723 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import com.github.fommil.netlib.{BLAS => NetlibBLAS, F2jBLAS}
+import com.github.fommil.netlib.BLAS.{getInstance => NativeBLAS}
+
+/**
+ * BLAS routines for MLlib's vectors and matrices.
+ */
+private[spark] object BLAS extends Serializable {
+
+  @transient private var _f2jBLAS: NetlibBLAS = _
+  @transient private var _nativeBLAS: NetlibBLAS = _
+
+  // For level-1 routines, we use Java implementation.
+  private def f2jBLAS: NetlibBLAS = {
+    if (_f2jBLAS == null) {
+      _f2jBLAS = new F2jBLAS
+    }
+    _f2jBLAS
+  }
+
+  /**
+   * y += a * x
+   */
+  def axpy(a: Double, x: Vector, y: Vector): Unit = {
+    require(x.size == y.size)
+    y match {
+      case dy: DenseVector =>
+        x match {
+          case sx: SparseVector =>
+            axpy(a, sx, dy)
+          case dx: DenseVector =>
+            axpy(a, dx, dy)
+          case _ =>
+            throw new UnsupportedOperationException(
+              s"axpy doesn't support x type ${x.getClass}.")
+        }
+      case _ =>
+        throw new IllegalArgumentException(
+          s"axpy only supports adding to a dense vector but got type ${y.getClass}.")
+    }
+  }
+
+  /**
+   * y += a * x
+   */
+  private def axpy(a: Double, x: DenseVector, y: DenseVector): Unit = {
+    val n = x.size
+    f2jBLAS.daxpy(n, a, x.values, 1, y.values, 1)
+  }
+
+  /**
+   * y += a * x
+   */
+  private def axpy(a: Double, x: SparseVector, y: DenseVector): Unit = {
+    val xValues = x.values
+    val xIndices = x.indices
+    val yValues = y.values
+    val nnz = xIndices.length
+
+    if (a == 1.0) {
+      var k = 0
+      while (k < nnz) {
+        yValues(xIndices(k)) += xValues(k)
+        k += 1
+      }
+    } else {
+      var k = 0
+      while (k < nnz) {
+        yValues(xIndices(k)) += a * xValues(k)
+        k += 1
+      }
+    }
+  }
+
+  /** Y += a * x */
+  private[spark] def axpy(a: Double, X: DenseMatrix, Y: DenseMatrix): Unit = {
+    require(X.numRows == Y.numRows && X.numCols == Y.numCols, "Dimension mismatch: " +
+      s"size(X) = ${(X.numRows, X.numCols)} but size(Y) = ${(Y.numRows, Y.numCols)}.")
+    f2jBLAS.daxpy(X.numRows * X.numCols, a, X.values, 1, Y.values, 1)
+  }
+
+  /**
+   * dot(x, y)
+   */
+  def dot(x: Vector, y: Vector): Double = {
+    require(x.size == y.size,
+      "BLAS.dot(x: Vector, y:Vector) was given Vectors with non-matching sizes:" +
+      " x.size = " + x.size + ", y.size = " + y.size)
+    (x, y) match {
+      case (dx: DenseVector, dy: DenseVector) =>
+        dot(dx, dy)
+      case (sx: SparseVector, dy: DenseVector) =>
+        dot(sx, dy)
+      case (dx: DenseVector, sy: SparseVector) =>
+        dot(sy, dx)
+      case (sx: SparseVector, sy: SparseVector) =>
+        dot(sx, sy)
+      case _ =>
+        throw new IllegalArgumentException(s"dot doesn't support (${x.getClass}, ${y.getClass}).")
+    }
+  }
+
+  /**
+   * dot(x, y)
+   */
+  private def dot(x: DenseVector, y: DenseVector): Double = {
+    val n = x.size
+    f2jBLAS.ddot(n, x.values, 1, y.values, 1)
+  }
+
+  /**
+   * dot(x, y)
+   */
+  private def dot(x: SparseVector, y: DenseVector): Double = {
+    val xValues = x.values
+    val xIndices = x.indices
+    val yValues = y.values
+    val nnz = xIndices.length
+
+    var sum = 0.0
+    var k = 0
+    while (k < nnz) {
+      sum += xValues(k) * yValues(xIndices(k))
+      k += 1
+    }
+    sum
+  }
+
+  /**
+   * dot(x, y)
+   */
+  private def dot(x: SparseVector, y: SparseVector): Double = {
+    val xValues = x.values
+    val xIndices = x.indices
+    val yValues = y.values
+    val yIndices = y.indices
+    val nnzx = xIndices.length
+    val nnzy = yIndices.length
+
+    var kx = 0
+    var ky = 0
+    var sum = 0.0
+    // y catching x
+    while (kx < nnzx && ky < nnzy) {
+      val ix = xIndices(kx)
+      while (ky < nnzy && yIndices(ky) < ix) {
+        ky += 1
+      }
+      if (ky < nnzy && yIndices(ky) == ix) {
+        sum += xValues(kx) * yValues(ky)
+        ky += 1
+      }
+      kx += 1
+    }
+    sum
+  }
+
+  /**
+   * y = x
+   */
+  def copy(x: Vector, y: Vector): Unit = {
+    val n = y.size
+    require(x.size == n)
+    y match {
+      case dy: DenseVector =>
+        x match {
+          case sx: SparseVector =>
+            val sxIndices = sx.indices
+            val sxValues = sx.values
+            val dyValues = dy.values
+            val nnz = sxIndices.length
+
+            var i = 0
+            var k = 0
+            while (k < nnz) {
+              val j = sxIndices(k)
+              while (i < j) {
+                dyValues(i) = 0.0
+                i += 1
+              }
+              dyValues(i) = sxValues(k)
+              i += 1
+              k += 1
+            }
+            while (i < n) {
+              dyValues(i) = 0.0
+              i += 1
+            }
+          case dx: DenseVector =>
+            Array.copy(dx.values, 0, dy.values, 0, n)
+        }
+      case _ =>
+        throw new IllegalArgumentException(s"y must be dense in copy but got ${y.getClass}")
+    }
+  }
+
+  /**
+   * x = a * x
+   */
+  def scal(a: Double, x: Vector): Unit = {
+    x match {
+      case sx: SparseVector =>
+        f2jBLAS.dscal(sx.values.length, a, sx.values, 1)
+      case dx: DenseVector =>
+        f2jBLAS.dscal(dx.values.length, a, dx.values, 1)
+      case _ =>
+        throw new IllegalArgumentException(s"scal doesn't support vector type ${x.getClass}.")
+    }
+  }
+
+  // For level-3 routines, we use the native BLAS.
+  private def nativeBLAS: NetlibBLAS = {
+    if (_nativeBLAS == null) {
+      _nativeBLAS = NativeBLAS
+    }
+    _nativeBLAS
+  }
+
+  /**
+   * Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
+   *
+   * @param U the upper triangular part of the matrix in a [[DenseVector]](column major)
+   */
+  def spr(alpha: Double, v: Vector, U: DenseVector): Unit = {
+    spr(alpha, v, U.values)
+  }
+
+  /**
+   * Adds alpha * x * x.t to a matrix in-place. This is the same as BLAS's ?SPR.
+   *
+   * @param U the upper triangular part of the matrix packed in an array (column major)
+   */
+  def spr(alpha: Double, v: Vector, U: Array[Double]): Unit = {
+    val n = v.size
+    v match {
+      case DenseVector(values) =>
+        NativeBLAS.dspr("U", n, alpha, values, 1, U)
+      case SparseVector(size, indices, values) =>
+        val nnz = indices.length
+        var colStartIdx = 0
+        var prevCol = 0
+        var col = 0
+        var j = 0
+        var i = 0
+        var av = 0.0
+        while (j < nnz) {
+          col = indices(j)
+          // Skip empty columns.
+          colStartIdx += (col - prevCol) * (col + prevCol + 1) / 2
+          col = indices(j)
+          av = alpha * values(j)
+          i = 0
+          while (i <= j) {
+            U(colStartIdx + indices(i)) += av * values(i)
+            i += 1
+          }
+          j += 1
+          prevCol = col
+        }
+    }
+  }
+
+  /**
+   * A := alpha * x * x^T^ + A
+   * @param alpha a real scalar that will be multiplied to x * x^T^.
+   * @param x the vector x that contains the n elements.
+   * @param A the symmetric matrix A. Size of n x n.
+   */
+  def syr(alpha: Double, x: Vector, A: DenseMatrix) {
+    val mA = A.numRows
+    val nA = A.numCols
+    require(mA == nA, s"A is not a square matrix (and hence is not symmetric). A: $mA x $nA")
+    require(mA == x.size, s"The size of x doesn't match the rank of A. A: $mA x $nA, x: ${x.size}")
+
+    x match {
+      case dv: DenseVector => syr(alpha, dv, A)
+      case sv: SparseVector => syr(alpha, sv, A)
+      case _ =>
+        throw new IllegalArgumentException(s"syr doesn't support vector type ${x.getClass}.")
+    }
+  }
+
+  private def syr(alpha: Double, x: DenseVector, A: DenseMatrix) {
+    val nA = A.numRows
+    val mA = A.numCols
+
+    nativeBLAS.dsyr("U", x.size, alpha, x.values, 1, A.values, nA)
+
+    // Fill lower triangular part of A
+    var i = 0
+    while (i < mA) {
+      var j = i + 1
+      while (j < nA) {
+        A(j, i) = A(i, j)
+        j += 1
+      }
+      i += 1
+    }
+  }
+
+  private def syr(alpha: Double, x: SparseVector, A: DenseMatrix) {
+    val mA = A.numCols
+    val xIndices = x.indices
+    val xValues = x.values
+    val nnz = xValues.length
+    val Avalues = A.values
+
+    var i = 0
+    while (i < nnz) {
+      val multiplier = alpha * xValues(i)
+      val offset = xIndices(i) * mA
+      var j = 0
+      while (j < nnz) {
+        Avalues(xIndices(j) + offset) += multiplier * xValues(j)
+        j += 1
+      }
+      i += 1
+    }
+  }
+
+  /**
+   * C := alpha * A * B + beta * C
+   * @param alpha a scalar to scale the multiplication A * B.
+   * @param A the matrix A that will be left multiplied to B. Size of m x k.
+   * @param B the matrix B that will be left multiplied by A. Size of k x n.
+   * @param beta a scalar that can be used to scale matrix C.
+   * @param C the resulting matrix C. Size of m x n. C.isTransposed must be false.
+   */
+  def gemm(
+      alpha: Double,
+      A: Matrix,
+      B: DenseMatrix,
+      beta: Double,
+      C: DenseMatrix): Unit = {
+    require(!C.isTransposed,
+      "The matrix C cannot be the product of a transpose() call. C.isTransposed must be false.")
+    if (alpha == 0.0 && beta == 1.0) {
+      // gemm: alpha is equal to 0 and beta is equal to 1. Returning C.
+      return
+    } else if (alpha == 0.0) {
+      f2jBLAS.dscal(C.values.length, beta, C.values, 1)
+    } else {
+      A match {
+        case sparse: SparseMatrix => gemm(alpha, sparse, B, beta, C)
+        case dense: DenseMatrix => gemm(alpha, dense, B, beta, C)
+        case _ =>
+          throw new IllegalArgumentException(s"gemm doesn't support matrix type ${A.getClass}.")
+      }
+    }
+  }
+
+  /**
+   * C := alpha * A * B + beta * C
+   * For `DenseMatrix` A.
+   */
+  private def gemm(
+      alpha: Double,
+      A: DenseMatrix,
+      B: DenseMatrix,
+      beta: Double,
+      C: DenseMatrix): Unit = {
+    val tAstr = if (A.isTransposed) "T" else "N"
+    val tBstr = if (B.isTransposed) "T" else "N"
+    val lda = if (!A.isTransposed) A.numRows else A.numCols
+    val ldb = if (!B.isTransposed) B.numRows else B.numCols
+
+    require(A.numCols == B.numRows,
+      s"The columns of A don't match the rows of B. A: ${A.numCols}, B: ${B.numRows}")
+    require(A.numRows == C.numRows,
+      s"The rows of C don't match the rows of A. C: ${C.numRows}, A: ${A.numRows}")
+    require(B.numCols == C.numCols,
+      s"The columns of C don't match the columns of B. C: ${C.numCols}, A: ${B.numCols}")
+    nativeBLAS.dgemm(tAstr, tBstr, A.numRows, B.numCols, A.numCols, alpha, A.values, lda,
+      B.values, ldb, beta, C.values, C.numRows)
+  }
+
+  /**
+   * C := alpha * A * B + beta * C
+   * For `SparseMatrix` A.
+   */
+  private def gemm(
+      alpha: Double,
+      A: SparseMatrix,
+      B: DenseMatrix,
+      beta: Double,
+      C: DenseMatrix): Unit = {
+    val mA: Int = A.numRows
+    val nB: Int = B.numCols
+    val kA: Int = A.numCols
+    val kB: Int = B.numRows
+
+    require(kA == kB, s"The columns of A don't match the rows of B. A: $kA, B: $kB")
+    require(mA == C.numRows, s"The rows of C don't match the rows of A. C: ${C.numRows}, A: $mA")
+    require(nB == C.numCols,
+      s"The columns of C don't match the columns of B. C: ${C.numCols}, A: $nB")
+
+    val Avals = A.values
+    val Bvals = B.values
+    val Cvals = C.values
+    val ArowIndices = A.rowIndices
+    val AcolPtrs = A.colPtrs
+
+    // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
+    if (A.isTransposed) {
+      var colCounterForB = 0
+      if (!B.isTransposed) { // Expensive to put the check inside the loop
+        while (colCounterForB < nB) {
+          var rowCounterForA = 0
+          val Cstart = colCounterForB * mA
+          val Bstart = colCounterForB * kA
+          while (rowCounterForA < mA) {
+            var i = AcolPtrs(rowCounterForA)
+            val indEnd = AcolPtrs(rowCounterForA + 1)
+            var sum = 0.0
+            while (i < indEnd) {
+              sum += Avals(i) * Bvals(Bstart + ArowIndices(i))
+              i += 1
+            }
+            val Cindex = Cstart + rowCounterForA
+            Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha
+            rowCounterForA += 1
+          }
+          colCounterForB += 1
+        }
+      } else {
+        while (colCounterForB < nB) {
+          var rowCounterForA = 0
+          val Cstart = colCounterForB * mA
+          while (rowCounterForA < mA) {
+            var i = AcolPtrs(rowCounterForA)
+            val indEnd = AcolPtrs(rowCounterForA + 1)
+            var sum = 0.0
+            while (i < indEnd) {
+              sum += Avals(i) * B(ArowIndices(i), colCounterForB)
+              i += 1
+            }
+            val Cindex = Cstart + rowCounterForA
+            Cvals(Cindex) = beta * Cvals(Cindex) + sum * alpha
+            rowCounterForA += 1
+          }
+          colCounterForB += 1
+        }
+      }
+    } else {
+      // Scale matrix first if `beta` is not equal to 1.0
+      if (beta != 1.0) {
+        f2jBLAS.dscal(C.values.length, beta, C.values, 1)
+      }
+      // Perform matrix multiplication and add to C. The rows of A are multiplied by the columns of
+      // B, and added to C.
+      var colCounterForB = 0 // the column to be updated in C
+      if (!B.isTransposed) { // Expensive to put the check inside the loop
+        while (colCounterForB < nB) {
+          var colCounterForA = 0 // The column of A to multiply with the row of B
+          val Bstart = colCounterForB * kB
+          val Cstart = colCounterForB * mA
+          while (colCounterForA < kA) {
+            var i = AcolPtrs(colCounterForA)
+            val indEnd = AcolPtrs(colCounterForA + 1)
+            val Bval = Bvals(Bstart + colCounterForA) * alpha
+            while (i < indEnd) {
+              Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval
+              i += 1
+            }
+            colCounterForA += 1
+          }
+          colCounterForB += 1
+        }
+      } else {
+        while (colCounterForB < nB) {
+          var colCounterForA = 0 // The column of A to multiply with the row of B
+          val Cstart = colCounterForB * mA
+          while (colCounterForA < kA) {
+            var i = AcolPtrs(colCounterForA)
+            val indEnd = AcolPtrs(colCounterForA + 1)
+            val Bval = B(colCounterForA, colCounterForB) * alpha
+            while (i < indEnd) {
+              Cvals(Cstart + ArowIndices(i)) += Avals(i) * Bval
+              i += 1
+            }
+            colCounterForA += 1
+          }
+          colCounterForB += 1
+        }
+      }
+    }
+  }
+
+  /**
+   * y := alpha * A * x + beta * y
+   * @param alpha a scalar to scale the multiplication A * x.
+   * @param A the matrix A that will be left multiplied to x. Size of m x n.
+   * @param x the vector x that will be left multiplied by A. Size of n x 1.
+   * @param beta a scalar that can be used to scale vector y.
+   * @param y the resulting vector y. Size of m x 1.
+   */
+  def gemv(
+      alpha: Double,
+      A: Matrix,
+      x: Vector,
+      beta: Double,
+      y: DenseVector): Unit = {
+    require(A.numCols == x.size,
+      s"The columns of A don't match the number of elements of x. A: ${A.numCols}, x: ${x.size}")
+    require(A.numRows == y.size,
+      s"The rows of A don't match the number of elements of y. A: ${A.numRows}, y:${y.size}")
+    if (alpha == 0.0 && beta == 1.0) {
+      // gemv: alpha is equal to 0 and beta is equal to 1. Returning y.
+      return
+    } else if (alpha == 0.0) {
+      scal(beta, y)
+    } else {
+      (A, x) match {
+        case (smA: SparseMatrix, dvx: DenseVector) =>
+          gemv(alpha, smA, dvx, beta, y)
+        case (smA: SparseMatrix, svx: SparseVector) =>
+          gemv(alpha, smA, svx, beta, y)
+        case (dmA: DenseMatrix, dvx: DenseVector) =>
+          gemv(alpha, dmA, dvx, beta, y)
+        case (dmA: DenseMatrix, svx: SparseVector) =>
+          gemv(alpha, dmA, svx, beta, y)
+        case _ =>
+          throw new IllegalArgumentException(s"gemv doesn't support running on matrix type " +
+            s"${A.getClass} and vector type ${x.getClass}.")
+      }
+    }
+  }
+
+  /**
+   * y := alpha * A * x + beta * y
+   * For `DenseMatrix` A and `DenseVector` x.
+   */
+  private def gemv(
+      alpha: Double,
+      A: DenseMatrix,
+      x: DenseVector,
+      beta: Double,
+      y: DenseVector): Unit = {
+    val tStrA = if (A.isTransposed) "T" else "N"
+    val mA = if (!A.isTransposed) A.numRows else A.numCols
+    val nA = if (!A.isTransposed) A.numCols else A.numRows
+    nativeBLAS.dgemv(tStrA, mA, nA, alpha, A.values, mA, x.values, 1, beta,
+      y.values, 1)
+  }
+
+  /**
+   * y := alpha * A * x + beta * y
+   * For `DenseMatrix` A and `SparseVector` x.
+   */
+  private def gemv(
+      alpha: Double,
+      A: DenseMatrix,
+      x: SparseVector,
+      beta: Double,
+      y: DenseVector): Unit = {
+    val mA: Int = A.numRows
+    val nA: Int = A.numCols
+
+    val Avals = A.values
+
+    val xIndices = x.indices
+    val xNnz = xIndices.length
+    val xValues = x.values
+    val yValues = y.values
+
+    if (A.isTransposed) {
+      var rowCounterForA = 0
+      while (rowCounterForA < mA) {
+        var sum = 0.0
+        var k = 0
+        while (k < xNnz) {
+          sum += xValues(k) * Avals(xIndices(k) + rowCounterForA * nA)
+          k += 1
+        }
+        yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA)
+        rowCounterForA += 1
+      }
+    } else {
+      var rowCounterForA = 0
+      while (rowCounterForA < mA) {
+        var sum = 0.0
+        var k = 0
+        while (k < xNnz) {
+          sum += xValues(k) * Avals(xIndices(k) * mA + rowCounterForA)
+          k += 1
+        }
+        yValues(rowCounterForA) = sum * alpha + beta * yValues(rowCounterForA)
+        rowCounterForA += 1
+      }
+    }
+  }
+
+  /**
+   * y := alpha * A * x + beta * y
+   * For `SparseMatrix` A and `SparseVector` x.
+   */
+  private def gemv(
+      alpha: Double,
+      A: SparseMatrix,
+      x: SparseVector,
+      beta: Double,
+      y: DenseVector): Unit = {
+    val xValues = x.values
+    val xIndices = x.indices
+    val xNnz = xIndices.length
+
+    val yValues = y.values
+
+    val mA: Int = A.numRows
+    val nA: Int = A.numCols
+
+    val Avals = A.values
+    val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs
+    val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices
+
+    if (A.isTransposed) {
+      var rowCounter = 0
+      while (rowCounter < mA) {
+        var i = Arows(rowCounter)
+        val indEnd = Arows(rowCounter + 1)
+        var sum = 0.0
+        var k = 0
+        while (k < xNnz && i < indEnd) {
+          if (xIndices(k) == Acols(i)) {
+            sum += Avals(i) * xValues(k)
+            i += 1
+          }
+          k += 1
+        }
+        yValues(rowCounter) = sum * alpha + beta * yValues(rowCounter)
+        rowCounter += 1
+      }
+    } else {
+      if (beta != 1.0) scal(beta, y)
+
+      var colCounterForA = 0
+      var k = 0
+      while (colCounterForA < nA && k < xNnz) {
+        if (xIndices(k) == colCounterForA) {
+          var i = Acols(colCounterForA)
+          val indEnd = Acols(colCounterForA + 1)
+
+          val xTemp = xValues(k) * alpha
+          while (i < indEnd) {
+            val rowIndex = Arows(i)
+            yValues(Arows(i)) += Avals(i) * xTemp
+            i += 1
+          }
+          k += 1
+        }
+        colCounterForA += 1
+      }
+    }
+  }
+
+  /**
+   * y := alpha * A * x + beta * y
+   * For `SparseMatrix` A and `DenseVector` x.
+   */
+  private def gemv(
+      alpha: Double,
+      A: SparseMatrix,
+      x: DenseVector,
+      beta: Double,
+      y: DenseVector): Unit = {
+    val xValues = x.values
+    val yValues = y.values
+    val mA: Int = A.numRows
+    val nA: Int = A.numCols
+
+    val Avals = A.values
+    val Arows = if (!A.isTransposed) A.rowIndices else A.colPtrs
+    val Acols = if (!A.isTransposed) A.colPtrs else A.rowIndices
+    // Slicing is easy in this case. This is the optimal multiplication setting for sparse matrices
+    if (A.isTransposed) {
+      var rowCounter = 0
+      while (rowCounter < mA) {
+        var i = Arows(rowCounter)
+        val indEnd = Arows(rowCounter + 1)
+        var sum = 0.0
+        while (i < indEnd) {
+          sum += Avals(i) * xValues(Acols(i))
+          i += 1
+        }
+        yValues(rowCounter) = beta * yValues(rowCounter) + sum * alpha
+        rowCounter += 1
+      }
+    } else {
+      if (beta != 1.0) scal(beta, y)
+      // Perform matrix-vector multiplication and add to y
+      var colCounterForA = 0
+      while (colCounterForA < nA) {
+        var i = Acols(colCounterForA)
+        val indEnd = Acols(colCounterForA + 1)
+        val xVal = xValues(colCounterForA) * alpha
+        while (i < indEnd) {
+          val rowIndex = Arows(i)
+          yValues(rowIndex) += Avals(i) * xVal
+          i += 1
+        }
+        colCounterForA += 1
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
new file mode 100644
index 0000000..baa04fb
--- /dev/null
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Matrices.scala
@@ -0,0 +1,1026 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import java.util.{Arrays, Random}
+
+import scala.collection.mutable.{ArrayBuffer, ArrayBuilder => MArrayBuilder, HashSet => MHashSet}
+
+import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM}
+import com.github.fommil.netlib.BLAS.{getInstance => blas}
+
+/**
+ * Trait for a local matrix.
+ */
+sealed trait Matrix extends Serializable {
+
+  /** Number of rows. */
+  def numRows: Int
+
+  /** Number of columns. */
+  def numCols: Int
+
+  /** Flag that keeps track whether the matrix is transposed or not. False by default. */
+  val isTransposed: Boolean = false
+
+  /** Converts to a dense array in column major. */
+  def toArray: Array[Double] = {
+    val newArray = new Array[Double](numRows * numCols)
+    foreachActive { (i, j, v) =>
+      newArray(j * numRows + i) = v
+    }
+    newArray
+  }
+
+  /**
+   * Returns an iterator of column vectors.
+   * This operation could be expensive, depending on the underlying storage.
+   */
+  def colIter: Iterator[Vector]
+
+  /**
+   * Returns an iterator of row vectors.
+   * This operation could be expensive, depending on the underlying storage.
+   */
+  def rowIter: Iterator[Vector] = this.transpose.colIter
+
+  /** Converts to a breeze matrix. */
+  private[ml] def toBreeze: BM[Double]
+
+  /** Gets the (i, j)-th element. */
+  def apply(i: Int, j: Int): Double
+
+  /** Return the index for the (i, j)-th element in the backing array. */
+  private[ml] def index(i: Int, j: Int): Int
+
+  /** Update element at (i, j) */
+  private[ml] def update(i: Int, j: Int, v: Double): Unit
+
+  /** Get a deep copy of the matrix. */
+  def copy: Matrix
+
+  /** Transpose the Matrix. Returns a new `Matrix` instance sharing the same underlying data. */
+  def transpose: Matrix
+
+  /** Convenience method for `Matrix`-`DenseMatrix` multiplication. */
+  def multiply(y: DenseMatrix): DenseMatrix = {
+    val C: DenseMatrix = DenseMatrix.zeros(numRows, y.numCols)
+    BLAS.gemm(1.0, this, y, 0.0, C)
+    C
+  }
+
+  /** Convenience method for `Matrix`-`DenseVector` multiplication. For binary compatibility. */
+  def multiply(y: DenseVector): DenseVector = {
+    multiply(y.asInstanceOf[Vector])
+  }
+
+  /** Convenience method for `Matrix`-`Vector` multiplication. */
+  def multiply(y: Vector): DenseVector = {
+    val output = new DenseVector(new Array[Double](numRows))
+    BLAS.gemv(1.0, this, y, 0.0, output)
+    output
+  }
+
+  /** A human readable representation of the matrix */
+  override def toString: String = toBreeze.toString()
+
+  /** A human readable representation of the matrix with maximum lines and width */
+  def toString(maxLines: Int, maxLineWidth: Int): String = toBreeze.toString(maxLines, maxLineWidth)
+
+  /**
+   * Map the values of this matrix using a function. Generates a new matrix. Performs the
+   * function on only the backing array. For example, an operation such as addition or
+   * subtraction will only be performed on the non-zero values in a `SparseMatrix`.
+   */
+  private[spark] def map(f: Double => Double): Matrix
+
+  /**
+   * Update all the values of this matrix using the function f. Performed in-place on the
+   * backing array. For example, an operation such as addition or subtraction will only be
+   * performed on the non-zero values in a `SparseMatrix`.
+   */
+  private[ml] def update(f: Double => Double): Matrix
+
+  /**
+   * Applies a function `f` to all the active elements of dense and sparse matrix. The ordering
+   * of the elements are not defined.
+   *
+   * @param f the function takes three parameters where the first two parameters are the row
+   *          and column indices respectively with the type `Int`, and the final parameter is the
+   *          corresponding value in the matrix with type `Double`.
+   */
+  private[spark] def foreachActive(f: (Int, Int, Double) => Unit)
+
+  /**
+   * Find the number of non-zero active values.
+   */
+  def numNonzeros: Int
+
+  /**
+   * Find the number of values stored explicitly. These values can be zero as well.
+   */
+  def numActives: Int
+}
+
+/**
+ * Column-major dense matrix.
+ * The entry values are stored in a single array of doubles with columns listed in sequence.
+ * For example, the following matrix
+ * {{{
+ *   1.0 2.0
+ *   3.0 4.0
+ *   5.0 6.0
+ * }}}
+ * is stored as `[1.0, 3.0, 5.0, 2.0, 4.0, 6.0]`.
+ *
+ * @param numRows number of rows
+ * @param numCols number of columns
+ * @param values matrix entries in column major if not transposed or in row major otherwise
+ * @param isTransposed whether the matrix is transposed. If true, `values` stores the matrix in
+ *                     row major.
+ */
+class DenseMatrix (
+    val numRows: Int,
+    val numCols: Int,
+    val values: Array[Double],
+    override val isTransposed: Boolean) extends Matrix {
+
+  require(values.length == numRows * numCols, "The number of values supplied doesn't match the " +
+    s"size of the matrix! values.length: ${values.length}, numRows * numCols: ${numRows * numCols}")
+
+  /**
+   * Column-major dense matrix.
+   * The entry values are stored in a single array of doubles with columns listed in sequence.
+   * For example, the following matrix
+   * {{{
+   *   1.0 2.0
+   *   3.0 4.0
+   *   5.0 6.0
+   * }}}
+   * is stored as `[1.0, 3.0, 5.0, 2.0, 4.0, 6.0]`.
+   *
+   * @param numRows number of rows
+   * @param numCols number of columns
+   * @param values matrix entries in column major
+   */
+  def this(numRows: Int, numCols: Int, values: Array[Double]) =
+    this(numRows, numCols, values, false)
+
+  override def equals(o: Any): Boolean = o match {
+    case m: Matrix => toBreeze == m.toBreeze
+    case _ => false
+  }
+
+  override def hashCode: Int = {
+    Seq(numRows, numCols, toArray).##
+  }
+
+  private[ml] def toBreeze: BM[Double] = {
+    if (!isTransposed) {
+      new BDM[Double](numRows, numCols, values)
+    } else {
+      val breezeMatrix = new BDM[Double](numCols, numRows, values)
+      breezeMatrix.t
+    }
+  }
+
+  private[ml] def apply(i: Int): Double = values(i)
+
+  override def apply(i: Int, j: Int): Double = values(index(i, j))
+
+  private[ml] def index(i: Int, j: Int): Int = {
+    require(i >= 0 && i < numRows, s"Expected 0 <= i < $numRows, got i = $i.")
+    require(j >= 0 && j < numCols, s"Expected 0 <= j < $numCols, got j = $j.")
+    if (!isTransposed) i + numRows * j else j + numCols * i
+  }
+
+  private[ml] def update(i: Int, j: Int, v: Double): Unit = {
+    values(index(i, j)) = v
+  }
+
+  override def copy: DenseMatrix = new DenseMatrix(numRows, numCols, values.clone())
+
+  private[spark] def map(f: Double => Double) = new DenseMatrix(numRows, numCols, values.map(f),
+    isTransposed)
+
+  private[ml] def update(f: Double => Double): DenseMatrix = {
+    val len = values.length
+    var i = 0
+    while (i < len) {
+      values(i) = f(values(i))
+      i += 1
+    }
+    this
+  }
+
+  override def transpose: DenseMatrix = new DenseMatrix(numCols, numRows, values, !isTransposed)
+
+  private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
+    if (!isTransposed) {
+      // outer loop over columns
+      var j = 0
+      while (j < numCols) {
+        var i = 0
+        val indStart = j * numRows
+        while (i < numRows) {
+          f(i, j, values(indStart + i))
+          i += 1
+        }
+        j += 1
+      }
+    } else {
+      // outer loop over rows
+      var i = 0
+      while (i < numRows) {
+        var j = 0
+        val indStart = i * numCols
+        while (j < numCols) {
+          f(i, j, values(indStart + j))
+          j += 1
+        }
+        i += 1
+      }
+    }
+  }
+
+  override def numNonzeros: Int = values.count(_ != 0)
+
+  override def numActives: Int = values.length
+
+  /**
+   * Generate a `SparseMatrix` from the given `DenseMatrix`. The new matrix will have isTransposed
+   * set to false.
+   */
+  def toSparse: SparseMatrix = {
+    val spVals: MArrayBuilder[Double] = new MArrayBuilder.ofDouble
+    val colPtrs: Array[Int] = new Array[Int](numCols + 1)
+    val rowIndices: MArrayBuilder[Int] = new MArrayBuilder.ofInt
+    var nnz = 0
+    var j = 0
+    while (j < numCols) {
+      var i = 0
+      while (i < numRows) {
+        val v = values(index(i, j))
+        if (v != 0.0) {
+          rowIndices += i
+          spVals += v
+          nnz += 1
+        }
+        i += 1
+      }
+      j += 1
+      colPtrs(j) = nnz
+    }
+    new SparseMatrix(numRows, numCols, colPtrs, rowIndices.result(), spVals.result())
+  }
+
+  override def colIter: Iterator[Vector] = {
+    if (isTransposed) {
+      Iterator.tabulate(numCols) { j =>
+        val col = new Array[Double](numRows)
+        blas.dcopy(numRows, values, j, numCols, col, 0, 1)
+        new DenseVector(col)
+      }
+    } else {
+      Iterator.tabulate(numCols) { j =>
+        new DenseVector(values.slice(j * numRows, (j + 1) * numRows))
+      }
+    }
+  }
+}
+
+/**
+ * Factory methods for [[org.apache.spark.ml.linalg.DenseMatrix]].
+ */
+object DenseMatrix {
+
+  /**
+   * Generate a `DenseMatrix` consisting of zeros.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @return `DenseMatrix` with size `numRows` x `numCols` and values of zeros
+   */
+  def zeros(numRows: Int, numCols: Int): DenseMatrix = {
+    require(numRows.toLong * numCols <= Int.MaxValue,
+            s"$numRows x $numCols dense matrix is too large to allocate")
+    new DenseMatrix(numRows, numCols, new Array[Double](numRows * numCols))
+  }
+
+  /**
+   * Generate a `DenseMatrix` consisting of ones.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @return `DenseMatrix` with size `numRows` x `numCols` and values of ones
+   */
+  def ones(numRows: Int, numCols: Int): DenseMatrix = {
+    require(numRows.toLong * numCols <= Int.MaxValue,
+            s"$numRows x $numCols dense matrix is too large to allocate")
+    new DenseMatrix(numRows, numCols, Array.fill(numRows * numCols)(1.0))
+  }
+
+  /**
+   * Generate an Identity Matrix in `DenseMatrix` format.
+   * @param n number of rows and columns of the matrix
+   * @return `DenseMatrix` with size `n` x `n` and values of ones on the diagonal
+   */
+  def eye(n: Int): DenseMatrix = {
+    val identity = DenseMatrix.zeros(n, n)
+    var i = 0
+    while (i < n) {
+      identity.update(i, i, 1.0)
+      i += 1
+    }
+    identity
+  }
+
+  /**
+   * Generate a `DenseMatrix` consisting of `i.i.d.` uniform random numbers.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param rng a random number generator
+   * @return `DenseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
+   */
+  def rand(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
+    require(numRows.toLong * numCols <= Int.MaxValue,
+            s"$numRows x $numCols dense matrix is too large to allocate")
+    new DenseMatrix(numRows, numCols, Array.fill(numRows * numCols)(rng.nextDouble()))
+  }
+
+  /**
+   * Generate a `DenseMatrix` consisting of `i.i.d.` gaussian random numbers.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param rng a random number generator
+   * @return `DenseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
+   */
+  def randn(numRows: Int, numCols: Int, rng: Random): DenseMatrix = {
+    require(numRows.toLong * numCols <= Int.MaxValue,
+            s"$numRows x $numCols dense matrix is too large to allocate")
+    new DenseMatrix(numRows, numCols, Array.fill(numRows * numCols)(rng.nextGaussian()))
+  }
+
+  /**
+   * Generate a diagonal matrix in `DenseMatrix` format from the supplied values.
+   * @param vector a `Vector` that will form the values on the diagonal of the matrix
+   * @return Square `DenseMatrix` with size `values.length` x `values.length` and `values`
+   *         on the diagonal
+   */
+  def diag(vector: Vector): DenseMatrix = {
+    val n = vector.size
+    val matrix = DenseMatrix.zeros(n, n)
+    val values = vector.toArray
+    var i = 0
+    while (i < n) {
+      matrix.update(i, i, values(i))
+      i += 1
+    }
+    matrix
+  }
+}
+
+/**
+ * Column-major sparse matrix.
+ * The entry values are stored in Compressed Sparse Column (CSC) format.
+ * For example, the following matrix
+ * {{{
+ *   1.0 0.0 4.0
+ *   0.0 3.0 5.0
+ *   2.0 0.0 6.0
+ * }}}
+ * is stored as `values: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]`,
+ * `rowIndices=[0, 2, 1, 0, 1, 2]`, `colPointers=[0, 2, 3, 6]`.
+ *
+ * @param numRows number of rows
+ * @param numCols number of columns
+ * @param colPtrs the index corresponding to the start of a new column (if not transposed)
+ * @param rowIndices the row index of the entry (if not transposed). They must be in strictly
+ *                   increasing order for each column
+ * @param values nonzero matrix entries in column major (if not transposed)
+ * @param isTransposed whether the matrix is transposed. If true, the matrix can be considered
+ *                     Compressed Sparse Row (CSR) format, where `colPtrs` behaves as rowPtrs,
+ *                     and `rowIndices` behave as colIndices, and `values` are stored in row major.
+ */
+class SparseMatrix (
+    val numRows: Int,
+    val numCols: Int,
+    val colPtrs: Array[Int],
+    val rowIndices: Array[Int],
+    val values: Array[Double],
+    override val isTransposed: Boolean) extends Matrix {
+
+  require(values.length == rowIndices.length, "The number of row indices and values don't match! " +
+    s"values.length: ${values.length}, rowIndices.length: ${rowIndices.length}")
+  // The Or statement is for the case when the matrix is transposed
+  require(colPtrs.length == numCols + 1 || colPtrs.length == numRows + 1, "The length of the " +
+    "column indices should be the number of columns + 1. Currently, colPointers.length: " +
+    s"${colPtrs.length}, numCols: $numCols")
+  require(values.length == colPtrs.last, "The last value of colPtrs must equal the number of " +
+    s"elements. values.length: ${values.length}, colPtrs.last: ${colPtrs.last}")
+
+  /**
+   * Column-major sparse matrix.
+   * The entry values are stored in Compressed Sparse Column (CSC) format.
+   * For example, the following matrix
+   * {{{
+   *   1.0 0.0 4.0
+   *   0.0 3.0 5.0
+   *   2.0 0.0 6.0
+   * }}}
+   * is stored as `values: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]`,
+   * `rowIndices=[0, 2, 1, 0, 1, 2]`, `colPointers=[0, 2, 3, 6]`.
+   *
+   * @param numRows number of rows
+   * @param numCols number of columns
+   * @param colPtrs the index corresponding to the start of a new column
+   * @param rowIndices the row index of the entry. They must be in strictly increasing
+   *                   order for each column
+   * @param values non-zero matrix entries in column major
+   */
+  def this(
+      numRows: Int,
+      numCols: Int,
+      colPtrs: Array[Int],
+      rowIndices: Array[Int],
+      values: Array[Double]) = this(numRows, numCols, colPtrs, rowIndices, values, false)
+
+  override def equals(o: Any): Boolean = o match {
+    case m: Matrix => toBreeze == m.toBreeze
+    case _ => false
+  }
+
+  private[ml] def toBreeze: BM[Double] = {
+     if (!isTransposed) {
+       new BSM[Double](values, numRows, numCols, colPtrs, rowIndices)
+     } else {
+       val breezeMatrix = new BSM[Double](values, numCols, numRows, colPtrs, rowIndices)
+       breezeMatrix.t
+     }
+  }
+
+  override def apply(i: Int, j: Int): Double = {
+    val ind = index(i, j)
+    if (ind < 0) 0.0 else values(ind)
+  }
+
+  private[ml] def index(i: Int, j: Int): Int = {
+    require(i >= 0 && i < numRows, s"Expected 0 <= i < $numRows, got i = $i.")
+    require(j >= 0 && j < numCols, s"Expected 0 <= j < $numCols, got j = $j.")
+    if (!isTransposed) {
+      Arrays.binarySearch(rowIndices, colPtrs(j), colPtrs(j + 1), i)
+    } else {
+      Arrays.binarySearch(rowIndices, colPtrs(i), colPtrs(i + 1), j)
+    }
+  }
+
+  private[ml] def update(i: Int, j: Int, v: Double): Unit = {
+    val ind = index(i, j)
+    if (ind < 0) {
+      throw new NoSuchElementException("The given row and column indices correspond to a zero " +
+        "value. Only non-zero elements in Sparse Matrices can be updated.")
+    } else {
+      values(ind) = v
+    }
+  }
+
+  override def copy: SparseMatrix = {
+    new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.clone())
+  }
+
+  private[spark] def map(f: Double => Double) =
+    new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values.map(f), isTransposed)
+
+  private[ml] def update(f: Double => Double): SparseMatrix = {
+    val len = values.length
+    var i = 0
+    while (i < len) {
+      values(i) = f(values(i))
+      i += 1
+    }
+    this
+  }
+
+  override def transpose: SparseMatrix =
+    new SparseMatrix(numCols, numRows, colPtrs, rowIndices, values, !isTransposed)
+
+  private[spark] override def foreachActive(f: (Int, Int, Double) => Unit): Unit = {
+    if (!isTransposed) {
+      var j = 0
+      while (j < numCols) {
+        var idx = colPtrs(j)
+        val idxEnd = colPtrs(j + 1)
+        while (idx < idxEnd) {
+          f(rowIndices(idx), j, values(idx))
+          idx += 1
+        }
+        j += 1
+      }
+    } else {
+      var i = 0
+      while (i < numRows) {
+        var idx = colPtrs(i)
+        val idxEnd = colPtrs(i + 1)
+        while (idx < idxEnd) {
+          val j = rowIndices(idx)
+          f(i, j, values(idx))
+          idx += 1
+        }
+        i += 1
+      }
+    }
+  }
+
+  /**
+   * Generate a `DenseMatrix` from the given `SparseMatrix`. The new matrix will have isTransposed
+   * set to false.
+   */
+  def toDense: DenseMatrix = {
+    new DenseMatrix(numRows, numCols, toArray)
+  }
+
+  override def numNonzeros: Int = values.count(_ != 0)
+
+  override def numActives: Int = values.length
+
+  override def colIter: Iterator[Vector] = {
+    if (isTransposed) {
+      val indicesArray = Array.fill(numCols)(MArrayBuilder.make[Int])
+      val valuesArray = Array.fill(numCols)(MArrayBuilder.make[Double])
+      var i = 0
+      while (i < numRows) {
+        var k = colPtrs(i)
+        val rowEnd = colPtrs(i + 1)
+        while (k < rowEnd) {
+          val j = rowIndices(k)
+          indicesArray(j) += i
+          valuesArray(j) += values(k)
+          k += 1
+        }
+        i += 1
+      }
+      Iterator.tabulate(numCols) { j =>
+        val ii = indicesArray(j).result()
+        val vv = valuesArray(j).result()
+        new SparseVector(numRows, ii, vv)
+      }
+    } else {
+      Iterator.tabulate(numCols) { j =>
+        val colStart = colPtrs(j)
+        val colEnd = colPtrs(j + 1)
+        val ii = rowIndices.slice(colStart, colEnd)
+        val vv = values.slice(colStart, colEnd)
+        new SparseVector(numRows, ii, vv)
+      }
+    }
+  }
+}
+
+/**
+ * Factory methods for [[org.apache.spark.ml.linalg.SparseMatrix]].
+ */
+object SparseMatrix {
+
+  /**
+   * Generate a `SparseMatrix` from Coordinate List (COO) format. Input must be an array of
+   * (i, j, value) tuples. Entries that have duplicate values of i and j are
+   * added together. Tuples where value is equal to zero will be omitted.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param entries Array of (i, j, value) tuples
+   * @return The corresponding `SparseMatrix`
+   */
+  def fromCOO(numRows: Int, numCols: Int, entries: Iterable[(Int, Int, Double)]): SparseMatrix = {
+    val sortedEntries = entries.toSeq.sortBy(v => (v._2, v._1))
+    val numEntries = sortedEntries.size
+    if (sortedEntries.nonEmpty) {
+      // Since the entries are sorted by column index, we only need to check the first and the last.
+      for (col <- Seq(sortedEntries.head._2, sortedEntries.last._2)) {
+        require(col >= 0 && col < numCols, s"Column index out of range [0, $numCols): $col.")
+      }
+    }
+    val colPtrs = new Array[Int](numCols + 1)
+    val rowIndices = MArrayBuilder.make[Int]
+    rowIndices.sizeHint(numEntries)
+    val values = MArrayBuilder.make[Double]
+    values.sizeHint(numEntries)
+    var nnz = 0
+    var prevCol = 0
+    var prevRow = -1
+    var prevVal = 0.0
+    // Append a dummy entry to include the last one at the end of the loop.
+    (sortedEntries.view :+ (numRows, numCols, 1.0)).foreach { case (i, j, v) =>
+      if (v != 0) {
+        if (i == prevRow && j == prevCol) {
+          prevVal += v
+        } else {
+          if (prevVal != 0) {
+            require(prevRow >= 0 && prevRow < numRows,
+              s"Row index out of range [0, $numRows): $prevRow.")
+            nnz += 1
+            rowIndices += prevRow
+            values += prevVal
+          }
+          prevRow = i
+          prevVal = v
+          while (prevCol < j) {
+            colPtrs(prevCol + 1) = nnz
+            prevCol += 1
+          }
+        }
+      }
+    }
+    new SparseMatrix(numRows, numCols, colPtrs, rowIndices.result(), values.result())
+  }
+
+  /**
+   * Generate an Identity Matrix in `SparseMatrix` format.
+   * @param n number of rows and columns of the matrix
+   * @return `SparseMatrix` with size `n` x `n` and values of ones on the diagonal
+   */
+  def speye(n: Int): SparseMatrix = {
+    new SparseMatrix(n, n, (0 to n).toArray, (0 until n).toArray, Array.fill(n)(1.0))
+  }
+
+  /**
+   * Generates the skeleton of a random `SparseMatrix` with a given random number generator.
+   * The values of the matrix returned are undefined.
+   */
+  private def genRandMatrix(
+      numRows: Int,
+      numCols: Int,
+      density: Double,
+      rng: Random): SparseMatrix = {
+    require(numRows > 0, s"numRows must be greater than 0 but got $numRows")
+    require(numCols > 0, s"numCols must be greater than 0 but got $numCols")
+    require(density >= 0.0 && density <= 1.0,
+      s"density must be a double in the range 0.0 <= d <= 1.0. Currently, density: $density")
+    val size = numRows.toLong * numCols
+    val expected = size * density
+    assert(expected < Int.MaxValue,
+      "The expected number of nonzeros cannot be greater than Int.MaxValue.")
+    val nnz = math.ceil(expected).toInt
+    if (density == 0.0) {
+      new SparseMatrix(numRows, numCols, new Array[Int](numCols + 1), Array[Int](), Array[Double]())
+    } else if (density == 1.0) {
+      val colPtrs = Array.tabulate(numCols + 1)(j => j * numRows)
+      val rowIndices = Array.tabulate(size.toInt)(idx => idx % numRows)
+      new SparseMatrix(numRows, numCols, colPtrs, rowIndices, new Array[Double](numRows * numCols))
+    } else if (density < 0.34) {
+      // draw-by-draw, expected number of iterations is less than 1.5 * nnz
+      val entries = MHashSet[(Int, Int)]()
+      while (entries.size < nnz) {
+        entries += ((rng.nextInt(numRows), rng.nextInt(numCols)))
+      }
+      SparseMatrix.fromCOO(numRows, numCols, entries.map(v => (v._1, v._2, 1.0)))
+    } else {
+      // selection-rejection method
+      var idx = 0L
+      var numSelected = 0
+      var j = 0
+      val colPtrs = new Array[Int](numCols + 1)
+      val rowIndices = new Array[Int](nnz)
+      while (j < numCols && numSelected < nnz) {
+        var i = 0
+        while (i < numRows && numSelected < nnz) {
+          if (rng.nextDouble() < 1.0 * (nnz - numSelected) / (size - idx)) {
+            rowIndices(numSelected) = i
+            numSelected += 1
+          }
+          i += 1
+          idx += 1
+        }
+        colPtrs(j + 1) = numSelected
+        j += 1
+      }
+      new SparseMatrix(numRows, numCols, colPtrs, rowIndices, new Array[Double](nnz))
+    }
+  }
+
+  /**
+   * Generate a `SparseMatrix` consisting of `i.i.d`. uniform random numbers. The number of non-zero
+   * elements equal the ceiling of `numRows` x `numCols` x `density`
+   *
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param density the desired density for the matrix
+   * @param rng a random number generator
+   * @return `SparseMatrix` with size `numRows` x `numCols` and values in U(0, 1)
+   */
+  def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
+    val mat = genRandMatrix(numRows, numCols, density, rng)
+    mat.update(i => rng.nextDouble())
+  }
+
+  /**
+   * Generate a `SparseMatrix` consisting of `i.i.d`. gaussian random numbers.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param density the desired density for the matrix
+   * @param rng a random number generator
+   * @return `SparseMatrix` with size `numRows` x `numCols` and values in N(0, 1)
+   */
+  def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): SparseMatrix = {
+    val mat = genRandMatrix(numRows, numCols, density, rng)
+    mat.update(i => rng.nextGaussian())
+  }
+
+  /**
+   * Generate a diagonal matrix in `SparseMatrix` format from the supplied values.
+   * @param vector a `Vector` that will form the values on the diagonal of the matrix
+   * @return Square `SparseMatrix` with size `values.length` x `values.length` and non-zero
+   *         `values` on the diagonal
+   */
+  def spdiag(vector: Vector): SparseMatrix = {
+    val n = vector.size
+    vector match {
+      case sVec: SparseVector =>
+        SparseMatrix.fromCOO(n, n, sVec.indices.zip(sVec.values).map(v => (v._1, v._1, v._2)))
+      case dVec: DenseVector =>
+        val entries = dVec.values.zipWithIndex
+        val nnzVals = entries.filter(v => v._1 != 0.0)
+        SparseMatrix.fromCOO(n, n, nnzVals.map(v => (v._2, v._2, v._1)))
+    }
+  }
+}
+
+/**
+ * Factory methods for [[org.apache.spark.ml.linalg.Matrix]].
+ */
+object Matrices {
+
+  /**
+   * Creates a column-major dense matrix.
+   *
+   * @param numRows number of rows
+   * @param numCols number of columns
+   * @param values matrix entries in column major
+   */
+  def dense(numRows: Int, numCols: Int, values: Array[Double]): Matrix = {
+    new DenseMatrix(numRows, numCols, values)
+  }
+
+  /**
+   * Creates a column-major sparse matrix in Compressed Sparse Column (CSC) format.
+   *
+   * @param numRows number of rows
+   * @param numCols number of columns
+   * @param colPtrs the index corresponding to the start of a new column
+   * @param rowIndices the row index of the entry
+   * @param values non-zero matrix entries in column major
+   */
+  def sparse(
+     numRows: Int,
+     numCols: Int,
+     colPtrs: Array[Int],
+     rowIndices: Array[Int],
+     values: Array[Double]): Matrix = {
+    new SparseMatrix(numRows, numCols, colPtrs, rowIndices, values)
+  }
+
+  /**
+   * Creates a Matrix instance from a breeze matrix.
+   * @param breeze a breeze matrix
+   * @return a Matrix instance
+   */
+  private[ml] def fromBreeze(breeze: BM[Double]): Matrix = {
+    breeze match {
+      case dm: BDM[Double] =>
+        new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose)
+      case sm: BSM[Double] =>
+        // Spark-11507. work around breeze issue 479.
+        val mat = if (sm.colPtrs.last != sm.data.length) {
+          val matCopy = sm.copy
+          matCopy.compact()
+          matCopy
+        } else {
+          sm
+        }
+        // There is no isTranspose flag for sparse matrices in Breeze
+        new SparseMatrix(mat.rows, mat.cols, mat.colPtrs, mat.rowIndices, mat.data)
+      case _ =>
+        throw new UnsupportedOperationException(
+          s"Do not support conversion from type ${breeze.getClass.getName}.")
+    }
+  }
+
+  /**
+   * Generate a `Matrix` consisting of zeros.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @return `Matrix` with size `numRows` x `numCols` and values of zeros
+   */
+  def zeros(numRows: Int, numCols: Int): Matrix = DenseMatrix.zeros(numRows, numCols)
+
+  /**
+   * Generate a `DenseMatrix` consisting of ones.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @return `Matrix` with size `numRows` x `numCols` and values of ones
+   */
+  def ones(numRows: Int, numCols: Int): Matrix = DenseMatrix.ones(numRows, numCols)
+
+  /**
+   * Generate a dense Identity Matrix in `Matrix` format.
+   * @param n number of rows and columns of the matrix
+   * @return `Matrix` with size `n` x `n` and values of ones on the diagonal
+   */
+  def eye(n: Int): Matrix = DenseMatrix.eye(n)
+
+  /**
+   * Generate a sparse Identity Matrix in `Matrix` format.
+   * @param n number of rows and columns of the matrix
+   * @return `Matrix` with size `n` x `n` and values of ones on the diagonal
+   */
+  def speye(n: Int): Matrix = SparseMatrix.speye(n)
+
+  /**
+   * Generate a `DenseMatrix` consisting of `i.i.d.` uniform random numbers.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param rng a random number generator
+   * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
+   */
+  def rand(numRows: Int, numCols: Int, rng: Random): Matrix =
+    DenseMatrix.rand(numRows, numCols, rng)
+
+  /**
+   * Generate a `SparseMatrix` consisting of `i.i.d.` gaussian random numbers.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param density the desired density for the matrix
+   * @param rng a random number generator
+   * @return `Matrix` with size `numRows` x `numCols` and values in U(0, 1)
+   */
+  def sprand(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
+    SparseMatrix.sprand(numRows, numCols, density, rng)
+
+  /**
+   * Generate a `DenseMatrix` consisting of `i.i.d.` gaussian random numbers.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param rng a random number generator
+   * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
+   */
+  def randn(numRows: Int, numCols: Int, rng: Random): Matrix =
+    DenseMatrix.randn(numRows, numCols, rng)
+
+  /**
+   * Generate a `SparseMatrix` consisting of `i.i.d.` gaussian random numbers.
+   * @param numRows number of rows of the matrix
+   * @param numCols number of columns of the matrix
+   * @param density the desired density for the matrix
+   * @param rng a random number generator
+   * @return `Matrix` with size `numRows` x `numCols` and values in N(0, 1)
+   */
+  def sprandn(numRows: Int, numCols: Int, density: Double, rng: Random): Matrix =
+    SparseMatrix.sprandn(numRows, numCols, density, rng)
+
+  /**
+   * Generate a diagonal matrix in `Matrix` format from the supplied values.
+   * @param vector a `Vector` that will form the values on the diagonal of the matrix
+   * @return Square `Matrix` with size `values.length` x `values.length` and `values`
+   *         on the diagonal
+   */
+  def diag(vector: Vector): Matrix = DenseMatrix.diag(vector)
+
+  /**
+   * Horizontally concatenate a sequence of matrices. The returned matrix will be in the format
+   * the matrices are supplied in. Supplying a mix of dense and sparse matrices will result in
+   * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
+   * @param matrices array of matrices
+   * @return a single `Matrix` composed of the matrices that were horizontally concatenated
+   */
+  def horzcat(matrices: Array[Matrix]): Matrix = {
+    if (matrices.isEmpty) {
+      return new DenseMatrix(0, 0, Array[Double]())
+    } else if (matrices.length == 1) {
+      return matrices(0)
+    }
+    val numRows = matrices(0).numRows
+    var hasSparse = false
+    var numCols = 0
+    matrices.foreach { mat =>
+      require(numRows == mat.numRows, "The number of rows of the matrices in this sequence, " +
+        "don't match!")
+      mat match {
+        case sparse: SparseMatrix => hasSparse = true
+        case dense: DenseMatrix => // empty on purpose
+        case _ => throw new IllegalArgumentException("Unsupported matrix format. Expected " +
+          s"SparseMatrix or DenseMatrix. Instead got: ${mat.getClass}")
+      }
+      numCols += mat.numCols
+    }
+    if (!hasSparse) {
+      new DenseMatrix(numRows, numCols, matrices.flatMap(_.toArray))
+    } else {
+      var startCol = 0
+      val entries: Array[(Int, Int, Double)] = matrices.flatMap { mat =>
+        val nCols = mat.numCols
+        mat match {
+          case spMat: SparseMatrix =>
+            val data = new Array[(Int, Int, Double)](spMat.values.length)
+            var cnt = 0
+            spMat.foreachActive { (i, j, v) =>
+              data(cnt) = (i, j + startCol, v)
+              cnt += 1
+            }
+            startCol += nCols
+            data
+          case dnMat: DenseMatrix =>
+            val data = new ArrayBuffer[(Int, Int, Double)]()
+            dnMat.foreachActive { (i, j, v) =>
+              if (v != 0.0) {
+                data.append((i, j + startCol, v))
+              }
+            }
+            startCol += nCols
+            data
+        }
+      }
+      SparseMatrix.fromCOO(numRows, numCols, entries)
+    }
+  }
+
+  /**
+   * Vertically concatenate a sequence of matrices. The returned matrix will be in the format
+   * the matrices are supplied in. Supplying a mix of dense and sparse matrices will result in
+   * a sparse matrix. If the Array is empty, an empty `DenseMatrix` will be returned.
+   * @param matrices array of matrices
+   * @return a single `Matrix` composed of the matrices that were vertically concatenated
+   */
+  def vertcat(matrices: Array[Matrix]): Matrix = {
+    if (matrices.isEmpty) {
+      return new DenseMatrix(0, 0, Array[Double]())
+    } else if (matrices.length == 1) {
+      return matrices(0)
+    }
+    val numCols = matrices(0).numCols
+    var hasSparse = false
+    var numRows = 0
+    matrices.foreach { mat =>
+      require(numCols == mat.numCols, "The number of rows of the matrices in this sequence, " +
+        "don't match!")
+      mat match {
+        case sparse: SparseMatrix => hasSparse = true
+        case dense: DenseMatrix => // empty on purpose
+        case _ => throw new IllegalArgumentException("Unsupported matrix format. Expected " +
+          s"SparseMatrix or DenseMatrix. Instead got: ${mat.getClass}")
+      }
+      numRows += mat.numRows
+    }
+    if (!hasSparse) {
+      val allValues = new Array[Double](numRows * numCols)
+      var startRow = 0
+      matrices.foreach { mat =>
+        var j = 0
+        val nRows = mat.numRows
+        mat.foreachActive { (i, j, v) =>
+          val indStart = j * numRows + startRow
+          allValues(indStart + i) = v
+        }
+        startRow += nRows
+      }
+      new DenseMatrix(numRows, numCols, allValues)
+    } else {
+      var startRow = 0
+      val entries: Array[(Int, Int, Double)] = matrices.flatMap { mat =>
+        val nRows = mat.numRows
+        mat match {
+          case spMat: SparseMatrix =>
+            val data = new Array[(Int, Int, Double)](spMat.values.length)
+            var cnt = 0
+            spMat.foreachActive { (i, j, v) =>
+              data(cnt) = (i + startRow, j, v)
+              cnt += 1
+            }
+            startRow += nRows
+            data
+          case dnMat: DenseMatrix =>
+            val data = new ArrayBuffer[(Int, Int, Double)]()
+            dnMat.foreachActive { (i, j, v) =>
+              if (v != 0.0) {
+                data.append((i + startRow, j, v))
+              }
+            }
+            startRow += nRows
+            data
+        }
+      }
+      SparseMatrix.fromCOO(numRows, numCols, entries)
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
new file mode 100644
index 0000000..fd4ce9a
--- /dev/null
+++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala
@@ -0,0 +1,736 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.linalg
+
+import java.lang.{Double => JavaDouble, Integer => JavaInteger, Iterable => JavaIterable}
+import java.util
+
+import scala.annotation.varargs
+import scala.collection.JavaConverters._
+
+import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV}
+import org.json4s.DefaultFormats
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render}
+
+/**
+ * Represents a numeric vector, whose index type is Int and value type is Double.
+ *
+ * Note: Users should not implement this interface.
+ */
+sealed trait Vector extends Serializable {
+
+  /**
+   * Size of the vector.
+   */
+  def size: Int
+
+  /**
+   * Converts the instance to a double array.
+   */
+  def toArray: Array[Double]
+
+  override def equals(other: Any): Boolean = {
+    other match {
+      case v2: Vector =>
+        if (this.size != v2.size) return false
+        (this, v2) match {
+          case (s1: SparseVector, s2: SparseVector) =>
+            Vectors.equals(s1.indices, s1.values, s2.indices, s2.values)
+          case (s1: SparseVector, d1: DenseVector) =>
+            Vectors.equals(s1.indices, s1.values, 0 until d1.size, d1.values)
+          case (d1: DenseVector, s1: SparseVector) =>
+            Vectors.equals(0 until d1.size, d1.values, s1.indices, s1.values)
+          case (_, _) => util.Arrays.equals(this.toArray, v2.toArray)
+        }
+      case _ => false
+    }
+  }
+
+  /**
+   * Returns a hash code value for the vector. The hash code is based on its size and its first 128
+   * nonzero entries, using a hash algorithm similar to [[java.util.Arrays.hashCode]].
+   */
+  override def hashCode(): Int = {
+    // This is a reference implementation. It calls return in foreachActive, which is slow.
+    // Subclasses should override it with optimized implementation.
+    var result: Int = 31 + size
+    var nnz = 0
+    this.foreachActive { (index, value) =>
+      if (nnz < Vectors.MAX_HASH_NNZ) {
+        // ignore explicit 0 for comparison between sparse and dense
+        if (value != 0) {
+          result = 31 * result + index
+          val bits = java.lang.Double.doubleToLongBits(value)
+          result = 31 * result + (bits ^ (bits >>> 32)).toInt
+          nnz += 1
+        }
+      } else {
+        return result
+      }
+    }
+    result
+  }
+
+  /**
+   * Converts the instance to a breeze vector.
+   */
+  private[spark] def toBreeze: BV[Double]
+
+  /**
+   * Gets the value of the ith element.
+   * @param i index
+   */
+  def apply(i: Int): Double = toBreeze(i)
+
+  /**
+   * Makes a deep copy of this vector.
+   */
+  def copy: Vector = {
+    throw new NotImplementedError(s"copy is not implemented for ${this.getClass}.")
+  }
+
+  /**
+   * Applies a function `f` to all the active elements of dense and sparse vector.
+   *
+   * @param f the function takes two parameters where the first parameter is the index of
+   *          the vector with type `Int`, and the second parameter is the corresponding value
+   *          with type `Double`.
+   */
+  def foreachActive(f: (Int, Double) => Unit): Unit
+
+  /**
+   * Number of active entries.  An "active entry" is an element which is explicitly stored,
+   * regardless of its value.  Note that inactive entries have value 0.
+   */
+  def numActives: Int
+
+  /**
+   * Number of nonzero elements. This scans all active values and count nonzeros.
+   */
+  def numNonzeros: Int
+
+  /**
+   * Converts this vector to a sparse vector with all explicit zeros removed.
+   */
+  def toSparse: SparseVector
+
+  /**
+   * Converts this vector to a dense vector.
+   */
+  def toDense: DenseVector = new DenseVector(this.toArray)
+
+  /**
+   * Returns a vector in either dense or sparse format, whichever uses less storage.
+   */
+  def compressed: Vector = {
+    val nnz = numNonzeros
+    // A dense vector needs 8 * size + 8 bytes, while a sparse vector needs 12 * nnz + 20 bytes.
+    if (1.5 * (nnz + 1.0) < size) {
+      toSparse
+    } else {
+      toDense
+    }
+  }
+
+  /**
+   * Find the index of a maximal element.  Returns the first maximal element in case of a tie.
+   * Returns -1 if vector has length 0.
+   */
+  def argmax: Int
+
+  /**
+   * Converts the vector to a JSON string.
+   */
+  def toJson: String
+}
+
+/**
+ * Factory methods for [[org.apache.spark.ml.linalg.Vector]].
+ * We don't use the name `Vector` because Scala imports
+ * [[scala.collection.immutable.Vector]] by default.
+ */
+object Vectors {
+
+  /**
+   * Creates a dense vector from its values.
+   */
+  @varargs
+  def dense(firstValue: Double, otherValues: Double*): Vector =
+    new DenseVector((firstValue +: otherValues).toArray)
+
+  // A dummy implicit is used to avoid signature collision with the one generated by @varargs.
+  /**
+   * Creates a dense vector from a double array.
+   */
+  def dense(values: Array[Double]): Vector = new DenseVector(values)
+
+  /**
+   * Creates a sparse vector providing its index array and value array.
+   *
+   * @param size vector size.
+   * @param indices index array, must be strictly increasing.
+   * @param values value array, must have the same length as indices.
+   */
+  def sparse(size: Int, indices: Array[Int], values: Array[Double]): Vector =
+    new SparseVector(size, indices, values)
+
+  /**
+   * Creates a sparse vector using unordered (index, value) pairs.
+   *
+   * @param size vector size.
+   * @param elements vector elements in (index, value) pairs.
+   */
+  def sparse(size: Int, elements: Seq[(Int, Double)]): Vector = {
+    require(size > 0, "The size of the requested sparse vector must be greater than 0.")
+
+    val (indices, values) = elements.sortBy(_._1).unzip
+    var prev = -1
+    indices.foreach { i =>
+      require(prev < i, s"Found duplicate indices: $i.")
+      prev = i
+    }
+    require(prev < size, s"You may not write an element to index $prev because the declared " +
+      s"size of your vector is $size")
+
+    new SparseVector(size, indices.toArray, values.toArray)
+  }
+
+  /**
+   * Creates a sparse vector using unordered (index, value) pairs in a Java friendly way.
+   *
+   * @param size vector size.
+   * @param elements vector elements in (index, value) pairs.
+   */
+  def sparse(size: Int, elements: JavaIterable[(JavaInteger, JavaDouble)]): Vector = {
+    sparse(size, elements.asScala.map { case (i, x) =>
+      (i.intValue(), x.doubleValue())
+    }.toSeq)
+  }
+
+  /**
+   * Creates a vector of all zeros.
+   *
+   * @param size vector size
+   * @return a zero vector
+   */
+  def zeros(size: Int): Vector = {
+    new DenseVector(new Array[Double](size))
+  }
+
+  /**
+   * Parses the JSON representation of a vector into a [[Vector]].
+   */
+  def fromJson(json: String): Vector = {
+    implicit val formats = DefaultFormats
+    val jValue = parseJson(json)
+    (jValue \ "type").extract[Int] match {
+      case 0 => // sparse
+        val size = (jValue \ "size").extract[Int]
+        val indices = (jValue \ "indices").extract[Seq[Int]].toArray
+        val values = (jValue \ "values").extract[Seq[Double]].toArray
+        sparse(size, indices, values)
+      case 1 => // dense
+        val values = (jValue \ "values").extract[Seq[Double]].toArray
+        dense(values)
+      case _ =>
+        throw new IllegalArgumentException(s"Cannot parse $json into a vector.")
+    }
+  }
+
+  /**
+   * Creates a vector instance from a breeze vector.
+   */
+  private[spark] def fromBreeze(breezeVector: BV[Double]): Vector = {
+    breezeVector match {
+      case v: BDV[Double] =>
+        if (v.offset == 0 && v.stride == 1 && v.length == v.data.length) {
+          new DenseVector(v.data)
+        } else {
+          new DenseVector(v.toArray)  // Can't use underlying array directly, so make a new one
+        }
+      case v: BSV[Double] =>
+        if (v.index.length == v.used) {
+          new SparseVector(v.length, v.index, v.data)
+        } else {
+          new SparseVector(v.length, v.index.slice(0, v.used), v.data.slice(0, v.used))
+        }
+      case v: BV[_] =>
+        sys.error("Unsupported Breeze vector type: " + v.getClass.getName)
+    }
+  }
+
+  /**
+   * Returns the p-norm of this vector.
+   * @param vector input vector.
+   * @param p norm.
+   * @return norm in L^p^ space.
+   */
+  def norm(vector: Vector, p: Double): Double = {
+    require(p >= 1.0, "To compute the p-norm of the vector, we require that you specify a p>=1. " +
+      s"You specified p=$p.")
+    val values = vector match {
+      case DenseVector(vs) => vs
+      case SparseVector(n, ids, vs) => vs
+      case v => throw new IllegalArgumentException("Do not support vector type " + v.getClass)
+    }
+    val size = values.length
+
+    if (p == 1) {
+      var sum = 0.0
+      var i = 0
+      while (i < size) {
+        sum += math.abs(values(i))
+        i += 1
+      }
+      sum
+    } else if (p == 2) {
+      var sum = 0.0
+      var i = 0
+      while (i < size) {
+        sum += values(i) * values(i)
+        i += 1
+      }
+      math.sqrt(sum)
+    } else if (p == Double.PositiveInfinity) {
+      var max = 0.0
+      var i = 0
+      while (i < size) {
+        val value = math.abs(values(i))
+        if (value > max) max = value
+        i += 1
+      }
+      max
+    } else {
+      var sum = 0.0
+      var i = 0
+      while (i < size) {
+        sum += math.pow(math.abs(values(i)), p)
+        i += 1
+      }
+      math.pow(sum, 1.0 / p)
+    }
+  }
+
+  /**
+   * Returns the squared distance between two Vectors.
+   * @param v1 first Vector.
+   * @param v2 second Vector.
+   * @return squared distance between two Vectors.
+   */
+  def sqdist(v1: Vector, v2: Vector): Double = {
+    require(v1.size == v2.size, s"Vector dimensions do not match: Dim(v1)=${v1.size} and Dim(v2)" +
+      s"=${v2.size}.")
+    var squaredDistance = 0.0
+    (v1, v2) match {
+      case (v1: SparseVector, v2: SparseVector) =>
+        val v1Values = v1.values
+        val v1Indices = v1.indices
+        val v2Values = v2.values
+        val v2Indices = v2.indices
+        val nnzv1 = v1Indices.length
+        val nnzv2 = v2Indices.length
+
+        var kv1 = 0
+        var kv2 = 0
+        while (kv1 < nnzv1 || kv2 < nnzv2) {
+          var score = 0.0
+
+          if (kv2 >= nnzv2 || (kv1 < nnzv1 && v1Indices(kv1) < v2Indices(kv2))) {
+            score = v1Values(kv1)
+            kv1 += 1
+          } else if (kv1 >= nnzv1 || (kv2 < nnzv2 && v2Indices(kv2) < v1Indices(kv1))) {
+            score = v2Values(kv2)
+            kv2 += 1
+          } else {
+            score = v1Values(kv1) - v2Values(kv2)
+            kv1 += 1
+            kv2 += 1
+          }
+          squaredDistance += score * score
+        }
+
+      case (v1: SparseVector, v2: DenseVector) =>
+        squaredDistance = sqdist(v1, v2)
+
+      case (v1: DenseVector, v2: SparseVector) =>
+        squaredDistance = sqdist(v2, v1)
+
+      case (DenseVector(vv1), DenseVector(vv2)) =>
+        var kv = 0
+        val sz = vv1.length
+        while (kv < sz) {
+          val score = vv1(kv) - vv2(kv)
+          squaredDistance += score * score
+          kv += 1
+        }
+      case _ =>
+        throw new IllegalArgumentException("Do not support vector type " + v1.getClass +
+          " and " + v2.getClass)
+    }
+    squaredDistance
+  }
+
+  /**
+   * Returns the squared distance between DenseVector and SparseVector.
+   */
+  private[ml] def sqdist(v1: SparseVector, v2: DenseVector): Double = {
+    var kv1 = 0
+    var kv2 = 0
+    val indices = v1.indices
+    var squaredDistance = 0.0
+    val nnzv1 = indices.length
+    val nnzv2 = v2.size
+    var iv1 = if (nnzv1 > 0) indices(kv1) else -1
+
+    while (kv2 < nnzv2) {
+      var score = 0.0
+      if (kv2 != iv1) {
+        score = v2(kv2)
+      } else {
+        score = v1.values(kv1) - v2(kv2)
+        if (kv1 < nnzv1 - 1) {
+          kv1 += 1
+          iv1 = indices(kv1)
+        }
+      }
+      squaredDistance += score * score
+      kv2 += 1
+    }
+    squaredDistance
+  }
+
+  /**
+   * Check equality between sparse/dense vectors
+   */
+  private[ml] def equals(
+      v1Indices: IndexedSeq[Int],
+      v1Values: Array[Double],
+      v2Indices: IndexedSeq[Int],
+      v2Values: Array[Double]): Boolean = {
+    val v1Size = v1Values.length
+    val v2Size = v2Values.length
+    var k1 = 0
+    var k2 = 0
+    var allEqual = true
+    while (allEqual) {
+      while (k1 < v1Size && v1Values(k1) == 0) k1 += 1
+      while (k2 < v2Size && v2Values(k2) == 0) k2 += 1
+
+      if (k1 >= v1Size || k2 >= v2Size) {
+        return k1 >= v1Size && k2 >= v2Size // check end alignment
+      }
+      allEqual = v1Indices(k1) == v2Indices(k2) && v1Values(k1) == v2Values(k2)
+      k1 += 1
+      k2 += 1
+    }
+    allEqual
+  }
+
+  /** Max number of nonzero entries used in computing hash code. */
+  private[linalg] val MAX_HASH_NNZ = 128
+}
+
+/**
+ * A dense vector represented by a value array.
+ */
+class DenseVector (val values: Array[Double]) extends Vector {
+
+  override def size: Int = values.length
+
+  override def toString: String = values.mkString("[", ",", "]")
+
+  override def toArray: Array[Double] = values
+
+  private[spark] override def toBreeze: BV[Double] = new BDV[Double](values)
+
+  override def apply(i: Int): Double = values(i)
+
+  override def copy: DenseVector = {
+    new DenseVector(values.clone())
+  }
+
+  override def foreachActive(f: (Int, Double) => Unit): Unit = {
+    var i = 0
+    val localValuesSize = values.length
+    val localValues = values
+
+    while (i < localValuesSize) {
+      f(i, localValues(i))
+      i += 1
+    }
+  }
+
+  override def hashCode(): Int = {
+    var result: Int = 31 + size
+    var i = 0
+    val end = values.length
+    var nnz = 0
+    while (i < end && nnz < Vectors.MAX_HASH_NNZ) {
+      val v = values(i)
+      if (v != 0.0) {
+        result = 31 * result + i
+        val bits = java.lang.Double.doubleToLongBits(values(i))
+        result = 31 * result + (bits ^ (bits >>> 32)).toInt
+        nnz += 1
+      }
+      i += 1
+    }
+    result
+  }
+
+  override def numActives: Int = size
+
+  override def numNonzeros: Int = {
+    // same as values.count(_ != 0.0) but faster
+    var nnz = 0
+    values.foreach { v =>
+      if (v != 0.0) {
+        nnz += 1
+      }
+    }
+    nnz
+  }
+
+  override def toSparse: SparseVector = {
+    val nnz = numNonzeros
+    val ii = new Array[Int](nnz)
+    val vv = new Array[Double](nnz)
+    var k = 0
+    foreachActive { (i, v) =>
+      if (v != 0) {
+        ii(k) = i
+        vv(k) = v
+        k += 1
+      }
+    }
+    new SparseVector(size, ii, vv)
+  }
+
+  override def argmax: Int = {
+    if (size == 0) {
+      -1
+    } else {
+      var maxIdx = 0
+      var maxValue = values(0)
+      var i = 1
+      while (i < size) {
+        if (values(i) > maxValue) {
+          maxIdx = i
+          maxValue = values(i)
+        }
+        i += 1
+      }
+      maxIdx
+    }
+  }
+
+  override def toJson: String = {
+    val jValue = ("type" -> 1) ~ ("values" -> values.toSeq)
+    compact(render(jValue))
+  }
+}
+
+object DenseVector {
+
+  /** Extracts the value array from a dense vector. */
+  def unapply(dv: DenseVector): Option[Array[Double]] = Some(dv.values)
+}
+
+/**
+ * A sparse vector represented by an index array and an value array.
+ *
+ * @param size size of the vector.
+ * @param indices index array, assume to be strictly increasing.
+ * @param values value array, must have the same length as the index array.
+ */
+class SparseVector (
+    override val size: Int,
+    val indices: Array[Int],
+    val values: Array[Double]) extends Vector {
+
+  require(indices.length == values.length, "Sparse vectors require that the dimension of the" +
+    s" indices match the dimension of the values. You provided ${indices.length} indices and " +
+    s" ${values.length} values.")
+  require(indices.length <= size, s"You provided ${indices.length} indices and values, " +
+    s"which exceeds the specified vector size ${size}.")
+
+  override def toString: String =
+    s"($size,${indices.mkString("[", ",", "]")},${values.mkString("[", ",", "]")})"
+
+  override def toArray: Array[Double] = {
+    val data = new Array[Double](size)
+    var i = 0
+    val nnz = indices.length
+    while (i < nnz) {
+      data(indices(i)) = values(i)
+      i += 1
+    }
+    data
+  }
+
+  override def copy: SparseVector = {
+    new SparseVector(size, indices.clone(), values.clone())
+  }
+
+  private[spark] override def toBreeze: BV[Double] = new BSV[Double](indices, values, size)
+
+  override def foreachActive(f: (Int, Double) => Unit): Unit = {
+    var i = 0
+    val localValuesSize = values.length
+    val localIndices = indices
+    val localValues = values
+
+    while (i < localValuesSize) {
+      f(localIndices(i), localValues(i))
+      i += 1
+    }
+  }
+
+  override def hashCode(): Int = {
+    var result: Int = 31 + size
+    val end = values.length
+    var k = 0
+    var nnz = 0
+    while (k < end && nnz < Vectors.MAX_HASH_NNZ) {
+      val v = values(k)
+      if (v != 0.0) {
+        val i = indices(k)
+        result = 31 * result + i
+        val bits = java.lang.Double.doubleToLongBits(v)
+        result = 31 * result + (bits ^ (bits >>> 32)).toInt
+        nnz += 1
+      }
+      k += 1
+    }
+    result
+  }
+
+  override def numActives: Int = values.length
+
+  override def numNonzeros: Int = {
+    var nnz = 0
+    values.foreach { v =>
+      if (v != 0.0) {
+        nnz += 1
+      }
+    }
+    nnz
+  }
+
+  override def toSparse: SparseVector = {
+    val nnz = numNonzeros
+    if (nnz == numActives) {
+      this
+    } else {
+      val ii = new Array[Int](nnz)
+      val vv = new Array[Double](nnz)
+      var k = 0
+      foreachActive { (i, v) =>
+        if (v != 0.0) {
+          ii(k) = i
+          vv(k) = v
+          k += 1
+        }
+      }
+      new SparseVector(size, ii, vv)
+    }
+  }
+
+  override def argmax: Int = {
+    if (size == 0) {
+      -1
+    } else {
+      // Find the max active entry.
+      var maxIdx = indices(0)
+      var maxValue = values(0)
+      var maxJ = 0
+      var j = 1
+      val na = numActives
+      while (j < na) {
+        val v = values(j)
+        if (v > maxValue) {
+          maxValue = v
+          maxIdx = indices(j)
+          maxJ = j
+        }
+        j += 1
+      }
+
+      // If the max active entry is nonpositive and there exists inactive ones, find the first zero.
+      if (maxValue <= 0.0 && na < size) {
+        if (maxValue == 0.0) {
+          // If there exists an inactive entry before maxIdx, find it and return its index.
+          if (maxJ < maxIdx) {
+            var k = 0
+            while (k < maxJ && indices(k) == k) {
+              k += 1
+            }
+            maxIdx = k
+          }
+        } else {
+          // If the max active value is negative, find and return the first inactive index.
+          var k = 0
+          while (k < na && indices(k) == k) {
+            k += 1
+          }
+          maxIdx = k
+        }
+      }
+
+      maxIdx
+    }
+  }
+
+  /**
+   * Create a slice of this vector based on the given indices.
+   * @param selectedIndices Unsorted list of indices into the vector.
+   *                        This does NOT do bound checking.
+   * @return  New SparseVector with values in the order specified by the given indices.
+   *
+   * NOTE: The API needs to be discussed before making this public.
+   *       Also, if we have a version assuming indices are sorted, we should optimize it.
+   */
+  private[spark] def slice(selectedIndices: Array[Int]): SparseVector = {
+    var currentIdx = 0
+    val (sliceInds, sliceVals) = selectedIndices.flatMap { origIdx =>
+      val iIdx = java.util.Arrays.binarySearch(this.indices, origIdx)
+      val i_v = if (iIdx >= 0) {
+        Iterator((currentIdx, this.values(iIdx)))
+      } else {
+        Iterator()
+      }
+      currentIdx += 1
+      i_v
+    }.unzip
+    new SparseVector(selectedIndices.length, sliceInds.toArray, sliceVals.toArray)
+  }
+
+  override def toJson: String = {
+    val jValue = ("type" -> 0) ~
+      ("size" -> size) ~
+      ("indices" -> indices.toSeq) ~
+      ("values" -> values.toSeq)
+    compact(render(jValue))
+  }
+}
+
+object SparseVector {
+  def unapply(sv: SparseVector): Option[(Int, Array[Int], Array[Double])] =
+    Some((sv.size, sv.indices, sv.values))
+}

http://git-wip-us.apache.org/repos/asf/spark/blob/96534aa4/mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala
----------------------------------------------------------------------
diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala
deleted file mode 100644
index 51b7c24..0000000
--- a/mllib-local/src/test/scala/org/apache/spark/ml/DummyTestingSuite.scala
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.ml
-
-import org.scalatest.FunSuite // scalastyle:ignore funsuite
-
-// This is testing if the new build works. To be removed soon.
-class DummyTestingSuite extends FunSuite { // scalastyle:ignore funsuite
-
-  test("This is testing if the new build works.") {
-    assert(DummyTesting.add10(15) === 25)
-  }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org