You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@flink.apache.org by tr...@apache.org on 2015/06/02 18:53:35 UTC

flink git commit: [ml] Fixes implicit issue with BreezeVectorConverter

Repository: flink
Updated Branches:
  refs/heads/master 950b79c59 -> 90c0142ef


[ml] Fixes implicit issue with BreezeVectorConverter


Project: http://git-wip-us.apache.org/repos/asf/flink/repo
Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/90c0142e
Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/90c0142e
Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/90c0142e

Branch: refs/heads/master
Commit: 90c0142ef21ae329acc843558e7ba6bc160df8d8
Parents: 950b79c
Author: Till Rohrmann <tr...@apache.org>
Authored: Tue Jun 2 13:22:47 2015 +0200
Committer: Till Rohrmann <tr...@apache.org>
Committed: Tue Jun 2 17:02:26 2015 +0200

----------------------------------------------------------------------
 .../flink/ml/math/BreezeVectorConverter.scala   | 47 --------------------
 .../org/apache/flink/ml/math/DenseVector.scala  | 15 +++++++
 .../org/apache/flink/ml/math/SparseVector.scala | 22 +++++++++
 .../scala/org/apache/flink/ml/math/Vector.scala | 23 ++++++++++
 .../flink/ml/preprocessing/StandardScaler.scala |  2 +-
 .../apache/flink/ml/math/BreezeMathSuite.scala  | 30 +++++++++++++
 6 files changed, 91 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala
index f5f7469..0bb24f3 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala
@@ -18,8 +18,6 @@
 
 package org.apache.flink.ml.math
 
-import breeze.linalg.{SparseVector => BreezeSparseVector}
-import breeze.linalg.{DenseVector => BreezeDenseVector}
 import breeze.linalg.{Vector => BreezeVector}
 
 /** Type class which allows the conversion from Breeze vectors to Flink vectors
@@ -34,48 +32,3 @@ trait BreezeVectorConverter[T <: Vector] extends Serializable {
     */
   def convert(vector: BreezeVector[Double]): T
 }
-
-object BreezeVectorConverter{
-
-  /** Type class implementation for [[org.apache.flink.ml.math.DenseVector]] */
-  implicit val denseVectorConverter = new BreezeVectorConverter[DenseVector] {
-    override def convert(vector: BreezeVector[Double]): DenseVector = {
-      vector match {
-        case dense: BreezeDenseVector[Double] => new DenseVector(dense.data)
-        case sparse: BreezeSparseVector[Double] => new DenseVector(sparse.toDenseVector.data)
-      }
-    }
-  }
-
-  /** Type class implementation for [[org.apache.flink.ml.math.SparseVector]] */
-  implicit val sparseVectorConverter = new BreezeVectorConverter[SparseVector] {
-    override def convert(vector: BreezeVector[Double]): SparseVector = {
-      vector match {
-        case dense: BreezeDenseVector[Double] =>
-          SparseVector.fromCOO(
-            dense.length,
-            dense.iterator.toIterable)
-        case sparse: BreezeSparseVector[Double] =>
-          new SparseVector(
-            sparse.used,
-            sparse.index.take(sparse.used),
-            sparse.data.take(sparse.used))
-      }
-    }
-  }
-
-  /** Type class implementation for [[Vector]] */
-  implicit val vectorConverter = new BreezeVectorConverter[Vector] {
-    override def convert(vector: BreezeVector[Double]): Vector = {
-      vector match {
-        case dense: BreezeDenseVector[Double] => new DenseVector(dense.data)
-
-        case sparse: BreezeSparseVector[Double] =>
-          new SparseVector(
-            sparse.used,
-            sparse.index.take(sparse.used),
-            sparse.data.take(sparse.used))
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
index 079e4bc..f242496 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala
@@ -18,6 +18,8 @@
 
 package org.apache.flink.ml.math
 
+import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector => BreezeDenseVector, Vector => BreezeVector}
+
 /**
  * Dense vector implementation of [[Vector]]. The data is represented in a continuous array of
  * doubles.
@@ -134,4 +136,17 @@ object DenseVector {
   def init(size: Int, value: Double): DenseVector = {
     new DenseVector(Array.fill(size)(value))
   }
+
+  /** BreezeVectorConverter implementation for [[org.apache.flink.ml.math.DenseVector]]
+    *
+    * This allows Breeze vectors to be converted into [[DenseVector]].
+    */
+  implicit val denseVectorConverter = new BreezeVectorConverter[DenseVector] {
+    override def convert(vector: BreezeVector[Double]): DenseVector = {
+      vector match {
+        case dense: BreezeDenseVector[Double] => new DenseVector(dense.data)
+        case sparse: BreezeSparseVector[Double] => new DenseVector(sparse.toDenseVector.data)
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
index ddfa084..cc2a227 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala
@@ -18,6 +18,8 @@
 
 package org.apache.flink.ml.math
 
+import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector => BreezeDenseVector, Vector => BreezeVector}
+
 import scala.util.Sorting
 
 /** Sparse vector implementation storing the data in two arrays. One index contains the sorted
@@ -226,4 +228,24 @@ object SparseVector {
   def fromCOO(size: Int, entry: (Int, Int)): SparseVector = {
     fromCOO(size, (entry._1, entry._2.toDouble))
   }
+
+  /** BreezeVectorConverter implementation for [[org.apache.flink.ml.math.SparseVector]]
+    *
+    * This allows Breeze vectors to be converted into [[SparseVector]].
+    */
+  implicit val sparseVectorConverter = new BreezeVectorConverter[SparseVector] {
+    override def convert(vector: BreezeVector[Double]): SparseVector = {
+      vector match {
+        case dense: BreezeDenseVector[Double] =>
+          SparseVector.fromCOO(
+            dense.length,
+            dense.iterator.toIterable)
+        case sparse: BreezeSparseVector[Double] =>
+          new SparseVector(
+            sparse.used,
+            sparse.index.take(sparse.used),
+            sparse.data.take(sparse.used))
+      }
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
index 0b1f0cd..ca87e5b 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala
@@ -18,6 +18,9 @@
 
 package org.apache.flink.ml.math
 
+import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector => BreezeDenseVector, Vector => BreezeVector}
+import org.apache.flink.ml.math.Vector
+
 /** Base trait for Vectors
   *
   */
@@ -72,3 +75,23 @@ trait Vector extends Serializable {
     }
   }
 }
+
+object Vector{
+  /** BreezeVectorConverter implementation for [[Vector]]
+    *
+    * This allows Breeze vectors to be converted into [[Vector]].
+    */
+  implicit val vectorConverter = new BreezeVectorConverter[Vector] {
+    override def convert(vector: BreezeVector[Double]): Vector = {
+      vector match {
+        case dense: BreezeDenseVector[Double] => new DenseVector(dense.data)
+
+        case sparse: BreezeSparseVector[Double] =>
+          new SparseVector(
+            sparse.used,
+            sparse.index.take(sparse.used),
+            sparse.data.take(sparse.used))
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala
index 7992b02..3b9c8d2 100644
--- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala
+++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala
@@ -250,7 +250,7 @@ object StandardScaler {
                 breezeVector -= broadcastMean
                 breezeVector :/= broadcastStd
                 breezeVector = (breezeVector :* std) + mean
-                LabeledVector(label, breezeVector.fromBreeze[Vector])
+                LabeledVector(label, breezeVector.fromBreeze)
               }
             }
           }

http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala
----------------------------------------------------------------------
diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala
index b03f08f..0d230c5 100644
--- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala
+++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala
@@ -19,6 +19,7 @@
 package org.apache.flink.ml.math
 
 import Breeze._
+import breeze.linalg
 
 import org.scalatest.{Matchers, FlatSpec}
 
@@ -65,4 +66,33 @@ class BreezeMathSuite extends FlatSpec with Matchers {
 
     result should equal(expectedMatrix)
   }
+
+  it should "convert a dense Flink vector into a dense Breeze vector and vice versa" in {
+    val vector = DenseVector(1, 2, 3)
+
+    val breezeVector = vector.asBreeze
+
+    val flinkVector = breezeVector.fromBreeze
+
+    breezeVector.getClass should be(new linalg.DenseVector[Double](0).getClass())
+    flinkVector.getClass should be (classOf[DenseVector])
+
+    flinkVector should equal(vector)
+  }
+
+  it should "convert a sparse Flink vector into a sparse Breeze vector and given the right " +
+    "converter back into a dense Flink vector" in {
+    implicit val converter = implicitly[BreezeVectorConverter[DenseVector]]
+
+    val vector = SparseVector.fromCOO(3, (1, 1.0), (2, 2.0))
+
+    val breezeVector = vector.asBreeze
+
+    val flinkVector = breezeVector.fromBreeze
+
+    breezeVector.getClass should be(new linalg.SparseVector[Double](null).getClass())
+    flinkVector.getClass should be (classOf[DenseVector])
+
+    flinkVector.equalsVector(vector) should be(true)
+  }
 }