You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2016/02/23 19:30:03 UTC

spark git commit: [SPARK-13338][ML] Allow setting 'degree' parameter to 1 for PolynomialExpansion

Repository: spark
Updated Branches:
  refs/heads/master 4d1e5f92e -> 5d69eaf09


[SPARK-13338][ML] Allow setting 'degree' parameter to 1 for PolynomialExpansion

Author: Grzegorz Chilkiewicz <gr...@codilime.com>

Closes #11216 from grzegorz-chilkiewicz/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5d69eaf0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5d69eaf0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5d69eaf0

Branch: refs/heads/master
Commit: 5d69eaf097bfb9fad9f6e4433c6cd40ba0552a56
Parents: 4d1e5f9
Author: Grzegorz Chilkiewicz <gr...@codilime.com>
Authored: Tue Feb 23 10:30:02 2016 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Tue Feb 23 10:30:02 2016 -0800

----------------------------------------------------------------------
 .../spark/ml/feature/PolynomialExpansion.scala  |  2 +-
 .../ml/feature/PolynomialExpansionSuite.scala   | 72 +++++++++++---------
 2 files changed, 41 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5d69eaf0/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index 42b26c8..0a9b971 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -46,7 +46,7 @@ class PolynomialExpansion(override val uid: String)
    * @group param
    */
   val degree = new IntParam(this, "degree", "the polynomial degree to expand (>= 1)",
-    ParamValidators.gt(1))
+    ParamValidators.gtEq(1))
 
   setDefault(degree -> 2)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/5d69eaf0/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
index dfdc579..86dbee1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
@@ -34,22 +34,31 @@ class PolynomialExpansionSuite
     ParamsSuite.checkParams(new PolynomialExpansion)
   }
 
-  test("Polynomial expansion with default parameter") {
-    val data = Array(
-      Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
-      Vectors.dense(-2.0, 2.3),
-      Vectors.dense(0.0, 0.0, 0.0),
-      Vectors.dense(0.6, -1.1, -3.0),
-      Vectors.sparse(3, Seq())
-    )
-
-    val twoDegreeExpansion: Array[Vector] = Array(
-      Vectors.sparse(9, Array(0, 1, 2, 3, 4), Array(-2.0, 4.0, 2.3, -4.6, 5.29)),
-      Vectors.dense(-2.0, 4.0, 2.3, -4.6, 5.29),
-      Vectors.dense(new Array[Double](9)),
-      Vectors.dense(0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
-      Vectors.sparse(9, Array.empty, Array.empty))
+  private val data = Array(
+    Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
+    Vectors.dense(-2.0, 2.3),
+    Vectors.dense(0.0, 0.0, 0.0),
+    Vectors.dense(0.6, -1.1, -3.0),
+    Vectors.sparse(3, Seq())
+  )
+
+  private val twoDegreeExpansion: Array[Vector] = Array(
+    Vectors.sparse(9, Array(0, 1, 2, 3, 4), Array(-2.0, 4.0, 2.3, -4.6, 5.29)),
+    Vectors.dense(-2.0, 4.0, 2.3, -4.6, 5.29),
+    Vectors.dense(new Array[Double](9)),
+    Vectors.dense(0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
+    Vectors.sparse(9, Array.empty, Array.empty))
+
+  private val threeDegreeExpansion: Array[Vector] = Array(
+    Vectors.sparse(19, Array(0, 1, 2, 3, 4, 5, 6, 7, 8),
+      Array(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
+    Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
+    Vectors.dense(new Array[Double](19)),
+    Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
+      -1.08, 3.3, 1.98, -3.63, 9.0, 5.4, -9.9, -27.0),
+    Vectors.sparse(19, Array.empty, Array.empty))
 
+  test("Polynomial expansion with default parameter") {
     val df = sqlContext.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
@@ -67,23 +76,6 @@ class PolynomialExpansionSuite
   }
 
   test("Polynomial expansion with setter") {
-    val data = Array(
-      Vectors.sparse(3, Seq((0, -2.0), (1, 2.3))),
-      Vectors.dense(-2.0, 2.3),
-      Vectors.dense(0.0, 0.0, 0.0),
-      Vectors.dense(0.6, -1.1, -3.0),
-      Vectors.sparse(3, Seq())
-    )
-
-    val threeDegreeExpansion: Array[Vector] = Array(
-      Vectors.sparse(19, Array(0, 1, 2, 3, 4, 5, 6, 7, 8),
-        Array(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
-      Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
-      Vectors.dense(new Array[Double](19)),
-      Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
-        -1.08, 3.3, 1.98, -3.63, 9.0, 5.4, -9.9, -27.0),
-      Vectors.sparse(19, Array.empty, Array.empty))
-
     val df = sqlContext.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected")
 
     val polynomialExpansion = new PolynomialExpansion()
@@ -101,6 +93,22 @@ class PolynomialExpansionSuite
     }
   }
 
+  test("Polynomial expansion with degree 1 is identity on vectors") {
+    // Degree 1 adds no higher-order terms, so each input doubles as its expected output.
+    val df = sqlContext.createDataFrame(data.zip(data)).toDF("features", "expected")
+    val polynomialExpansion = new PolynomialExpansion()
+      .setInputCol("features")
+      .setOutputCol("polyFeatures")
+      .setDegree(1)
+
+    polynomialExpansion.transform(df).select("polyFeatures", "expected").collect().foreach {
+      case Row(expanded: Vector, expected: Vector) =>
+        // Tight tolerance: a loose bound such as 1e-1 would let a non-identity result pass.
+        assert(expanded ~== expected absTol 1e-8)
+      case _ =>
+        throw new TestFailedException("Unmatched data types after polynomial expansion", 0)
+    }
+  }
+
   test("read/write") {
     val t = new PolynomialExpansion()
       .setInputCol("myInputCol")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org