You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by db...@apache.org on 2016/09/21 18:56:26 UTC

spark git commit: [SPARK-11918][ML] Better error from WLS for cases like singular input

Repository: spark
Updated Branches:
  refs/heads/master d7ee12211 -> b4a4421b6


[SPARK-11918][ML] Better error from WLS for cases like singular input

## What changes were proposed in this pull request?

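Update error handling for Cholesky decomposition to provide more informative errors when the input is singular: the LAPACK return code is now checked and reported through a descriptive exception instead of a bare assert.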

## How was this patch tested?

New test case; Jenkins tests.

Author: Sean Owen <so...@cloudera.com>

Closes #15177 from srowen/SPARK-11918.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4a4421b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4a4421b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4a4421b

Branch: refs/heads/master
Commit: b4a4421b610e776e5280fd5e7453f937f806cbd1
Parents: d7ee122
Author: Sean Owen <so...@cloudera.com>
Authored: Wed Sep 21 18:56:16 2016 +0000
Committer: DB Tsai <db...@netflix.com>
Committed: Wed Sep 21 18:56:16 2016 +0000

----------------------------------------------------------------------
 .../mllib/linalg/CholeskyDecomposition.scala    | 19 +++++++++++++++----
 .../ml/optim/WeightedLeastSquaresSuite.scala    | 20 ++++++++++++++++++++
 2 files changed, 35 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b4a4421b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
index e449479..08f8f19 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala
@@ -36,8 +36,7 @@ private[spark] object CholeskyDecomposition {
     val k = bx.length
     val info = new intW(0)
     lapack.dppsv("U", k, 1, A, bx, k, info)
-    val code = info.`val`
-    assert(code == 0, s"lapack.dppsv returned $code.")
+    checkReturnValue(info, "dppsv")
     bx
   }
 
@@ -52,8 +51,20 @@ private[spark] object CholeskyDecomposition {
   def inverse(UAi: Array[Double], k: Int): Array[Double] = {
     val info = new intW(0)
     lapack.dpptri("U", k, UAi, info)
-    val code = info.`val`
-    assert(code == 0, s"lapack.dpptri returned $code.")
+    checkReturnValue(info, "dpptri")
     UAi
   }
+
+  private def checkReturnValue(info: intW, method: String): Unit = {
+    info.`val` match {
+      case code if code < 0 =>
+        throw new IllegalStateException(s"LAPACK.$method returned $code; arg ${-code} is illegal")
+      case code if code > 0 =>
+        throw new IllegalArgumentException(
+          s"LAPACK.$method returned $code because A is not positive definite. Is A derived from " +
+          "a singular matrix (e.g. collinear column values)?")
+      case _ => // do nothing
+    }
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b4a4421b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
index c8de796..2cb1af0 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
@@ -60,6 +60,26 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
     ), 2)
   }
 
+  test("two collinear features result in error with no regularization") {
+    val singularInstances = sc.parallelize(Seq(
+      Instance(1.0, 1.0, Vectors.dense(1.0, 2.0)),
+      Instance(2.0, 1.0, Vectors.dense(2.0, 4.0)),
+      Instance(3.0, 1.0, Vectors.dense(3.0, 6.0)),
+      Instance(4.0, 1.0, Vectors.dense(4.0, 8.0))
+    ), 2)
+
+    intercept[IllegalArgumentException] {
+      new WeightedLeastSquares(
+        false, regParam = 0.0, standardizeFeatures = false,
+        standardizeLabel = false).fit(singularInstances)
+    }
+
+    // Should not throw an exception
+    new WeightedLeastSquares(
+      false, regParam = 1.0, standardizeFeatures = false,
+      standardizeLabel = false).fit(singularInstances)
+  }
+
   test("WLS against lm") {
     /*
        R code:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org