You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by sr...@apache.org on 2018/01/24 18:13:50 UTC
spark git commit: [SPARK-23152][ML] - Correctly guard against empty datasets
Repository: spark
Updated Branches:
refs/heads/master bbb87b350 -> 840dea64a
[SPARK-23152][ML] - Correctly guard against empty datasets
## What changes were proposed in this pull request?
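Correctly guard against empty datasets in `org.apache.spark.ml.classification.Classifier`. The check on the max-label query result now also treats a null value in the first row as an empty dataset, instead of only checking whether the returned array of rows is empty.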
## How was this patch tested?
Existing tests, plus a new empty-dataset case added to `ClassifierSuite`.
Author: Matthew Tovbin <mt...@salesforce.com>
Closes #20321 from tovbinm/SPARK-23152.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/840dea64
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/840dea64
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/840dea64
Branch: refs/heads/master
Commit: 840dea64abd8a3a5960de830f19a57f5f1aa3bf6
Parents: bbb87b3
Author: Matthew Tovbin <mt...@salesforce.com>
Authored: Wed Jan 24 13:13:44 2018 -0500
Committer: Sean Owen <so...@cloudera.com>
Committed: Wed Jan 24 13:13:44 2018 -0500
----------------------------------------------------------------------
.../scala/org/apache/spark/ml/classification/Classifier.scala | 2 +-
.../org/apache/spark/ml/classification/ClassifierSuite.scala | 7 +++++++
2 files changed, 8 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/840dea64/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
index bc0b49d..9d1d5aa 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala
@@ -109,7 +109,7 @@ abstract class Classifier[
case None =>
// Get number of classes from dataset itself.
val maxLabelRow: Array[Row] = dataset.select(max($(labelCol))).take(1)
- if (maxLabelRow.isEmpty) {
+ if (maxLabelRow.isEmpty || maxLabelRow(0).get(0) == null) {
throw new SparkException("ML algorithm was given empty dataset.")
}
val maxDoubleLabel: Double = maxLabelRow.head.getDouble(0)
http://git-wip-us.apache.org/repos/asf/spark/blob/840dea64/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
index de71207..87bf2be 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/ClassifierSuite.scala
@@ -90,6 +90,13 @@ class ClassifierSuite extends SparkFunSuite with MLlibTestSparkContext {
}
assert(e.getMessage.contains("requires integers in range"))
}
+ val df3 = getTestData(Seq.empty[Double])
+ withClue("getNumClasses should fail if dataset is empty") {
+ val e: SparkException = intercept[SparkException] {
+ c.getNumClasses(df3)
+ }
+ assert(e.getMessage == "ML algorithm was given empty dataset.")
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org