You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by yl...@apache.org on 2017/07/07 10:32:07 UTC
spark git commit: [SPARK-21285][ML] VectorAssembler reports the column name of unsupported data type
Repository: spark
Updated Branches:
refs/heads/master 7fcbb9b57 -> 56536e999
[SPARK-21285][ML] VectorAssembler reports the column name of unsupported data type
## What changes were proposed in this pull request?
Add the column name to the exception message that is raised when an input column has an unsupported data type.
## How was this patch tested?
+ [x] pass all tests.
Author: Yan Facai (颜发才) <fa...@gmail.com>
Closes #18523 from facaiy/ENH/vectorassembler_add_col.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/56536e99
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/56536e99
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/56536e99
Branch: refs/heads/master
Commit: 56536e9992ac4ea771758463962e49bba410e896
Parents: 7fcbb9b
Author: Yan Facai (颜发才) <fa...@gmail.com>
Authored: Fri Jul 7 18:32:01 2017 +0800
Committer: Yanbo Liang <yb...@gmail.com>
Committed: Fri Jul 7 18:32:01 2017 +0800
----------------------------------------------------------------------
.../apache/spark/ml/feature/VectorAssembler.scala | 15 +++++++++------
.../spark/ml/feature/VectorAssemblerSuite.scala | 5 ++++-
2 files changed, 13 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/56536e99/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
index ca90053..73f27d1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala
@@ -113,12 +113,15 @@ class VectorAssembler @Since("1.4.0") (@Since("1.4.0") override val uid: String)
override def transformSchema(schema: StructType): StructType = {
val inputColNames = $(inputCols)
val outputColName = $(outputCol)
- val inputDataTypes = inputColNames.map(name => schema(name).dataType)
- inputDataTypes.foreach {
- case _: NumericType | BooleanType =>
- case t if t.isInstanceOf[VectorUDT] =>
- case other =>
- throw new IllegalArgumentException(s"Data type $other is not supported.")
+ val incorrectColumns = inputColNames.flatMap { name =>
+ schema(name).dataType match {
+ case _: NumericType | BooleanType => None
+ case t if t.isInstanceOf[VectorUDT] => None
+ case other => Some(s"Data type $other of column $name is not supported.")
+ }
+ }
+ if (incorrectColumns.nonEmpty) {
+ throw new IllegalArgumentException(incorrectColumns.mkString("\n"))
}
if (schema.fieldNames.contains(outputColName)) {
throw new IllegalArgumentException(s"Output column $outputColName already exists.")
http://git-wip-us.apache.org/repos/asf/spark/blob/56536e99/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
index 46cced3..6aef1c6 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
@@ -79,7 +79,10 @@ class VectorAssemblerSuite
val thrown = intercept[IllegalArgumentException] {
assembler.transform(df)
}
- assert(thrown.getMessage contains "Data type StringType is not supported")
+ assert(thrown.getMessage contains
+ "Data type StringType of column a is not supported.\n" +
+ "Data type StringType of column b is not supported.\n" +
+ "Data type StringType of column c is not supported.")
}
test("ML attributes") {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org