You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by me...@apache.org on 2016/02/12 02:28:39 UTC

spark git commit: [SPARK-12765][ML][COUNTVECTORIZER] Add the missing transformSchema call to CountVectorizerModel.transform

Repository: spark
Updated Branches:
  refs/heads/master b35467388 -> a5257048d


[SPARK-12765][ML][COUNTVECTORIZER] Add the missing transformSchema call to CountVectorizerModel.transform

https://issues.apache.org/jira/browse/SPARK-12765

Author: Liu Xiang <lx...@gmail.com>

Closes #10720 from sloth2012/sloth.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a5257048
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a5257048
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a5257048

Branch: refs/heads/master
Commit: a5257048d74359c3fa7810009be1d60d370e2896
Parents: b354673
Author: Liu Xiang <lx...@gmail.com>
Authored: Thu Feb 11 17:28:37 2016 -0800
Committer: Xiangrui Meng <me...@databricks.com>
Committed: Thu Feb 11 17:28:37 2016 -0800

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/ml/feature/CountVectorizer.scala    | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a5257048/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
index 10dcda2..d5cb05f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala
@@ -210,6 +210,7 @@ class CountVectorizerModel(override val uid: String, val vocabulary: Array[Strin
   private var broadcastDict: Option[Broadcast[Map[String, Int]]] = None
 
   override def transform(dataset: DataFrame): DataFrame = {
+    transformSchema(dataset.schema, logging = true)
     if (broadcastDict.isEmpty) {
       val dict = vocabulary.zipWithIndex.toMap
       broadcastDict = Some(dataset.sqlContext.sparkContext.broadcast(dict))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org