You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Apache Spark (Jira)" <ji...@apache.org> on 2021/02/04 10:22:00 UTC
[jira] [Commented] (SPARK-34356) OVR transform fix potential column conflict
[ https://issues.apache.org/jira/browse/SPARK-34356?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17278749#comment-17278749 ]
Apache Spark commented on SPARK-34356:
--------------------------------------
User 'zhengruifeng' has created a pull request for this issue:
https://github.com/apache/spark/pull/31472
> OVR transform fix potential column conflict
> -------------------------------------------
>
> Key: SPARK-34356
> URL: https://issues.apache.org/jira/browse/SPARK-34356
> Project: Spark
> Issue Type: Improvement
> Components: ML
> Affects Versions: 3.2.0
> Reporter: zhengruifeng
> Assignee: zhengruifeng
> Priority: Major
>
> {code:java}
> import org.apache.spark.ml.classification._
> val df = spark.read.format("libsvm").load("/d0/Dev/Opensource/spark/data/mllib/sample_multiclass_classification_data.txt").withColumn("probability", lit(0.0))
> val classifier = new LogisticRegression().setMaxIter(1).setTol(1E-6).setFitIntercept(true)
> val ovr = new OneVsRest().setClassifier(classifier)
> val ovrm = ovr.fit(df)
> ovrm.transform(df)
> java.lang.IllegalArgumentException: requirement failed: Column probability already exists.
> at scala.Predef$.require(Predef.scala:281)
> at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:106)
> at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:96)
> at org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema(ProbabilisticClassifier.scala:38)
> at org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema$(ProbabilisticClassifier.scala:33)
> at org.apache.spark.ml.classification.LogisticRegressionModel.org$apache$spark$ml$classification$LogisticRegressionParams$$super$validateAndTransformSchema(LogisticRegression.scala:917)
> at org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema(LogisticRegression.scala:268)
> at org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema$(LogisticRegression.scala:255)
> at org.apache.spark.ml.classification.LogisticRegressionModel.validateAndTransformSchema(LogisticRegression.scala:917)
> at org.apache.spark.ml.PredictionModel.transformSchema(Predictor.scala:222)
> at org.apache.spark.ml.classification.ClassificationModel.transformSchema(Classifier.scala:182)
> at org.apache.spark.ml.classification.ProbabilisticClassificationModel.transformSchema(ProbabilisticClassifier.scala:88)
> at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:71)
> at org.apache.spark.ml.classification.ProbabilisticClassificationModel.transform(ProbabilisticClassifier.scala:107)
> at org.apache.spark.ml.classification.OneVsRestModel.$anonfun$transform$4(OneVsRest.scala:215)
> at scala.collection.IndexedSeqOptimized.foldLeft(IndexedSeqOptimized.scala:60)
> at scala.collection.IndexedSeqOptimized.foldLeft$(IndexedSeqOptimized.scala:68)
> at scala.collection.mutable.ArrayOps$ofRef.foldLeft(ArrayOps.scala:198)
> at org.apache.spark.ml.classification.OneVsRestModel.transform(OneVsRest.scala:203)
> ... 49 elided
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org