You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by sethah <gi...@git.apache.org> on 2017/10/31 18:58:58 UTC
[GitHub] spark pull request #18118: [SPARK-20199][ML] : Provided featureSubsetStrateg...
Github user sethah commented on a diff in the pull request:
https://github.com/apache/spark/pull/18118#discussion_r148095940
--- Diff: mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala ---
@@ -354,6 +356,41 @@ class GBTClassifierSuite extends SparkFunSuite with MLlibTestSparkContext
}
/////////////////////////////////////////////////////////////////////////////
+ // Tests of feature subset strategy
+ /////////////////////////////////////////////////////////////////////////////
+ test("Tests of feature subset strategy") {
+ val numClasses = 2
+ val gbt = new GBTClassifier()
+ .setImpurity("Gini")
+ .setMaxDepth(3)
+ .setMaxIter(5)
+ .setSubsamplingRate(1.0)
+ .setStepSize(0.5)
+ .setSeed(123)
+ .setFeatureSubsetStrategy("all")
+
+ // In this data, feature 1 is very important.
+ val data: RDD[LabeledPoint] = TreeTests.featureImportanceData(sc)
+ val categoricalFeatures = Map.empty[Int, Int]
+ val df: DataFrame = TreeTests.setMetadata(data, categoricalFeatures, numClasses)
+
+ val importances = gbt.fit(df).featureImportances
+ val mostImportantFeature = importances.argmax
+ assert(mostImportantFeature === 1)
+ assert(importances.toArray.sum === 1.0)
--- End diff --
The last two assertions here (`assert(mostImportantFeature === 1)` and `assert(importances.toArray.sum === 1.0)`) aren't necessary.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org