You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by WeichenXu123 <gi...@git.apache.org> on 2017/12/29 13:31:23 UTC
[GitHub] spark pull request #19979: [SPARK-22881][ML][TEST] ML regression package tes...
Github user WeichenXu123 commented on a diff in the pull request:
https://github.com/apache/spark/pull/19979#discussion_r159061148
--- Diff: mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala ---
@@ -89,33 +88,31 @@ class DecisionTreeRegressorSuite
val df = TreeTests.setMetadata(categoricalDataPointsRDD, categoricalFeatures, numClasses = 0)
val model = dt.fit(df)
- val predictions = model.transform(df)
- .select(model.getFeaturesCol, model.getVarianceCol)
- .collect()
-
- predictions.foreach { case Row(features: Vector, variance: Double) =>
- val expectedVariance = model.rootNode.predictImpl(features).impurityStats.calculate()
- assert(variance === expectedVariance,
- s"Expected variance $expectedVariance but got $variance.")
+ testTransformer[(Vector, Double)](df, model, "features", "variance") {
+ case Row(features: Vector, variance: Double) =>
+ val expectedVariance = model.rootNode.predictImpl(features).impurityStats.calculate()
+ assert(variance === expectedVariance,
+ s"Expected variance $expectedVariance but got $variance.")
}
val varianceData: RDD[LabeledPoint] = TreeTests.varianceData(sc)
val varianceDF = TreeTests.setMetadata(varianceData, Map.empty[Int, Int], 0)
dt.setMaxDepth(1)
.setMaxBins(6)
.setSeed(0)
- val transformVarDF = dt.fit(varianceDF).transform(varianceDF)
- val calculatedVariances = transformVarDF.select(dt.getVarianceCol).collect().map {
- case Row(variance: Double) => variance
- }
- // Since max depth is set to 1, the best split point is that which splits the data
- // into (0.0, 1.0, 2.0) and (10.0, 12.0, 14.0). The predicted variance for each
- // data point in the left node is 0.667 and for each data point in the right node
- // is 2.667
- val expectedVariances = Array(0.667, 0.667, 0.667, 2.667, 2.667, 2.667)
- calculatedVariances.zip(expectedVariances).foreach { case (actual, expected) =>
- assert(actual ~== expected absTol 1e-3)
+ testTransformerByGlobalCheckFunc[(Vector, Double)](varianceDF, dt.fit(varianceDF),
--- End diff --
Since `varianceDF` is generated by `TreeTests.setMetadata`, how can we add an "expected value" column to the DF? It seems to require some fragile code. @jkbradley
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org