You are viewing a plain text version of this content. The canonical link for it is here.
Posted to reviews@spark.apache.org by WeichenXu123 <gi...@git.apache.org> on 2017/12/29 13:31:23 UTC

[GitHub] spark pull request #19979: [SPARK-22881][ML][TEST] ML regression package tes...

Github user WeichenXu123 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19979#discussion_r159061148
  
    --- Diff: mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala ---
    @@ -89,33 +88,31 @@ class DecisionTreeRegressorSuite
         val df = TreeTests.setMetadata(categoricalDataPointsRDD, categoricalFeatures, numClasses = 0)
         val model = dt.fit(df)
     
    -    val predictions = model.transform(df)
    -      .select(model.getFeaturesCol, model.getVarianceCol)
    -      .collect()
    -
    -    predictions.foreach { case Row(features: Vector, variance: Double) =>
    -      val expectedVariance = model.rootNode.predictImpl(features).impurityStats.calculate()
    -      assert(variance === expectedVariance,
    -        s"Expected variance $expectedVariance but got $variance.")
    +    testTransformer[(Vector, Double)](df, model, "features", "variance") {
    +      case Row(features: Vector, variance: Double) =>
    +        val expectedVariance = model.rootNode.predictImpl(features).impurityStats.calculate()
    +        assert(variance === expectedVariance,
    +          s"Expected variance $expectedVariance but got $variance.")
         }
     
         val varianceData: RDD[LabeledPoint] = TreeTests.varianceData(sc)
         val varianceDF = TreeTests.setMetadata(varianceData, Map.empty[Int, Int], 0)
         dt.setMaxDepth(1)
           .setMaxBins(6)
           .setSeed(0)
    -    val transformVarDF = dt.fit(varianceDF).transform(varianceDF)
    -    val calculatedVariances = transformVarDF.select(dt.getVarianceCol).collect().map {
    -      case Row(variance: Double) => variance
    -    }
     
    -    // Since max depth is set to 1, the best split point is that which splits the data
    -    // into (0.0, 1.0, 2.0) and (10.0, 12.0, 14.0). The predicted variance for each
    -    // data point in the left node is 0.667 and for each data point in the right node
    -    // is 2.667
    -    val expectedVariances = Array(0.667, 0.667, 0.667, 2.667, 2.667, 2.667)
    -    calculatedVariances.zip(expectedVariances).foreach { case (actual, expected) =>
    -      assert(actual ~== expected absTol 1e-3)
    +    testTransformerByGlobalCheckFunc[(Vector, Double)](varianceDF, dt.fit(varianceDF),
    --- End diff --
    
    For the `varianceDF` generated by `TreeTests.setMetadata`, how can we add an "expected value" column to the DataFrame? It seems to require some fragile (hacky) code.  @jkbradley 


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org