You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2018/05/12 04:15:45 UTC
spark git commit: [SPARK-23907] Removes regr_* functions in
functions.scala
Repository: spark
Updated Branches:
refs/heads/master f27a035da -> e3dabdf6e
[SPARK-23907] Removes regr_* functions in functions.scala
## What changes were proposed in this pull request?
This patch removes the various regr_* functions in functions.scala. They are so uncommon that I don't think they deserve real estate in functions.scala. We can consider adding them later if more users need them.
## How was this patch tested?
Removed the associated test case as well.
Author: Reynold Xin <rx...@databricks.com>
Closes #21309 from rxin/SPARK-23907.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3dabdf6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3dabdf6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3dabdf6
Branch: refs/heads/master
Commit: e3dabdf6ef210fb9f4337e305feb9c4983a57350
Parents: f27a035
Author: Reynold Xin <rx...@databricks.com>
Authored: Sat May 12 12:15:36 2018 +0800
Committer: hyukjinkwon <gu...@apache.org>
Committed: Sat May 12 12:15:36 2018 +0800
----------------------------------------------------------------------
.../scala/org/apache/spark/sql/functions.scala | 171 -------------------
.../spark/sql/DataFrameAggregateSuite.scala | 68 --------
2 files changed, 239 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/e3dabdf6/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index e7f866d..3c9ace4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -811,177 +811,6 @@ object functions {
*/
def var_pop(columnName: String): Column = var_pop(Column(columnName))
- /**
- * Aggregate function: returns the number of non-null pairs.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_count(y: Column, x: Column): Column = withAggregateFunction {
- RegrCount(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns the number of non-null pairs.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_count(y: String, x: String): Column = regr_count(Column(y), Column(x))
-
- /**
- * Aggregate function: returns SUM(x*x)-SUM(x)*SUM(x)/N. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_sxx(y: Column, x: Column): Column = withAggregateFunction {
- RegrSXX(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns SUM(x*x)-SUM(x)*SUM(x)/N. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_sxx(y: String, x: String): Column = regr_sxx(Column(y), Column(x))
-
- /**
- * Aggregate function: returns SUM(y*y)-SUM(y)*SUM(y)/N. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_syy(y: Column, x: Column): Column = withAggregateFunction {
- RegrSYY(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns SUM(y*y)-SUM(y)*SUM(y)/N. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_syy(y: String, x: String): Column = regr_syy(Column(y), Column(x))
-
- /**
- * Aggregate function: returns the average of y. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_avgy(y: Column, x: Column): Column = withAggregateFunction {
- RegrAvgY(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns the average of y. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_avgy(y: String, x: String): Column = regr_avgy(Column(y), Column(x))
-
- /**
- * Aggregate function: returns the average of x. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_avgx(y: Column, x: Column): Column = withAggregateFunction {
- RegrAvgX(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns the average of x. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_avgx(y: String, x: String): Column = regr_avgx(Column(y), Column(x))
-
- /**
- * Aggregate function: returns the covariance of y and x multiplied for the number of items in
- * the dataset. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_sxy(y: Column, x: Column): Column = withAggregateFunction {
- RegrSXY(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns the covariance of y and x multiplied for the number of items in
- * the dataset. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_sxy(y: String, x: String): Column = regr_sxy(Column(y), Column(x))
-
- /**
- * Aggregate function: returns the slope of the linear regression line. Any pair with a NULL is
- * ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_slope(y: Column, x: Column): Column = withAggregateFunction {
- RegrSlope(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns the slope of the linear regression line. Any pair with a NULL is
- * ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_slope(y: String, x: String): Column = regr_slope(Column(y), Column(x))
-
- /**
- * Aggregate function: returns the coefficient of determination (also called R-squared or
- * goodness of fit) for the regression line. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_r2(y: Column, x: Column): Column = withAggregateFunction {
- RegrR2(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns the coefficient of determination (also called R-squared or
- * goodness of fit) for the regression line. Any pair with a NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_r2(y: String, x: String): Column = regr_r2(Column(y), Column(x))
-
- /**
- * Aggregate function: returns the y-intercept of the linear regression line. Any pair with a
- * NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_intercept(y: Column, x: Column): Column = withAggregateFunction {
- RegrIntercept(y.expr, x.expr)
- }
-
- /**
- * Aggregate function: returns the y-intercept of the linear regression line. Any pair with a
- * NULL is ignored.
- *
- * @group agg_funcs
- * @since 2.4.0
- */
- def regr_intercept(y: String, x: String): Column = regr_intercept(Column(y), Column(x))
-
-
//////////////////////////////////////////////////////////////////////////////////////////////
// Window functions
http://git-wip-us.apache.org/repos/asf/spark/blob/e3dabdf6/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index 4337fb2..96c2896 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -687,72 +687,4 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
}
}
}
-
- test("SPARK-23907: regression functions") {
- val emptyTableData = Seq.empty[(Double, Double)].toDF("a", "b")
- val correlatedData = Seq[(Double, Double)]((2, 3), (3, 4), (7.5, 8.2), (10.3, 12))
- .toDF("a", "b")
- val correlatedDataWithNull = Seq[(java.lang.Double, java.lang.Double)](
- (2.0, 3.0), (3.0, null), (7.5, 8.2), (10.3, 12.0)).toDF("a", "b")
- checkAnswer(testData2.groupBy().agg(regr_count("a", "b")), Seq(Row(6)))
- checkAnswer(testData3.groupBy().agg(regr_count("a", "b")), Seq(Row(1)))
- checkAnswer(emptyTableData.groupBy().agg(regr_count("a", "b")), Seq(Row(0)))
-
- checkAggregatesWithTol(testData2.groupBy().agg(regr_sxx("a", "b")), Row(1.5), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_sxx("a", "b")), Row(0.0), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_sxx("a", "b")), Row(null), absTol)
- checkAggregatesWithTol(testData2.groupBy().agg(regr_syy("b", "a")), Row(1.5), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_syy("b", "a")), Row(0.0), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_syy("b", "a")), Row(null), absTol)
-
- checkAggregatesWithTol(testData2.groupBy().agg(regr_avgx("a", "b")), Row(1.5), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_avgx("a", "b")), Row(2.0), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_avgx("a", "b")), Row(null), absTol)
- checkAggregatesWithTol(testData2.groupBy().agg(regr_avgy("b", "a")), Row(1.5), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_avgy("b", "a")), Row(2.0), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_avgy("b", "a")), Row(null), absTol)
-
- checkAggregatesWithTol(testData2.groupBy().agg(regr_sxy("a", "b")), Row(0.0), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_sxy("a", "b")), Row(0.0), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_sxy("a", "b")), Row(null), absTol)
-
- checkAggregatesWithTol(testData2.groupBy().agg(regr_slope("a", "b")), Row(0.0), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_slope("a", "b")), Row(null), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_slope("a", "b")), Row(null), absTol)
-
- checkAggregatesWithTol(testData2.groupBy().agg(regr_r2("a", "b")), Row(0.0), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_r2("a", "b")), Row(null), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_r2("a", "b")), Row(null), absTol)
-
- checkAggregatesWithTol(testData2.groupBy().agg(regr_intercept("a", "b")), Row(2.0), absTol)
- checkAggregatesWithTol(testData3.groupBy().agg(regr_intercept("a", "b")), Row(null), absTol)
- checkAggregatesWithTol(emptyTableData.groupBy().agg(regr_intercept("a", "b")),
- Row(null), absTol)
-
-
- checkAggregatesWithTol(correlatedData.groupBy().agg(
- regr_count("a", "b"),
- regr_avgx("a", "b"),
- regr_avgy("a", "b"),
- regr_sxx("a", "b"),
- regr_syy("a", "b"),
- regr_sxy("a", "b"),
- regr_slope("a", "b"),
- regr_r2("a", "b"),
- regr_intercept("a", "b")),
- Row(4, 6.8, 5.7, 51.28, 45.38, 48.06, 0.937207488, 0.992556013, -0.67301092),
- absTol)
- checkAggregatesWithTol(correlatedDataWithNull.groupBy().agg(
- regr_count("a", "b"),
- regr_avgx("a", "b"),
- regr_avgy("a", "b"),
- regr_sxx("a", "b"),
- regr_syy("a", "b"),
- regr_sxy("a", "b"),
- regr_slope("a", "b"),
- regr_r2("a", "b"),
- regr_intercept("a", "b")),
- Row(3, 7.73333333, 6.6, 40.82666666, 35.66, 37.98, 0.93027433, 0.99079694, -0.59412149),
- absTol)
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org