Posted to commits@spark.apache.org by gu...@apache.org on 2021/02/02 00:30:04 UTC

[spark] branch master updated: [SPARK-34306][SQL][PYTHON][R] Use Snake naming rule across the function APIs

This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 30468a9  [SPARK-34306][SQL][PYTHON][R] Use Snake naming rule across the function APIs
30468a9 is described below

commit 30468a901577e82c855fbc4cb78e1b869facb44c
Author: HyukjinKwon <gu...@apache.org>
AuthorDate: Tue Feb 2 09:29:40 2021 +0900

    [SPARK-34306][SQL][PYTHON][R] Use Snake naming rule across the function APIs
    
    ### What changes were proposed in this pull request?
    
    This PR completes the snake_case naming rule across the function APIs in all the languages; see also SPARK-10621.
    
    In more detail, this PR (a short usage sketch follows the list below):
    - Adds `count_distinct` in Scala, Python, and R, and documents that `count_distinct` is encouraged. `countDistinct` was not deprecated because it is pretty commonly used; we could deprecate it in future releases.
    - (Scala-specific) adds `typedlit` but doesn't deprecate `typedLit`, which is arguably commonly used. Likewise, we could deprecate it in future releases.
    - Deprecates and renames:
      - `sumDistinct` -> `sum_distinct`
      - `bitwiseNOT` -> `bitwise_not`
      - `shiftLeft` -> `shiftleft` (matched with SQL name in `FunctionRegistry`)
      - `shiftRight` -> `shiftright` (matched with SQL name in `FunctionRegistry`)
      - `shiftRightUnsigned` -> `shiftrightunsigned` (matched with SQL name in `FunctionRegistry`)
      - (Scala-specific) `callUDF` -> `call_udf`
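    
    For illustration only (not part of this patch), a minimal PySpark sketch of the new
    spellings; `typedlit` and `call_udf` are Scala-specific and omitted here:
    
    ```python
    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F
    
    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(21,), (42,)], ["a"])
    
    # New snake_case names added by this patch.
    df.agg(F.count_distinct("a"), F.sum_distinct("a")).show()
    df.select(F.bitwise_not("a"), F.shiftleft("a", 1),
              F.shiftright("a", 1), F.shiftrightunsigned("a", 1)).show()
    ```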
    
    ### Why are the changes needed?
    
    To keep naming consistent across the APIs.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, it deprecates some APIs and adds the new, renamed APIs described above.
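    
    For example (a sketch, not taken from the patch itself), calling a deprecated name now
    emits a `FutureWarning` at runtime while still returning the same result:
    
    ```python
    import warnings
    
    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F
    
    spark = SparkSession.builder.getOrCreate()
    df = spark.range(5)
    
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        old = df.agg(F.sumDistinct("id")).collect()  # warns: use sum_distinct instead
    assert issubclass(caught[-1].category, FutureWarning)
    
    new = df.agg(F.sum_distinct("id")).collect()  # preferred snake_case name
    assert old == new
    ```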
    
    ### How was this patch tested?
    
    Unit tests were added.
    
    Closes #31408 from HyukjinKwon/SPARK-34306.
    
    Authored-by: HyukjinKwon <gu...@apache.org>
    Signed-off-by: HyukjinKwon <gu...@apache.org>
---
 R/pkg/NAMESPACE                                    |   6 +
 R/pkg/R/functions.R                                | 125 +++++++++++++++++----
 R/pkg/R/generics.R                                 |  24 ++++
 R/pkg/tests/fulltests/test_sparkSQL.R              |  17 ++-
 R/pkg/vignettes/sparkr-vignettes.Rmd               |   2 +-
 docs/sql-getting-started.md                        |   2 +-
 .../spark/examples/ml/JavaTokenizerExample.java    |   6 +-
 python/docs/source/reference/pyspark.sql.rst       |   9 +-
 python/pyspark/sql/functions.py                    |  91 +++++++++++++--
 python/pyspark/sql/functions.pyi                   |   6 +
 python/pyspark/sql/tests/test_column.py            |   2 +
 python/pyspark/sql/tests/test_functions.py         |  21 +++-
 python/pyspark/sql/tests/test_group.py             |   1 +
 .../optimizer/RewriteDistinctAggregates.scala      |   4 +-
 .../main/scala/org/apache/spark/sql/Dataset.scala  |   2 +-
 .../scala/org/apache/spark/sql/functions.scala     | 113 ++++++++++++++++---
 .../org/apache/spark/sql/JavaDataFrameSuite.java   |   2 +-
 .../apache/spark/sql/DataFrameAggregateSuite.scala |  32 +++---
 .../apache/spark/sql/DataFrameFunctionsSuite.scala |   6 +-
 .../org/apache/spark/sql/DataFrameSuite.scala      |   6 +-
 .../org/apache/spark/sql/DateFunctionsSuite.scala  |   2 +-
 .../org/apache/spark/sql/MathFunctionsSuite.scala  |  24 ++--
 .../apache/spark/sql/execution/PlannerSuite.scala  |   4 +-
 .../spark/sql/execution/SameResultSuite.scala      |   4 +-
 .../apache/spark/sql/hive/JavaDataFrameSuite.java  |   2 +-
 .../spark/sql/hive/HiveSparkSubmitSuite.scala      |   2 +-
 .../hive/execution/ObjectHashAggregateSuite.scala  |   2 +-
 27 files changed, 409 insertions(+), 108 deletions(-)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 6ef2df5..4b0e868 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -243,6 +243,7 @@ exportMethods("%<=>%",
               "base64",
               "between",
               "bin",
+              "bitwise_not",
               "bitwiseNOT",
               "bround",
               "cast",
@@ -259,6 +260,7 @@ exportMethods("%<=>%",
               "cos",
               "cosh",
               "count",
+              "count_distinct",
               "countDistinct",
               "crc32",
               "create_array",
@@ -391,8 +393,11 @@ exportMethods("%<=>%",
               "sha1",
               "sha2",
               "shiftLeft",
+              "shiftleft",
               "shiftRight",
+              "shiftright",
               "shiftRightUnsigned",
+              "shiftrightunsigned",
               "shuffle",
               "sd",
               "sign",
@@ -415,6 +420,7 @@ exportMethods("%<=>%",
               "substr",
               "substring_index",
               "sum",
+              "sum_distinct",
               "sumDistinct",
               "tan",
               "tanh",
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 43b25a1..741878b 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -484,7 +484,7 @@ setMethod("acosh",
 #' \dontrun{
 #' head(select(df, approx_count_distinct(df$gear)))
 #' head(select(df, approx_count_distinct(df$gear, 0.02)))
-#' head(select(df, countDistinct(df$gear, df$cyl)))
+#' head(select(df, count_distinct(df$gear, df$cyl)))
 #' head(select(df, n_distinct(df$gear)))
 #' head(distinct(select(df, "gear")))}
 #' @note approx_count_distinct(Column) since 3.0.0
@@ -636,20 +636,33 @@ setMethod("bin",
           })
 
 #' @details
-#' \code{bitwiseNOT}: Computes bitwise NOT.
+#' \code{bitwise_not}: Computes bitwise NOT.
 #'
 #' @rdname column_nonaggregate_functions
-#' @aliases bitwiseNOT bitwiseNOT,Column-method
+#' @aliases bitwise_not bitwise_not,Column-method
 #' @examples
 #'
 #' \dontrun{
-#' head(select(df, bitwiseNOT(cast(df$vs, "int"))))}
+#' head(select(df, bitwise_not(cast(df$vs, "int"))))}
+#' @note bitwise_not since 3.2.0
+setMethod("bitwise_not",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "bitwise_not", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{bitwiseNOT}: Computes bitwise NOT.
+#'
+#' @rdname column_nonaggregate_functions
+#' @aliases bitwiseNOT bitwiseNOT,Column-method
 #' @note bitwiseNOT since 1.5.0
 setMethod("bitwiseNOT",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "bitwiseNOT", x@jc)
-            column(jc)
+            .Deprecated("bitwise_not")
+            bitwise_not(x)
           })
 
 #' @details
@@ -1937,21 +1950,34 @@ setMethod("sum",
           })
 
 #' @details
-#' \code{sumDistinct}: Returns the sum of distinct values in the expression.
+#' \code{sum_distinct}: Returns the sum of distinct values in the expression.
 #'
 #' @rdname column_aggregate_functions
-#' @aliases sumDistinct sumDistinct,Column-method
+#' @aliases sum_distinct sum_distinct,Column-method
 #' @examples
 #'
 #' \dontrun{
-#' head(select(df, sumDistinct(df$gear)))
+#' head(select(df, sum_distinct(df$gear)))
 #' head(distinct(select(df, "gear")))}
+#' @note sum_distinct since 3.2.0
+setMethod("sum_distinct",
+          signature(x = "Column"),
+          function(x) {
+            jc <- callJStatic("org.apache.spark.sql.functions", "sum_distinct", x@jc)
+            column(jc)
+          })
+
+#' @details
+#' \code{sumDistinct}: Returns the sum of distinct values in the expression.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases sumDistinct sumDistinct,Column-method
 #' @note sumDistinct since 1.4.0
 setMethod("sumDistinct",
           signature(x = "Column"),
           function(x) {
-            jc <- callJStatic("org.apache.spark.sql.functions", "sumDistinct", x@jc)
-            column(jc)
+            .Deprecated("sum_distinct")
+            sum_distinct(x)
           })
 
 #' @details
@@ -2469,24 +2495,38 @@ setMethod("approxCountDistinct",
           })
 
 #' @details
-#' \code{countDistinct}: Returns the number of distinct items in a group.
+#' \code{count_distinct}: Returns the number of distinct items in a group.
 #'
 #' @rdname column_aggregate_functions
-#' @aliases countDistinct countDistinct,Column-method
-#' @note countDistinct since 1.4.0
-setMethod("countDistinct",
+#' @aliases count_distinct count_distinct,Column-method
+#' @note count_distinct since 3.2.0
+setMethod("count_distinct",
           signature(x = "Column"),
           function(x, ...) {
             jcols <- lapply(list(...), function(x) {
               stopifnot(class(x) == "Column")
               x@jc
             })
-            jc <- callJStatic("org.apache.spark.sql.functions", "countDistinct", x@jc,
+            jc <- callJStatic("org.apache.spark.sql.functions", "count_distinct", x@jc,
                               jcols)
             column(jc)
           })
 
 #' @details
+#' \code{countDistinct}: Returns the number of distinct items in a group.
+#'
+#' An alias of \code{count_distinct}, and it is encouraged to use \code{count_distinct} directly.
+#'
+#' @rdname column_aggregate_functions
+#' @aliases countDistinct countDistinct,Column-method
+#' @note countDistinct since 1.4.0
+setMethod("countDistinct",
+          signature(x = "Column"),
+          function(x, ...) {
+            count_distinct(x, ...)
+          })
+
+#' @details
 #' \code{concat}: Concatenates multiple input columns together into a single column.
 #' The function works with strings, binary and compatible array columns.
 #'
@@ -2550,7 +2590,7 @@ setMethod("least",
 #' @note n_distinct since 1.4.0
 setMethod("n_distinct", signature(x = "Column"),
           function(x, ...) {
-            countDistinct(x, ...)
+            count_distinct(x, ...)
           })
 
 #' @rdname count
@@ -2894,6 +2934,21 @@ setMethod("sha2", signature(y = "Column", x = "numeric"),
           })
 
 #' @details
+#' \code{shiftleft}: Shifts the given value numBits left. If the given value is a long value,
+#' this function will return a long value else it will return an integer value.
+#'
+#' @rdname column_math_functions
+#' @aliases shiftleft shiftleft,Column,numeric-method
+#' @note shiftleft since 3.2.0
+setMethod("shiftleft", signature(y = "Column", x = "numeric"),
+          function(y, x) {
+            jc <- callJStatic("org.apache.spark.sql.functions",
+                              "shiftleft",
+                              y@jc, as.integer(x))
+            column(jc)
+          })
+
+#' @details
 #' \code{shiftLeft}: Shifts the given value numBits left. If the given value is a long value,
 #' this function will return a long value else it will return an integer value.
 #'
@@ -2902,8 +2957,21 @@ setMethod("sha2", signature(y = "Column", x = "numeric"),
 #' @note shiftLeft since 1.5.0
 setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
           function(y, x) {
+            .Deprecated("shiftleft")
+            shiftleft(y, x)
+          })
+
+#' @details
+#' \code{shiftright}: (Signed) shifts the given value numBits right. If the given value is a long
+#' value, it will return a long value else it will return an integer value.
+#'
+#' @rdname column_math_functions
+#' @aliases shiftright shiftright,Column,numeric-method
+#' @note shiftright since 3.2.0
+setMethod("shiftright", signature(y = "Column", x = "numeric"),
+          function(y, x) {
             jc <- callJStatic("org.apache.spark.sql.functions",
-                              "shiftLeft",
+                              "shiftright",
                               y@jc, as.integer(x))
             column(jc)
           })
@@ -2917,8 +2985,21 @@ setMethod("shiftLeft", signature(y = "Column", x = "numeric"),
 #' @note shiftRight since 1.5.0
 setMethod("shiftRight", signature(y = "Column", x = "numeric"),
           function(y, x) {
+            .Deprecated("shiftright")
+            shiftright(y, x)
+          })
+
+#' @details
+#' \code{shiftrightunsigned}: (Unsigned) shifts the given value numBits right. If the given value is
+#' a long value, it will return a long value else it will return an integer value.
+#'
+#' @rdname column_math_functions
+#' @aliases shiftrightunsigned shiftrightunsigned,Column,numeric-method
+#' @note shiftrightunsigned since 3.2.0
+setMethod("shiftrightunsigned", signature(y = "Column", x = "numeric"),
+          function(y, x) {
             jc <- callJStatic("org.apache.spark.sql.functions",
-                              "shiftRight",
+                              "shiftrightunsigned",
                               y@jc, as.integer(x))
             column(jc)
           })
@@ -2932,10 +3013,8 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"),
 #' @note shiftRightUnsigned since 1.5.0
 setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
           function(y, x) {
-            jc <- callJStatic("org.apache.spark.sql.functions",
-                              "shiftRightUnsigned",
-                              y@jc, as.integer(x))
-            column(jc)
+            .Deprecated("shiftrightunsigned")
+            shiftrightunsigned(y, x)
           })
 
 #' @details
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 4c73cc2..a97fd4c 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -886,6 +886,10 @@ setGeneric("bin", function(x) { standardGeneric("bin") })
 
 #' @rdname column_nonaggregate_functions
 #' @name NULL
+setGeneric("bitwise_not", function(x) { standardGeneric("bitwise_not") })
+
+#' @rdname column_nonaggregate_functions
+#' @name NULL
 setGeneric("bitwiseNOT", function(x) { standardGeneric("bitwiseNOT") })
 
 #' @rdname column_math_functions
@@ -925,6 +929,10 @@ setGeneric("conv", function(x, fromBase, toBase) { standardGeneric("conv") })
 
 #' @rdname column_aggregate_functions
 #' @name NULL
+setGeneric("count_distinct", function(x, ...) { standardGeneric("count_distinct") })
+
+#' @rdname column_aggregate_functions
+#' @name NULL
 setGeneric("countDistinct", function(x, ...) { standardGeneric("countDistinct") })
 
 #' @rdname column_misc_functions
@@ -1326,12 +1334,24 @@ setGeneric("shiftLeft", function(y, x) { standardGeneric("shiftLeft") })
 
 #' @rdname column_math_functions
 #' @name NULL
+setGeneric("shiftleft", function(y, x) { standardGeneric("shiftleft") })
+
+#' @rdname column_math_functions
+#' @name NULL
 setGeneric("shiftRight", function(y, x) { standardGeneric("shiftRight") })
 
 #' @rdname column_math_functions
 #' @name NULL
+setGeneric("shiftright", function(y, x) { standardGeneric("shiftright") })
+
+#' @rdname column_math_functions
+#' @name NULL
 setGeneric("shiftRightUnsigned", function(y, x) { standardGeneric("shiftRightUnsigned") })
 
+#' @rdname column_math_functions
+#' @name NULL
+setGeneric("shiftrightunsigned", function(y, x) { standardGeneric("shiftrightunsigned") })
+
 #' @rdname column_collection_functions
 #' @name NULL
 setGeneric("shuffle", function(x) { standardGeneric("shuffle") })
@@ -1390,6 +1410,10 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst
 
 #' @rdname column_aggregate_functions
 #' @name NULL
+setGeneric("sum_distinct", function(x) { standardGeneric("sum_distinct") })
+
+#' @rdname column_aggregate_functions
+#' @name NULL
 setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") })
 
 #' @rdname column_datetime_functions
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index 4eafe8f..0ee23b2 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1397,7 +1397,8 @@ test_that("column operators", {
 test_that("column functions", {
   c <- column("a")
   c1 <- abs(c) + acos(c) + approx_count_distinct(c) + ascii(c) + asin(c) + atan(c)
-  c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c)
+  c2 <- avg(c) + base64(c) + bin(c) + suppressWarnings(bitwiseNOT(c)) +
+    bitwise_not(c) + cbrt(c) + ceil(c) + cos(c)
   c3 <- cosh(c) + count(c) + crc32(c) + hash(c) + exp(c)
   c4 <- explode(c) + expm1(c) + factorial(c) + first(c) + floor(c) + hex(c)
   c5 <- hour(c) + initcap(c) + last(c) + last_day(c) + length(c)
@@ -1405,7 +1406,8 @@ test_that("column functions", {
   c7 <- mean(c) + min(c) + month(c) + negate(c) + posexplode(c) + quarter(c)
   c8 <- reverse(c) + rint(c) + round(c) + rtrim(c) + sha1(c) + monotonically_increasing_id()
   c9 <- signum(c) + sin(c) + sinh(c) + size(c) + stddev(c) + soundex(c) + sqrt(c) + sum(c)
-  c10 <- sumDistinct(c) + tan(c) + tanh(c) + degrees(c) + radians(c)
+  c10 <- suppressWarnings(sumDistinct(c)) + sum_distinct(c) + tan(c) + tanh(c) +
+    degrees(c) + radians(c)
   c11 <- to_date(c) + trim(c) + unbase64(c) + unhex(c) + upper(c)
   c12 <- variance(c) + xxhash64(c) + ltrim(c, "a") + rtrim(c, "b") + trim(c, "c")
   c13 <- lead("col", 1) + lead(c, 1) + lag("col", 1) + lag(c, 1)
@@ -1457,6 +1459,8 @@ test_that("column functions", {
   expect_equal(collect(df3)[[2, 1]], FALSE)
   expect_equal(collect(df3)[[3, 1]], TRUE)
 
+  df4 <- select(df, count_distinct(df$age, df$name))
+  expect_equal(collect(df4)[[1, 1]], 2)
   df4 <- select(df, countDistinct(df$age, df$name))
   expect_equal(collect(df4)[[1, 1]], 2)
 
@@ -1887,9 +1891,12 @@ test_that("column binary mathfunctions", {
   expect_equal(collect(select(df, hypot(df$a, df$b)))[3, "HYPOT(a, b)"], sqrt(3^2 + 7^2))
   expect_equal(collect(select(df, hypot(df$a, df$b)))[4, "HYPOT(a, b)"], sqrt(4^2 + 8^2))
   ## nolint end
-  expect_equal(collect(select(df, shiftLeft(df$b, 1)))[4, 1], 16)
-  expect_equal(collect(select(df, shiftRight(df$b, 1)))[4, 1], 4)
-  expect_equal(collect(select(df, shiftRightUnsigned(df$b, 1)))[4, 1], 4)
+  expect_equal(collect(select(df, shiftleft(df$b, 1)))[4, 1], 16)
+  expect_equal(collect(select(df, shiftright(df$b, 1)))[4, 1], 4)
+  expect_equal(collect(select(df, shiftrightunsigned(df$b, 1)))[4, 1], 4)
+  expect_equal(collect(select(df, suppressWarnings(shiftLeft(df$b, 1))))[4, 1], 16)
+  expect_equal(collect(select(df, suppressWarnings(shiftRight(df$b, 1))))[4, 1], 4)
+  expect_equal(collect(select(df, suppressWarnings(shiftRightUnsigned(df$b, 1))))[4, 1], 4)
   expect_equal(class(collect(select(df, rand()))[2, 1]), "numeric")
   expect_equal(collect(select(df, rand(1)))[1, 1], 0.636, tolerance = 0.01)
   expect_equal(class(collect(select(df, randn()))[2, 1]), "numeric")
diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index 3177b54..0ed0028 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -331,7 +331,7 @@ A common flow of grouping and aggregation is
 
 2. Feed the `GroupedData` object to `agg` or `summarize` functions, with some provided aggregation functions to compute a number within each group.
 
-A number of widely used functions are supported to aggregate data after grouping, including `avg`, `countDistinct`, `count`, `first`, `kurtosis`, `last`, `max`, `mean`, `min`, `sd`, `skewness`, `stddev_pop`, `stddev_samp`, `sumDistinct`, `sum`, `var_pop`, `var_samp`, `var`. See the [API doc for aggregate functions](https://spark.apache.org/docs/latest/api/R/column_aggregate_functions.html) linked there.
+A number of widely used functions are supported to aggregate data after grouping, including `avg`, `count_distinct`, `count`, `first`, `kurtosis`, `last`, `max`, `mean`, `min`, `sd`, `skewness`, `stddev_pop`, `stddev_samp`, `sum_distinct`, `sum`, `var_pop`, `var_samp`, `var`. See the [API doc for aggregate functions](https://spark.apache.org/docs/latest/api/R/column_aggregate_functions.html) linked there.
 
 For example we can compute a histogram of the number of cylinders in the `mtcars` dataset as shown below.
 
diff --git a/docs/sql-getting-started.md b/docs/sql-getting-started.md
index 5a6f182..5486e73 100644
--- a/docs/sql-getting-started.md
+++ b/docs/sql-getting-started.md
@@ -352,7 +352,7 @@ Scalar functions are functions that return a single value per row, as opposed to
 
 ## Aggregate Functions
 
-Aggregate functions are functions that return a single value on a group of rows. The [Built-in Aggregation Functions](sql-ref-functions-builtin.html#aggregate-functions) provide common aggregations such as `count()`, `countDistinct()`, `avg()`, `max()`, `min()`, etc.
+Aggregate functions are functions that return a single value on a group of rows. The [Built-in Aggregation Functions](sql-ref-functions-builtin.html#aggregate-functions) provide common aggregations such as `count()`, `count_distinct()`, `avg()`, `max()`, `min()`, etc.
 Users are not limited to the predefined aggregate functions and can create their own. For more details
 about user defined aggregate functions, please refer to the documentation of
 [User Defined Aggregate Functions](sql-ref-functions-udf-aggregate.html).
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
index 3b5d8e6..091f7d1 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java
@@ -36,7 +36,7 @@ import org.apache.spark.sql.types.StructField;
 import org.apache.spark.sql.types.StructType;
 
 // col("...") is preferable to df.col("...")
-import static org.apache.spark.sql.functions.callUDF;
+import static org.apache.spark.sql.functions.call_udf;
 import static org.apache.spark.sql.functions.col;
 // $example off$
 
@@ -73,12 +73,12 @@ public class JavaTokenizerExample {
 
     Dataset<Row> tokenized = tokenizer.transform(sentenceDataFrame);
     tokenized.select("sentence", "words")
-        .withColumn("tokens", callUDF("countTokens", col("words")))
+        .withColumn("tokens", call_udf("countTokens", col("words")))
         .show(false);
 
     Dataset<Row> regexTokenized = regexTokenizer.transform(sentenceDataFrame);
     regexTokenized.select("sentence", "words")
-        .withColumn("tokens", callUDF("countTokens", col("words")))
+        .withColumn("tokens", call_udf("countTokens", col("words")))
         .show(false);
     // $example off$
 
diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst
index 0dc2f6e..78926c9 100644
--- a/python/docs/source/reference/pyspark.sql.rst
+++ b/python/docs/source/reference/pyspark.sql.rst
@@ -340,6 +340,7 @@ Functions
     avg
     base64
     bin
+    bitwise_not
     bitwiseNOT
     broadcast
     bround
@@ -358,6 +359,7 @@ Functions
     cos
     cosh
     count
+    count_distinct
     countDistinct
     covar_pop
     covar_samp
@@ -482,9 +484,9 @@ Functions
     sequence
     sha1
     sha2
-    shiftLeft
-    shiftRight
-    shiftRightUnsigned
+    shiftleft
+    shiftright
+    shiftrightunsigned
     shuffle
     signum
     sin
@@ -504,6 +506,7 @@ Functions
     substring
     substring_index
     sum
+    sum_distinct
     sumDistinct
     tan
     tanh
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 45dbedf..6b5f7ee 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -206,8 +206,20 @@ def mean(col):
 def sumDistinct(col):
     """
     Aggregate function: returns the sum of distinct values in the expression.
+
+    .. deprecated:: 3.2.0
+        Use :func:`sum_distinct` instead.
     """
-    return _invoke_function_over_column("sumDistinct", col)
+    warnings.warn("Deprecated in 3.2, use sum_distinct instead.", FutureWarning)
+    return sum_distinct(col)
+
+
+@since(3.2)
+def sum_distinct(col):
+    """
+    Aggregate function: returns the sum of distinct values in the expression.
+    """
+    return _invoke_function_over_column("sum_distinct", col)
 
 
 def acos(col):
@@ -494,8 +506,20 @@ def toRadians(col):
 def bitwiseNOT(col):
     """
     Computes bitwise not.
+
+    .. deprecated:: 3.2.0
+        Use :func:`bitwise_not` instead.
+    """
+    warnings.warn("Deprecated in 3.2, use bitwise_not instead.", FutureWarning)
+    return bitwise_not(col)
+
+
+@since(3.2)
+def bitwise_not(col):
+    """
+    Computes bitwise not.
     """
-    return _invoke_function_over_column("bitwiseNOT", col)
+    return _invoke_function_over_column("bitwise_not", col)
 
 
 @since(2.4)
@@ -810,7 +834,7 @@ def approx_count_distinct(col, rsd=None):
     col : :class:`Column` or str
     rsd : float, optional
         maximum relative standard deviation allowed (default = 0.05).
-        For rsd < 0.01, it is more efficient to use :func:`countDistinct`
+        For rsd < 0.01, it is more efficient to use :func:`count_distinct`
 
     Examples
     --------
@@ -928,18 +952,29 @@ def covar_samp(col1, col2):
 def countDistinct(col, *cols):
     """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
 
+    An alias of :func:`count_distinct`, and it is encouraged to use :func:`count_distinct`
+    directly.
+
     .. versionadded:: 1.3.0
+    """
+    return count_distinct(col, *cols)
+
+
+def count_distinct(col, *cols):
+    """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``.
+
+    .. versionadded:: 3.2.0
 
     Examples
     --------
-    >>> df.agg(countDistinct(df.age, df.name).alias('c')).collect()
+    >>> df.agg(count_distinct(df.age, df.name).alias('c')).collect()
     [Row(c=2)]
 
-    >>> df.agg(countDistinct("age", "name").alias('c')).collect()
+    >>> df.agg(count_distinct("age", "name").alias('c')).collect()
     [Row(c=2)]
     """
     sc = SparkContext._active_spark_context
-    jc = sc._jvm.functions.countDistinct(_to_java_column(col), _to_seq(sc, cols, _to_java_column))
+    jc = sc._jvm.functions.count_distinct(_to_java_column(col), _to_seq(sc, cols, _to_java_column))
     return Column(jc)
 
 
@@ -1255,13 +1290,25 @@ def shiftLeft(col, numBits):
 
     .. versionadded:: 1.5.0
 
+    .. deprecated:: 3.2.0
+        Use :func:`shiftleft` instead.
+    """
+    warnings.warn("Deprecated in 3.2, use shiftleft instead.", FutureWarning)
+    return shiftleft(col, numBits)
+
+
+def shiftleft(col, numBits):
+    """Shift the given value numBits left.
+
+    .. versionadded:: 3.2.0
+
     Examples
     --------
-    >>> spark.createDataFrame([(21,)], ['a']).select(shiftLeft('a', 1).alias('r')).collect()
+    >>> spark.createDataFrame([(21,)], ['a']).select(shiftleft('a', 1).alias('r')).collect()
     [Row(r=42)]
     """
     sc = SparkContext._active_spark_context
-    return Column(sc._jvm.functions.shiftLeft(_to_java_column(col), numBits))
+    return Column(sc._jvm.functions.shiftleft(_to_java_column(col), numBits))
 
 
 def shiftRight(col, numBits):
@@ -1269,9 +1316,21 @@ def shiftRight(col, numBits):
 
     .. versionadded:: 1.5.0
 
+    .. deprecated:: 3.2.0
+        Use :func:`shiftright` instead.
+    """
+    warnings.warn("Deprecated in 3.2, use shiftright instead.", FutureWarning)
+    return shiftright(col, numBits)
+
+
+def shiftright(col, numBits):
+    """(Signed) shift the given value numBits right.
+
+    .. versionadded:: 3.2.0
+
     Examples
     --------
-    >>> spark.createDataFrame([(42,)], ['a']).select(shiftRight('a', 1).alias('r')).collect()
+    >>> spark.createDataFrame([(42,)], ['a']).select(shiftright('a', 1).alias('r')).collect()
     [Row(r=21)]
     """
     sc = SparkContext._active_spark_context
@@ -1284,10 +1343,22 @@ def shiftRightUnsigned(col, numBits):
 
     .. versionadded:: 1.5.0
 
+    .. deprecated:: 3.2.0
+        Use :func:`shiftrightunsigned` instead.
+    """
+    warnings.warn("Deprecated in 3.2, use shiftrightunsigned instead.", FutureWarning)
+    return shiftrightunsigned(col, numBits)
+
+
+def shiftrightunsigned(col, numBits):
+    """Unsigned shift the given value numBits right.
+
+    .. versionadded:: 3.2.0
+
     Examples
     --------
     >>> df = spark.createDataFrame([(-42,)], ['a'])
-    >>> df.select(shiftRightUnsigned('a', 1).alias('r')).collect()
+    >>> df.select(shiftrightunsigned('a', 1).alias('r')).collect()
     [Row(r=9223372036854775787)]
     """
     sc = SparkContext._active_spark_context
diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi
index 273128b..e04edc7 100644
--- a/python/pyspark/sql/functions.pyi
+++ b/python/pyspark/sql/functions.pyi
@@ -45,6 +45,7 @@ def corr(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
 def covar_pop(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
 def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column: ...
 def countDistinct(col: ColumnOrName, *cols: ColumnOrName) -> Column: ...
+def count_distinct(col: ColumnOrName, *cols: ColumnOrName) -> Column: ...
 def first(col: ColumnOrName, ignorenulls: bool = ...) -> Column: ...
 def grouping(col: ColumnOrName) -> Column: ...
 def grouping_id(*cols: ColumnOrName) -> Column: ...
@@ -64,8 +65,11 @@ def randn(seed: Optional[int] = ...) -> Column: ...
 def round(col: ColumnOrName, scale: int = ...) -> Column: ...
 def bround(col: ColumnOrName, scale: int = ...) -> Column: ...
 def shiftLeft(col: ColumnOrName, numBits: int) -> Column: ...
+def shiftleft(col: ColumnOrName, numBits: int) -> Column: ...
 def shiftRight(col: ColumnOrName, numBits: int) -> Column: ...
+def shiftright(col: ColumnOrName, numBits: int) -> Column: ...
 def shiftRightUnsigned(col: ColumnOrName, numBits: int) -> Column: ...
+def shiftrightunsigned(col: ColumnOrName, numBits: int) -> Column: ...
 def spark_partition_id() -> Column: ...
 def expr(str: str) -> Column: ...
 def struct(*cols: ColumnOrName) -> Column: ...
@@ -278,6 +282,7 @@ def atan2(col1: ColumnOrName, col2: float) -> Column: ...
 def avg(col: ColumnOrName) -> Column: ...
 def base64(col: ColumnOrName) -> Column: ...
 def bitwiseNOT(col: ColumnOrName) -> Column: ...
+def bitwise_not(col: ColumnOrName) -> Column: ...
 def cbrt(col: ColumnOrName) -> Column: ...
 def ceil(col: ColumnOrName) -> Column: ...
 def col(col: str) -> Column: ...
@@ -333,6 +338,7 @@ def stddev_pop(col: ColumnOrName) -> Column: ...
 def stddev_samp(col: ColumnOrName) -> Column: ...
 def sum(col: ColumnOrName) -> Column: ...
 def sumDistinct(col: ColumnOrName) -> Column: ...
+def sum_distinct(col: ColumnOrName) -> Column: ...
 def tan(col: ColumnOrName) -> Column: ...
 def tanh(col: ColumnOrName) -> Column: ...
 def toDegrees(col: ColumnOrName) -> Column: ...
diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py
index 2ae0a9b..c2530b2 100644
--- a/python/pyspark/sql/tests/test_column.py
+++ b/python/pyspark/sql/tests/test_column.py
@@ -139,6 +139,8 @@ class ColumnTests(ReusedSQLTestCase):
         self.assertEqual(170 ^ 75, result['(a ^ b)'])
         result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict()
         self.assertEqual(~75, result['~b'])
+        result = df.select(functions.bitwise_not(df.b)).collect()[0].asDict()
+        self.assertEqual(~75, result['~b'])
 
     def test_with_field(self):
         from pyspark.sql.functions import lit, col
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py
index 053164a..112043e 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -21,7 +21,9 @@ import re
 
 from py4j.protocol import Py4JJavaError
 from pyspark.sql import Row, Window
-from pyspark.sql.functions import udf, input_file_name, col, percentile_approx, lit
+from pyspark.sql.functions import udf, input_file_name, col, percentile_approx, \
+    lit, assert_true, sum_distinct, sumDistinct, shiftleft, shiftLeft, shiftRight, \
+    shiftright, shiftrightunsigned, shiftRightUnsigned
 from pyspark.testing.sqlutils import ReusedSQLTestCase
 
 
@@ -640,6 +642,23 @@ class FunctionsTests(ReusedSQLTestCase):
             str(cm.exception)
         )
 
+    def test_sum_distinct(self):
+        self.spark.range(10).select(
+            assert_true(sum_distinct(col("id")) == sumDistinct(col("id")))).collect()
+
+    def test_shiftleft(self):
+        self.spark.range(10).select(
+            assert_true(shiftLeft(col("id"), 2) == shiftleft(col("id"), 2))).collect()
+
+    def test_shiftright(self):
+        self.spark.range(10).select(
+            assert_true(shiftRight(col("id"), 2) == shiftright(col("id"), 2))).collect()
+
+    def test_shiftrightunsigned(self):
+        self.spark.range(10).select(
+            assert_true(
+                shiftRightUnsigned(col("id"), 2) == shiftrightunsigned(col("id"), 2))).collect()
+
 
 if __name__ == "__main__":
     import unittest
diff --git a/python/pyspark/sql/tests/test_group.py b/python/pyspark/sql/tests/test_group.py
index 324c964..e603334 100644
--- a/python/pyspark/sql/tests/test_group.py
+++ b/python/pyspark/sql/tests/test_group.py
@@ -31,6 +31,7 @@ class GroupTests(ReusedSQLTestCase):
         self.assertEqual((0, u'99'),
                          tuple(g.agg(functions.first(df.key), functions.last(df.value)).first()))
         self.assertTrue(95 < g.agg(functions.approx_count_distinct(df.key)).first()[0])
+        # test deprecated countDistinct
         self.assertEqual(100, g.agg(functions.countDistinct(df.value)).first()[0])
 
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
index 188435d..e65e869 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteDistinctAggregates.scala
@@ -39,8 +39,8 @@ import org.apache.spark.sql.types.IntegerType
  *
  *   val agg = data.groupBy($"key")
  *     .agg(
- *       countDistinct($"cat1").as("cat1_cnt"),
- *       countDistinct($"cat2").as("cat2_cnt"),
+ *       count_distinct($"cat1").as("cat1_cnt"),
+ *       count_distinct($"cat2").as("cat2_cnt"),
  *       sum($"value").as("total"))
  * }}}
  *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d399197..e78b811 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2319,7 +2319,7 @@ class Dataset[T] private[sql](
    *
    *   val allWords = ds.select('title, explode(split('words, " ")).as("word"))
    *
-   *   val bookCountPerWord = allWords.groupBy("word").agg(countDistinct("title"))
+   *   val bookCountPerWord = allWords.groupBy("word").agg(count_distinct("title"))
    * }}}
    *
    * Using `flatMap()` this can similarly be exploded as:
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 40d2a51..de95067 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -116,6 +116,16 @@ object functions {
   /**
    * Creates a [[Column]] of literal value.
    *
+   * An alias of `typedlit`, and it is encouraged to use `typedlit` directly.
+   *
+   * @group normal_funcs
+   * @since 2.2.0
+   */
+  def typedLit[T : TypeTag](literal: T): Column = typedlit(literal)
+
+  /**
+   * Creates a [[Column]] of literal value.
+   *
    * The passed in object is returned directly if it is already a [[Column]].
    * If the object is a Scala Symbol, it is converted into a [[Column]] also.
    * Otherwise, a new [[Column]] is created to represent the literal value.
@@ -123,9 +133,9 @@ object functions {
    * can handle parameterized scala types e.g.: List, Seq and Map.
    *
    * @group normal_funcs
-   * @since 2.2.0
+   * @since 3.2.0
    */
-  def typedLit[T : TypeTag](literal: T): Column = literal match {
+  def typedlit[T : TypeTag](literal: T): Column = literal match {
     case c: Column => c
     case s: Symbol => new ColumnName(s.name)
     case _ => Column(Literal.create(literal))
@@ -388,24 +398,37 @@ object functions {
   /**
    * Aggregate function: returns the number of distinct items in a group.
    *
+   * An alias of `count_distinct`, and it is encouraged to use `count_distinct` directly.
+   *
    * @group agg_funcs
    * @since 1.3.0
    */
   @scala.annotation.varargs
-  def countDistinct(expr: Column, exprs: Column*): Column =
-    // For usage like countDistinct("*"), we should let analyzer expand star and
-    // resolve function.
-    Column(UnresolvedFunction("count", (expr +: exprs).map(_.expr), isDistinct = true))
+  def countDistinct(expr: Column, exprs: Column*): Column = count_distinct(expr, exprs: _*)
 
   /**
    * Aggregate function: returns the number of distinct items in a group.
    *
+   * An alias of `count_distinct`, and it is encouraged to use `count_distinct` directly.
+   *
    * @group agg_funcs
    * @since 1.3.0
    */
   @scala.annotation.varargs
   def countDistinct(columnName: String, columnNames: String*): Column =
-    countDistinct(Column(columnName), columnNames.map(Column.apply) : _*)
+    count_distinct(Column(columnName), columnNames.map(Column.apply) : _*)
+
+  /**
+   * Aggregate function: returns the number of distinct items in a group.
+   *
+   * @group agg_funcs
+   * @since 3.2.0
+   */
+  @scala.annotation.varargs
+  def count_distinct(expr: Column, exprs: Column*): Column =
+    // For usage like countDistinct("*"), we should let analyzer expand star and
+    // resolve function.
+    Column(UnresolvedFunction("count", (expr +: exprs).map(_.expr), isDistinct = true))
 
   /**
    * Aggregate function: returns the population covariance for two columns.
@@ -796,6 +819,7 @@ object functions {
    * @group agg_funcs
    * @since 1.3.0
    */
+  @deprecated("Use sum_distinct", "3.2.0")
   def sumDistinct(e: Column): Column = withAggregateFunction(Sum(e.expr), isDistinct = true)
 
   /**
@@ -804,7 +828,16 @@ object functions {
    * @group agg_funcs
    * @since 1.3.0
    */
-  def sumDistinct(columnName: String): Column = sumDistinct(Column(columnName))
+  @deprecated("Use sum_distinct", "3.2.0")
+  def sumDistinct(columnName: String): Column = sum_distinct(Column(columnName))
+
+  /**
+   * Aggregate function: returns the sum of distinct values in the expression.
+   *
+   * @group agg_funcs
+   * @since 3.2.0
+   */
+  def sum_distinct(e: Column): Column = withAggregateFunction(Sum(e.expr), isDistinct = true)
 
   /**
    * Aggregate function: alias for `var_samp`.
@@ -1411,7 +1444,16 @@ object functions {
    * @group normal_funcs
    * @since 1.4.0
    */
-  def bitwiseNOT(e: Column): Column = withExpr { BitwiseNot(e.expr) }
+  @deprecated("Use bitwise_not", "3.2.0")
+  def bitwiseNOT(e: Column): Column = bitwise_not(e)
+
+  /**
+   * Computes bitwise NOT (~) of a number.
+   *
+   * @group normal_funcs
+   * @since 3.2.0
+   */
+  def bitwise_not(e: Column): Column = withExpr { BitwiseNot(e.expr) }
 
   /**
    * Parses the expression string into the column that it represents, similar to
@@ -2142,7 +2184,17 @@ object functions {
    * @group math_funcs
    * @since 1.5.0
    */
-  def shiftLeft(e: Column, numBits: Int): Column = withExpr { ShiftLeft(e.expr, lit(numBits).expr) }
+  @deprecated("Use shiftleft", "3.2.0")
+  def shiftLeft(e: Column, numBits: Int): Column = shiftleft(e, numBits)
+
+  /**
+   * Shift the given value numBits left. If the given value is a long value, this function
+   * will return a long value else it will return an integer value.
+   *
+   * @group math_funcs
+   * @since 3.2.0
+   */
+  def shiftleft(e: Column, numBits: Int): Column = withExpr { ShiftLeft(e.expr, lit(numBits).expr) }
 
   /**
    * (Signed) shift the given value numBits right. If the given value is a long value, it will
@@ -2151,7 +2203,17 @@ object functions {
    * @group math_funcs
    * @since 1.5.0
    */
-  def shiftRight(e: Column, numBits: Int): Column = withExpr {
+  @deprecated("Use shiftright", "3.2.0")
+  def shiftRight(e: Column, numBits: Int): Column = shiftright(e, numBits)
+
+  /**
+   * (Signed) shift the given value numBits right. If the given value is a long value, it will
+   * return a long value else it will return an integer value.
+   *
+   * @group math_funcs
+   * @since 3.2.0
+   */
+  def shiftright(e: Column, numBits: Int): Column = withExpr {
     ShiftRight(e.expr, lit(numBits).expr)
   }
 
@@ -2162,7 +2224,17 @@ object functions {
    * @group math_funcs
    * @since 1.5.0
    */
-  def shiftRightUnsigned(e: Column, numBits: Int): Column = withExpr {
+  @deprecated("Use shiftrightunsigned", "3.2.0")
+  def shiftRightUnsigned(e: Column, numBits: Int): Column = shiftrightunsigned(e, numBits)
+
+  /**
+   * Unsigned shift the given value numBits right. If the given value is a long value,
+   * it will return a long value else it will return an integer value.
+   *
+   * @group math_funcs
+   * @since 3.2.0
+   */
+  def shiftrightunsigned(e: Column, numBits: Int): Column = withExpr {
     ShiftRightUnsigned(e.expr, lit(numBits).expr)
   }
 
@@ -5075,6 +5147,17 @@ object functions {
 
   /**
    * Call an user-defined function.
+   *
+   * @group udf_funcs
+   * @since 1.5.0
+   */
+  @scala.annotation.varargs
+  @deprecated("Use call_udf")
+  def callUDF(udfName: String, cols: Column*): Column =
+    call_udf(udfName, cols: _*)
+
+  /**
+   * Call an user-defined function.
    * Example:
    * {{{
    *  import org.apache.spark.sql._
@@ -5082,14 +5165,14 @@ object functions {
    *  val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
    *  val spark = df.sparkSession
    *  spark.udf.register("simpleUDF", (v: Int) => v * v)
-   *  df.select($"id", callUDF("simpleUDF", $"value"))
+   *  df.select($"id", call_udf("simpleUDF", $"value"))
    * }}}
    *
    * @group udf_funcs
-   * @since 1.5.0
+   * @since 3.2.0
    */
   @scala.annotation.varargs
-  def callUDF(udfName: String, cols: Column*): Column = withExpr {
+  def call_udf(udfName: String, cols: Column*): Column = withExpr {
     UnresolvedFunction(udfName, cols.map(_.expr), isDistinct = false)
   }
 }
diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
index f4bffd9..da7c622 100644
--- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
+++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameSuite.java
@@ -105,7 +105,7 @@ public class JavaDataFrameSuite {
 
     // Varargs in column expressions
     df.groupBy().agg(countDistinct("key", "value"));
-    df.groupBy().agg(countDistinct(col("key"), col("value")));
+    df.groupBy().agg(count_distinct(col("key"), col("value")));
     df.select(coalesce(col("key")));
 
     // Varargs with mathfunctions
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index c6d134b..07e6a40 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -297,7 +297,7 @@ class DataFrameAggregateSuite extends QueryTest
       Row(2.0, 2.0))
 
     checkAnswer(
-      testData2.agg(avg($"a"), sumDistinct($"a")), // non-partial
+      testData2.agg(avg($"a"), sumDistinct($"a")), // non-partial and test deprecated version
       Row(2.0, 6.0) :: Nil)
 
     checkAnswer(
@@ -305,7 +305,7 @@ class DataFrameAggregateSuite extends QueryTest
       Row(new java.math.BigDecimal(2)))
 
     checkAnswer(
-      decimalData.agg(avg($"a"), sumDistinct($"a")), // non-partial
+      decimalData.agg(avg($"a"), sum_distinct($"a")), // non-partial
       Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil)
 
     checkAnswer(
@@ -314,7 +314,7 @@ class DataFrameAggregateSuite extends QueryTest
     // non-partial
     checkAnswer(
       decimalData.agg(
-        avg($"a" cast DecimalType(10, 2)), sumDistinct($"a" cast DecimalType(10, 2))),
+        avg($"a" cast DecimalType(10, 2)), sum_distinct($"a" cast DecimalType(10, 2))),
       Row(new java.math.BigDecimal(2), new java.math.BigDecimal(6)) :: Nil)
   }
 
@@ -324,11 +324,11 @@ class DataFrameAggregateSuite extends QueryTest
       Row(2.0))
 
     checkAnswer(
-      testData3.agg(avg($"b"), countDistinct($"b")),
+      testData3.agg(avg($"b"), count_distinct($"b")),
       Row(2.0, 1))
 
     checkAnswer(
-      testData3.agg(avg($"b"), sumDistinct($"b")), // non-partial
+      testData3.agg(avg($"b"), sum_distinct($"b")), // non-partial
       Row(2.0, 2.0))
   }
 
@@ -339,7 +339,7 @@ class DataFrameAggregateSuite extends QueryTest
       Row(null))
 
     checkAnswer(
-      emptyTableData.agg(avg($"a"), sumDistinct($"b")), // non-partial
+      emptyTableData.agg(avg($"a"), sum_distinct($"b")), // non-partial
       Row(null, null))
   }
 
@@ -347,7 +347,7 @@ class DataFrameAggregateSuite extends QueryTest
     assert(testData2.count() === testData2.rdd.map(_ => 1).count())
 
     checkAnswer(
-      testData2.agg(count($"a"), sumDistinct($"a")), // non-partial
+      testData2.agg(count($"a"), sum_distinct($"a")), // non-partial
       Row(6, 6.0))
   }
 
@@ -364,12 +364,12 @@ class DataFrameAggregateSuite extends QueryTest
 
     checkAnswer(
       testData3.agg(
-        count($"a"), count($"b"), count(lit(1)), countDistinct($"a"), countDistinct($"b")),
+        count($"a"), count($"b"), count(lit(1)), count_distinct($"a"), count_distinct($"b")),
       Row(2, 1, 2, 2, 1)
     )
 
     checkAnswer(
-      testData3.agg(count($"b"), countDistinct($"b"), sumDistinct($"b")), // non-partial
+      testData3.agg(count($"b"), count_distinct($"b"), sum_distinct($"b")), // non-partial
       Row(1, 1, 2)
     )
   }
@@ -384,17 +384,17 @@ class DataFrameAggregateSuite extends QueryTest
       .toDF("key1", "key2", "key3")
 
     checkAnswer(
-      df1.agg(countDistinct($"key1", $"key2")),
+      df1.agg(count_distinct($"key1", $"key2")),
       Row(3)
     )
 
     checkAnswer(
-      df1.agg(countDistinct($"key1", $"key2", $"key3")),
+      df1.agg(count_distinct($"key1", $"key2", $"key3")),
       Row(3)
     )
 
     checkAnswer(
-      df1.groupBy($"key1").agg(countDistinct($"key2", $"key3")),
+      df1.groupBy($"key1").agg(count_distinct($"key2", $"key3")),
       Seq(Row("a", 2), Row("x", 1))
     )
   }
@@ -402,7 +402,7 @@ class DataFrameAggregateSuite extends QueryTest
   test("zero count") {
     val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
     checkAnswer(
-      emptyTableData.agg(count($"a"), sumDistinct($"a")), // non-partial
+      emptyTableData.agg(count($"a"), sum_distinct($"a")), // non-partial
       Row(0, null))
   }
 
@@ -433,7 +433,7 @@ class DataFrameAggregateSuite extends QueryTest
   test("zero sum distinct") {
     val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
     checkAnswer(
-      emptyTableData.agg(sumDistinct($"a")),
+      emptyTableData.agg(sum_distinct($"a")),
       Row(null))
   }
 
@@ -622,7 +622,7 @@ class DataFrameAggregateSuite extends QueryTest
     val df = Seq((1, 3, "a"), (1, 2, "b"), (3, 4, "c"), (3, 4, "c"), (3, 5, "d"))
       .toDF("x", "y", "z")
     checkAnswer(
-      df.groupBy($"x").agg(countDistinct($"y"), sort_array(collect_list($"z"))),
+      df.groupBy($"x").agg(count_distinct($"y"), sort_array(collect_list($"z"))),
       Seq(Row(1, 2, Seq("a", "b")), Row(3, 2, Seq("c", "c", "d"))))
   }
 
@@ -837,7 +837,7 @@ class DataFrameAggregateSuite extends QueryTest
     )
   }
 
-  test("SPARK-27581: DataFrame countDistinct(\"*\") shouldn't fail with AnalysisException") {
+  test("SPARK-27581: DataFrame count_distinct(\"*\") shouldn't fail with AnalysisException") {
     val df = sql("select id % 100 from range(100000)")
     val distinctCount1 = df.select(expr("count(distinct(*))"))
     val distinctCount2 = df.select(countDistinct("*"))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index aa1678e..99f17e7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -171,10 +171,10 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
     )
   }
 
-  test("bitwiseNOT") {
+  test("bitwise_not") {
     checkAnswer(
-      testData2.select(bitwiseNOT($"a")),
-      testData2.collect().toSeq.map(r => Row(~r.getInt(0))))
+      testData2.select(bitwiseNOT($"a"), bitwise_not($"a")),
+      testData2.collect().toSeq.map(r => Row(~r.getInt(0), ~r.getInt(0))))
   }
 
   test("bin") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 3cafeed3..10b99b8 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -133,7 +133,7 @@ class DataFrameSuite extends QueryTest
       df2
         .select('_1 as 'letter, 'number)
         .groupBy('letter)
-        .agg(countDistinct('number)),
+        .agg(count_distinct('number)),
       Row("a", 3) :: Row("b", 2) :: Row("c", 1) :: Nil
     )
   }
@@ -513,7 +513,7 @@ class DataFrameSuite extends QueryTest
         Row(5, false)))
 
     checkAnswer(
-      testData2.select(sumDistinct($"a")),
+      testData2.select(sum_distinct($"a")),
       Row(6))
   }
 
@@ -607,7 +607,7 @@ class DataFrameSuite extends QueryTest
     val df = Seq(("id1", 1), ("id2", 4), ("id3", 5)).toDF("id", "value")
     df.sparkSession.udf.register("simpleUDF", (v: Int) => v * v)
     checkAnswer(
-      df.select($"id", callUDF("simpleUDF", $"value")),
+      df.select($"id", callUDF("simpleUDF", $"value")), // test deprecated one
       Row("id1", 1) :: Row("id2", 16) :: Row("id3", 25) :: Nil)
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
index b545d60..34b1654 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala
@@ -47,7 +47,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession {
 
   test("function current_timestamp and now") {
     val df1 = Seq((1, 2), (3, 1)).toDF("a", "b")
-    checkAnswer(df1.select(countDistinct(current_timestamp())), Row(1))
+    checkAnswer(df1.select(count_distinct(current_timestamp())), Row(1))
 
     // Execution in one query should return the same value
     checkAnswer(sql("""SELECT CURRENT_TIMESTAMP() = CURRENT_TIMESTAMP()"""), Row(true))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
index 87526b1..3509804 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala
@@ -366,14 +366,14 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
 
     checkAnswer(
       df.select(
-        shiftLeft('a, 1), shiftLeft('b, 1), shiftLeft('c, 1), shiftLeft('d, 1),
-        shiftLeft('f, 1)),
+        shiftleft('a, 1), shiftleft('b, 1), shiftleft('c, 1), shiftleft('d, 1),
+        shiftLeft('f, 1)), // test deprecated one.
         Row(42.toLong, 42, 42.toShort, 42.toByte, null))
 
     checkAnswer(
       df.selectExpr(
-        "shiftLeft(a, 1)", "shiftLeft(b, 1)", "shiftLeft(b, 1)", "shiftLeft(d, 1)",
-        "shiftLeft(f, 1)"),
+        "shiftleft(a, 1)", "shiftleft(b, 1)", "shiftleft(b, 1)", "shiftleft(d, 1)",
+        "shiftleft(f, 1)"),
       Row(42.toLong, 42, 42.toShort, 42.toByte, null))
   }
 
@@ -383,14 +383,14 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
 
     checkAnswer(
       df.select(
-        shiftRight('a, 1), shiftRight('b, 1), shiftRight('c, 1), shiftRight('d, 1),
-        shiftRight('f, 1)),
+        shiftright('a, 1), shiftright('b, 1), shiftright('c, 1), shiftright('d, 1),
+        shiftRight('f, 1)), // test deprecated one.
       Row(21.toLong, 21, 21.toShort, 21.toByte, null))
 
     checkAnswer(
       df.selectExpr(
-        "shiftRight(a, 1)", "shiftRight(b, 1)", "shiftRight(c, 1)", "shiftRight(d, 1)",
-        "shiftRight(f, 1)"),
+        "shiftright(a, 1)", "shiftright(b, 1)", "shiftright(c, 1)", "shiftright(d, 1)",
+        "shiftright(f, 1)"),
       Row(21.toLong, 21, 21.toShort, 21.toByte, null))
   }
 
@@ -400,14 +400,14 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession {
 
     checkAnswer(
       df.select(
-        shiftRightUnsigned('a, 1), shiftRightUnsigned('b, 1), shiftRightUnsigned('c, 1),
-        shiftRightUnsigned('d, 1), shiftRightUnsigned('f, 1)),
+        shiftrightunsigned('a, 1), shiftrightunsigned('b, 1), shiftrightunsigned('c, 1),
+        shiftrightunsigned('d, 1), shiftRightUnsigned('f, 1)), // test deprecated one.
       Row(9223372036854775787L, 21, 21.toShort, 21.toByte, null))
 
     checkAnswer(
       df.selectExpr(
-        "shiftRightUnsigned(a, 1)", "shiftRightUnsigned(b, 1)", "shiftRightUnsigned(c, 1)",
-        "shiftRightUnsigned(d, 1)", "shiftRightUnsigned(f, 1)"),
+        "shiftrightunsigned(a, 1)", "shiftrightunsigned(b, 1)", "shiftrightunsigned(c, 1)",
+        "shiftrightunsigned(d, 1)", "shiftrightunsigned(f, 1)"),
       Row(9223372036854775787L, 21, 21.toShort, 21.toByte, null))
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
index 8924d2e..e851722 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
@@ -61,13 +61,13 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper {
   }
 
   test("count distinct is partially aggregated") {
-    val query = testData.groupBy('value).agg(countDistinct('key)).queryExecution.analyzed
+    val query = testData.groupBy('value).agg(count_distinct('key)).queryExecution.analyzed
     testPartialAggregationPlan(query)
   }
 
   test("mixed aggregates are partially aggregated") {
     val query =
-      testData.groupBy('value).agg(count('value), countDistinct('key)).queryExecution.analyzed
+      testData.groupBy('value).agg(count('value), count_distinct('key)).queryExecution.analyzed
     testPartialAggregationPlan(query)
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
index 18d3667..d2406aa 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
@@ -120,8 +120,8 @@ class SameResultSuite extends QueryTest with SharedSparkSession {
     val df2 = spark.range(10).agg(sum($"id"))
     assert(df1.queryExecution.executedPlan.sameResult(df2.queryExecution.executedPlan))
 
-    val df3 = spark.range(10).agg(sumDistinct($"id"))
-    val df4 = spark.range(10).agg(sumDistinct($"id"))
+    val df3 = spark.range(10).agg(sum_distinct($"id"))
+    val df4 = spark.range(10).agg(sum_distinct($"id"))
     assert(df3.queryExecution.executedPlan.sameResult(df4.queryExecution.executedPlan))
   }
 
diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java
index 2b53238..268a31d 100644
--- a/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java
+++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/JavaDataFrameSuite.java
@@ -85,7 +85,7 @@ public class JavaDataFrameSuite {
           udaf.distinct(col("value")),
           udaf.apply(col("value")),
           registeredUDAF.apply(col("value")),
-          callUDF("mydoublesum", col("value")));
+          callUDF("mydoublesum", col("value")));  // test deprecated one
 
     List<Row> expectedResult = new ArrayList<>();
     expectedResult.add(RowFactory.create(4950.0, 9900.0, 9900.0, 9900.0));
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 77d54ed..4e64b4d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -711,7 +711,7 @@ object SPARK_9757 extends QueryTest {
         val df =
           hiveContext
             .range(10)
-            .select(callUDF("struct", ($"id" + 0.2) cast DecimalType(10, 3)) as "dec_struct")
+            .select(call_udf("struct", ($"id" + 0.2) cast DecimalType(10, 3)) as "dec_struct")
         df.write.option("path", dir.getCanonicalPath).mode("overwrite").saveAsTable("t")
         checkAnswer(hiveContext.table("t"), df)
       }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala
index 72aeb4f..2bca25e 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ObjectHashAggregateSuite.scala
@@ -219,7 +219,7 @@ class ObjectHashAggregateSuite
     val withPartialSafe = max($"c2")
 
     // A Spark SQL native distinct aggregate function
-    val withDistinct = countDistinct($"c3")
+    val withDistinct = count_distinct($"c3")
 
     val allAggs = Seq(
       "typed" -> typed,

