You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/07 00:46:48 UTC
spark git commit: [SPARK-23327][SQL] Update the description and tests
of three external API or functions
Repository: spark
Updated Branches:
refs/heads/master b96a083b1 -> c36fecc3b
[SPARK-23327][SQL] Update the description and tests of three external API or functions
## What changes were proposed in this pull request?
Update the description and tests of three external APIs or functions: `createFunction`, `length` and `repartitionByRange`.
## How was this patch tested?
N/A
Author: gatorsmile <ga...@gmail.com>
Closes #20495 from gatorsmile/updateFunc.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c36fecc3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c36fecc3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c36fecc3
Branch: refs/heads/master
Commit: c36fecc3b416c38002779c3cf40b6a665ac4bf13
Parents: b96a083
Author: gatorsmile <ga...@gmail.com>
Authored: Tue Feb 6 16:46:43 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Tue Feb 6 16:46:43 2018 -0800
----------------------------------------------------------------------
R/pkg/R/functions.R | 4 +++-
python/pyspark/sql/functions.py | 8 ++++---
.../sql/catalyst/catalog/SessionCatalog.scala | 7 ++++--
.../expressions/stringExpressions.scala | 23 ++++++++++----------
.../scala/org/apache/spark/sql/Dataset.scala | 2 ++
.../spark/sql/execution/command/functions.scala | 14 +++++++-----
.../scala/org/apache/spark/sql/functions.scala | 4 +++-
.../sql/execution/command/DDLParserSuite.scala | 10 ++++-----
8 files changed, 44 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/R/pkg/R/functions.R
----------------------------------------------------------------------
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 55365a4..9f7c631 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -1026,7 +1026,9 @@ setMethod("last_day",
})
#' @details
-#' \code{length}: Computes the length of a given string or binary column.
+#' \code{length}: Computes the character length of string data or the number of bytes
+#' of binary data. The length of string data includes the trailing spaces.
+#' The length of binary data includes binary zeros.
#'
#' @rdname column_string_functions
#' @aliases length length,Column-method
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/python/pyspark/sql/functions.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3c8fb4c..05031f5 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1705,10 +1705,12 @@ def unhex(col):
@ignore_unicode_prefix
@since(1.5)
def length(col):
- """Calculates the length of a string or binary expression.
+ """Computes the character length of string data or number of bytes of binary data.
+ The length of character data includes the trailing spaces. The length of binary data
+ includes binary zeros.
- >>> spark.createDataFrame([('ABC',)], ['a']).select(length('a').alias('length')).collect()
- [Row(length=3)]
+ >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect()
+ [Row(length=4)]
"""
sc = SparkContext._active_spark_context
return Column(sc._jvm.functions.length(_to_java_column(col)))
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index a129896..4b119c7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -988,8 +988,11 @@ class SessionCatalog(
// -------------------------------------------------------
/**
- * Create a metastore function in the database specified in `funcDefinition`.
+ * Create a function in the database specified in `funcDefinition`.
* If no such database is specified, create it in the current database.
+ *
+ * @param ignoreIfExists: When true, ignore if the function with the specified name exists
+ * in the specified database.
*/
def createFunction(funcDefinition: CatalogFunction, ignoreIfExists: Boolean): Unit = {
val db = formatDatabaseName(funcDefinition.identifier.database.getOrElse(getCurrentDatabase))
@@ -1061,7 +1064,7 @@ class SessionCatalog(
}
/**
- * Check if the specified function exists.
+ * Check if the function with the specified name exists
*/
def functionExists(name: FunctionIdentifier): Boolean = {
val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase))
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 5cf783f..d7612e3 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1653,19 +1653,19 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run
* A function that returns the char length of the given string expression or
* number of bytes of the given binary expression.
*/
-// scalastyle:off line.size.limit
@ExpressionDescription(
- usage = "_FUNC_(expr) - Returns the character length of `expr` or number of bytes in binary data.",
+ usage = "_FUNC_(expr) - Returns the character length of string data or number of bytes of " +
+ "binary data. The length of string data includes the trailing spaces. The length of binary " +
+ "data includes binary zeros.",
examples = """
Examples:
- > SELECT _FUNC_('Spark SQL');
- 9
- > SELECT CHAR_LENGTH('Spark SQL');
- 9
- > SELECT CHARACTER_LENGTH('Spark SQL');
- 9
+ > SELECT _FUNC_('Spark SQL ');
+ 10
+ > SELECT CHAR_LENGTH('Spark SQL ');
+ 10
+ > SELECT CHARACTER_LENGTH('Spark SQL ');
+ 10
""")
-// scalastyle:on line.size.limit
case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def dataType: DataType = IntegerType
override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType))
@@ -1687,7 +1687,7 @@ case class Length(child: Expression) extends UnaryExpression with ImplicitCastIn
* A function that returns the bit length of the given string or binary expression.
*/
@ExpressionDescription(
- usage = "_FUNC_(expr) - Returns the bit length of `expr` or number of bits in binary data.",
+ usage = "_FUNC_(expr) - Returns the bit length of string data or number of bits of binary data.",
examples = """
Examples:
> SELECT _FUNC_('Spark SQL');
@@ -1716,7 +1716,8 @@ case class BitLength(child: Expression) extends UnaryExpression with ImplicitCas
* A function that returns the byte length of the given string or binary expression.
*/
@ExpressionDescription(
- usage = "_FUNC_(expr) - Returns the byte length of `expr` or number of bytes in binary data.",
+ usage = "_FUNC_(expr) - Returns the byte length of string data or number of bytes of binary " +
+ "data.",
examples = """
Examples:
> SELECT _FUNC_('Spark SQL');
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index d47cd0a..0aee1d7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2825,6 +2825,7 @@ class Dataset[T] private[sql](
*
* At least one partition-by expression must be specified.
* When no explicit sort order is specified, "ascending nulls first" is assumed.
+ * Note, the rows are not sorted in each partition of the resulting Dataset.
*
* @group typedrel
* @since 2.3.0
@@ -2848,6 +2849,7 @@ class Dataset[T] private[sql](
*
* At least one partition-by expression must be specified.
* When no explicit sort order is specified, "ascending nulls first" is assumed.
+ * Note, the rows are not sorted in each partition of the resulting Dataset.
*
* @group typedrel
* @since 2.3.0
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
index 4f92ffe..1f7808c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala
@@ -40,6 +40,10 @@ import org.apache.spark.sql.types.{StringType, StructField, StructType}
* CREATE [OR REPLACE] FUNCTION [IF NOT EXISTS] [databaseName.]functionName
* AS className [USING JAR|FILE 'uri' [, JAR|FILE 'uri']]
* }}}
+ *
+ * @param ignoreIfExists: When true, ignore if the function with the specified name exists
+ * in the specified database.
+ * @param replace: When true, alter the function with the specified name
*/
case class CreateFunctionCommand(
databaseName: Option[String],
@@ -47,17 +51,17 @@ case class CreateFunctionCommand(
className: String,
resources: Seq[FunctionResource],
isTemp: Boolean,
- ifNotExists: Boolean,
+ ignoreIfExists: Boolean,
replace: Boolean)
extends RunnableCommand {
- if (ifNotExists && replace) {
+ if (ignoreIfExists && replace) {
throw new AnalysisException("CREATE FUNCTION with both IF NOT EXISTS and REPLACE" +
" is not allowed.")
}
// Disallow to define a temporary function with `IF NOT EXISTS`
- if (ifNotExists && isTemp) {
+ if (ignoreIfExists && isTemp) {
throw new AnalysisException(
"It is not allowed to define a TEMPORARY function with IF NOT EXISTS.")
}
@@ -79,12 +83,12 @@ case class CreateFunctionCommand(
// Handles `CREATE OR REPLACE FUNCTION AS ... USING ...`
if (replace && catalog.functionExists(func.identifier)) {
// alter the function in the metastore
- catalog.alterFunction(CatalogFunction(func.identifier, className, resources))
+ catalog.alterFunction(func)
} else {
// For a permanent, we will store the metadata into underlying external catalog.
// This function will be loaded into the FunctionRegistry when a query uses it.
// We do not load it into FunctionRegistry right now.
- catalog.createFunction(CatalogFunction(func.identifier, className, resources), ifNotExists)
+ catalog.createFunction(func, ignoreIfExists)
}
}
Seq.empty[Row]
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0d11682..0d54c02 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2267,7 +2267,9 @@ object functions {
}
/**
- * Computes the length of a given string or binary column.
+ * Computes the character length of a given string or number of bytes of a binary string.
+ * The length of character strings includes the trailing spaces. The length of binary strings
+ * includes binary zeros.
*
* @group string_funcs
* @since 1.5.0
http://git-wip-us.apache.org/repos/asf/spark/blob/c36fecc3/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
index 2b1aea0..e0ccae1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala
@@ -236,7 +236,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
Seq(
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
- isTemp = true, ifNotExists = false, replace = false)
+ isTemp = true, ignoreIfExists = false, replace = false)
val expected2 = CreateFunctionCommand(
Some("hello"),
"world",
@@ -244,7 +244,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
Seq(
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
- isTemp = false, ifNotExists = false, replace = false)
+ isTemp = false, ignoreIfExists = false, replace = false)
val expected3 = CreateFunctionCommand(
None,
"helloworld3",
@@ -252,7 +252,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
Seq(
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar1"),
FunctionResource(FunctionResourceType.fromString("jar"), "/path/to/jar2")),
- isTemp = true, ifNotExists = false, replace = true)
+ isTemp = true, ignoreIfExists = false, replace = true)
val expected4 = CreateFunctionCommand(
Some("hello"),
"world1",
@@ -260,7 +260,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
Seq(
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
- isTemp = false, ifNotExists = false, replace = true)
+ isTemp = false, ignoreIfExists = false, replace = true)
val expected5 = CreateFunctionCommand(
Some("hello"),
"world2",
@@ -268,7 +268,7 @@ class DDLParserSuite extends PlanTest with SharedSQLContext {
Seq(
FunctionResource(FunctionResourceType.fromString("archive"), "/path/to/archive"),
FunctionResource(FunctionResourceType.fromString("file"), "/path/to/file")),
- isTemp = false, ifNotExists = true, replace = false)
+ isTemp = false, ignoreIfExists = true, replace = false)
comparePlans(parsed1, expected1)
comparePlans(parsed2, expected2)
comparePlans(parsed3, expected3)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org