You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/09/11 06:17:34 UTC
spark git commit: [SPARK-25371][SQL] struct() should allow being called with 0 args
Repository: spark
Updated Branches:
refs/heads/master da5685b5b -> 0736e72a6
[SPARK-25371][SQL] struct() should allow being called with 0 args
## What changes were proposed in this pull request?
SPARK-21281 introduced a check requiring the inputs of `CreateNamedStructLike` to be non-empty. As a result, `struct()`, which was previously considered valid, now throws an exception. This behavior change was introduced in 2.3.0. It may break users' applications on upgrade, and it causes `VectorAssembler` to fail when an empty `inputCols` is defined.
This PR removes the added check, making `struct()` valid again.
## How was this patch tested?
Added a unit test (UT) covering `VectorAssembler` with empty `inputCols`.
Closes #22373 from mgaido91/SPARK-25371.
Authored-by: Marco Gaido <ma...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0736e72a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0736e72a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0736e72a
Branch: refs/heads/master
Commit: 0736e72a66735664b191fc363f54e3c522697dba
Parents: da5685b
Author: Marco Gaido <ma...@gmail.com>
Authored: Tue Sep 11 14:16:56 2018 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Tue Sep 11 14:16:56 2018 +0800
----------------------------------------------------------------------
.../org/apache/spark/ml/feature/VectorAssemblerSuite.scala | 5 +++++
.../spark/sql/catalyst/expressions/complexTypeCreator.scala | 5 +----
.../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 2 --
3 files changed, 6 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/0736e72a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
index ed15a1d..a4d388f 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala
@@ -256,4 +256,9 @@ class VectorAssemblerSuite
assert(runWithMetadata("keep", additional_filter = "id1 > 2").count() == 4)
}
+ test("SPARK-25371: VectorAssembler with empty inputCols") {
+ val vectorAssembler = new VectorAssembler().setInputCols(Array()).setOutputCol("a")
+ val output = vectorAssembler.transform(dfWithNullsAndNaNs)
+ assert(output.select("a").limit(1).collect().head == Row(Vectors.sparse(0, Seq.empty)))
+ }
}
http://git-wip-us.apache.org/repos/asf/spark/blob/0736e72a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
index 077a6dc..aba9c6c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
@@ -379,10 +379,7 @@ trait CreateNamedStructLike extends Expression {
}
override def checkInputDataTypes(): TypeCheckResult = {
- if (children.length < 1) {
- TypeCheckResult.TypeCheckFailure(
- s"input to function $prettyName requires at least one argument")
- } else if (children.size % 2 != 0) {
+ if (children.size % 2 != 0) {
TypeCheckResult.TypeCheckFailure(s"$prettyName expects an even number of arguments.")
} else {
val invalidNames = nameExprs.filterNot(e => e.foldable && e.dataType == StringType)
http://git-wip-us.apache.org/repos/asf/spark/blob/0736e72a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index 4b83e51..121db442 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -2677,8 +2677,6 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
val funcsMustHaveAtLeastOneArg =
("coalesce", (df: DataFrame) => df.select(coalesce())) ::
("coalesce", (df: DataFrame) => df.selectExpr("coalesce()")) ::
- ("named_struct", (df: DataFrame) => df.select(struct())) ::
- ("named_struct", (df: DataFrame) => df.selectExpr("named_struct()")) ::
("hash", (df: DataFrame) => df.select(hash())) ::
("hash", (df: DataFrame) => df.selectExpr("hash()")) :: Nil
funcsMustHaveAtLeastOneArg.foreach { case (name, func) =>
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org