You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2017/07/27 23:49:46 UTC
spark git commit: [SPARK-21538][SQL] Attribute resolution
inconsistency in the Dataset API
Repository: spark
Updated Branches:
refs/heads/master 9f5647d62 -> f44ead89f
[SPARK-21538][SQL] Attribute resolution inconsistency in the Dataset API
## What changes were proposed in this pull request?
This PR contains a tiny update that removes an attribute resolution inconsistency in the Dataset API. The following example is taken from the ticket description:
```
spark.range(1).withColumnRenamed("id", "x").sort(col("id")) // works
spark.range(1).withColumnRenamed("id", "x").sort($"id") // works
spark.range(1).withColumnRenamed("id", "x").sort('id) // works
spark.range(1).withColumnRenamed("id", "x").sort("id") // fails with:
org.apache.spark.sql.AnalysisException: Cannot resolve column name "id" among (x);
```
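The above `AnalysisException` happens because the last case calls `Dataset.apply()` to convert each string into a column, which eagerly resolves the name against the Dataset's current output (here only `x`, so `id` cannot be found). The other overloads receive unresolved columns and leave resolution to the analyzer. To make the overloaded methods behave consistently, this PR constructs the columns directly with `Column(_)`, deferring resolution in the string case as well.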
Author: aokolnychyi <an...@sap.com>
Closes #18740 from aokolnychyi/spark-21538.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f44ead89
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f44ead89
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f44ead89
Branch: refs/heads/master
Commit: f44ead89f48f040b7eb9dfc88df0ec995b47bfe9
Parents: 9f5647d
Author: aokolnychyi <an...@sap.com>
Authored: Thu Jul 27 16:49:42 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Jul 27 16:49:42 2017 -0700
----------------------------------------------------------------------
.../src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
.../test/scala/org/apache/spark/sql/DatasetSuite.scala | 13 +++++++++++++
2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/f44ead89/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 9007367..aa968d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1108,7 +1108,7 @@ class Dataset[T] private[sql](
*/
@scala.annotation.varargs
def sort(sortCol: String, sortCols: String*): Dataset[T] = {
- sort((sortCol +: sortCols).map(apply) : _*)
+ sort((sortCol +: sortCols).map(Column(_)) : _*)
}
/**
http://git-wip-us.apache.org/repos/asf/spark/blob/f44ead89/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 73098cd..40235e3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -1304,6 +1304,19 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
assert(rlike3.count() == 0)
}
}
+
+ test("SPARK-21538: Attribute resolution inconsistency in Dataset API") {
+ val df = spark.range(3).withColumnRenamed("id", "x")
+ val expected = Row(0) :: Row(1) :: Row (2) :: Nil
+ checkAnswer(df.sort("id"), expected)
+ checkAnswer(df.sort(col("id")), expected)
+ checkAnswer(df.sort($"id"), expected)
+ checkAnswer(df.sort('id), expected)
+ checkAnswer(df.orderBy("id"), expected)
+ checkAnswer(df.orderBy(col("id")), expected)
+ checkAnswer(df.orderBy($"id"), expected)
+ checkAnswer(df.orderBy('id), expected)
+ }
}
case class WithImmutableMap(id: String, map_test: scala.collection.immutable.Map[Long, String])
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org