Posted to commits@spark.apache.org by li...@apache.org on 2017/07/27 23:49:46 UTC

spark git commit: [SPARK-21538][SQL] Attribute resolution inconsistency in the Dataset API

Repository: spark
Updated Branches:
  refs/heads/master 9f5647d62 -> f44ead89f


[SPARK-21538][SQL] Attribute resolution inconsistency in the Dataset API

## What changes were proposed in this pull request?

This PR contains a tiny update that removes an attribute resolution inconsistency in the Dataset API. The following example is taken from the ticket description:

```
spark.range(1).withColumnRenamed("id", "x").sort(col("id"))  // works
spark.range(1).withColumnRenamed("id", "x").sort($"id")  // works
spark.range(1).withColumnRenamed("id", "x").sort('id) // works
spark.range(1).withColumnRenamed("id", "x").sort("id") // fails with:
org.apache.spark.sql.AnalysisException: Cannot resolve column name "id" among (x);
```
The above `AnalysisException` occurs because the string-based overload calls `Dataset.apply()` to convert column names into columns, which resolves the attributes eagerly against the Dataset's current schema. To make the overloaded methods behave consistently, this PR defers resolution by constructing unresolved `Column`s directly.
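
For illustration only (not part of the patch), here is a minimal sketch of the two resolution paths; the `SparkSession` setup and names below are assumptions for the demo, not taken from the commit:

```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

// Assumed setup (not from the commit): a local SparkSession for the demo.
val spark = SparkSession.builder().master("local[*]").appName("spark-21538-demo").getOrCreate()

val df = spark.range(1).withColumnRenamed("id", "x")

// Old behavior of the string overload: Dataset.apply resolves "id" eagerly
// against df's schema (which only contains "x"), so this line throws
// org.apache.spark.sql.AnalysisException before the Sort is even constructed.
// df.sort(df("id"))

// Deferred resolution: col("id") builds an unresolved Column; the analyzer
// resolves it while analyzing the Sort and can still find "id" in the
// underlying plan, so this succeeds. After this patch, df.sort("id")
// goes through the same deferred path.
df.sort(col("id")).show()

spark.stop()
```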

Author: aokolnychyi <an...@sap.com>

Closes #18740 from aokolnychyi/spark-21538.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f44ead89
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f44ead89
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f44ead89

Branch: refs/heads/master
Commit: f44ead89f48f040b7eb9dfc88df0ec995b47bfe9
Parents: 9f5647d
Author: aokolnychyi <an...@sap.com>
Authored: Thu Jul 27 16:49:42 2017 -0700
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Jul 27 16:49:42 2017 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/Dataset.scala  |  2 +-
 .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/f44ead89/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 9007367..aa968d8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -1108,7 +1108,7 @@ class Dataset[T] private[sql](
    */
   @scala.annotation.varargs
   def sort(sortCol: String, sortCols: String*): Dataset[T] = {
-    sort((sortCol +: sortCols).map(apply) : _*)
+    sort((sortCol +: sortCols).map(Column(_)) : _*)
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/f44ead89/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 73098cd..40235e3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -1304,6 +1304,19 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
       assert(rlike3.count() == 0)
     }
   }
+
+  test("SPARK-21538: Attribute resolution inconsistency in Dataset API") {
+    val df = spark.range(3).withColumnRenamed("id", "x")
+    val expected = Row(0) :: Row(1) :: Row (2) :: Nil
+    checkAnswer(df.sort("id"), expected)
+    checkAnswer(df.sort(col("id")), expected)
+    checkAnswer(df.sort($"id"), expected)
+    checkAnswer(df.sort('id), expected)
+    checkAnswer(df.orderBy("id"), expected)
+    checkAnswer(df.orderBy(col("id")), expected)
+    checkAnswer(df.orderBy($"id"), expected)
+    checkAnswer(df.orderBy('id), expected)
+  }
 }
 
 case class WithImmutableMap(id: String, map_test: scala.collection.immutable.Map[Long, String])


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org