You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2016/08/20 15:30:05 UTC
spark git commit: [SPARK-17104][SQL] LogicalRelation.newInstance
should follow the semantics of MultiInstanceRelation
Repository: spark
Updated Branches:
refs/heads/master 3e5fdeb3f -> 31a015572
[SPARK-17104][SQL] LogicalRelation.newInstance should follow the semantics of MultiInstanceRelation
## What changes were proposed in this pull request?
Currently, `LogicalRelation.newInstance()` simply creates another `LogicalRelation` object with the same parameters. However, the `newInstance()` method inherited from `MultiInstanceRelation` should return a copy of the object with unique expression ids. The current `LogicalRelation.newInstance()` can therefore cause failures when performing a self-join.
## How was this patch tested?
Jenkins tests.
Author: Liang-Chi Hsieh <si...@tw.ibm.com>
Closes #14682 from viirya/fix-localrelation.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31a01557
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31a01557
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31a01557
Branch: refs/heads/master
Commit: 31a015572024046f4deaa6cec66bb6fab110f31d
Parents: 3e5fdeb
Author: Liang-Chi Hsieh <si...@tw.ibm.com>
Authored: Sat Aug 20 23:29:48 2016 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sat Aug 20 23:29:48 2016 +0800
----------------------------------------------------------------------
.../sql/execution/datasources/LogicalRelation.scala | 11 +++++++++--
.../scala/org/apache/spark/sql/hive/parquetSuites.scala | 7 +++++++
2 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/31a01557/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
index 90711f2..2a8e147 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
@@ -79,11 +79,18 @@ case class LogicalRelation(
/** Used to lookup original attribute capitalization */
val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o)))
- def newInstance(): this.type =
+ /**
+ * Returns a new instance of this LogicalRelation. According to the semantics of
+ * MultiInstanceRelation, this method returns a copy of this object with
+ * unique expression ids. We respect the `expectedOutputAttributes` and create
+ * new instances of attributes in it.
+ */
+ override def newInstance(): this.type = {
LogicalRelation(
relation,
- expectedOutputAttributes,
+ expectedOutputAttributes.map(_.map(_.newInstance())),
metastoreTableIdentifier).asInstanceOf[this.type]
+ }
override def refresh(): Unit = relation match {
case fs: HadoopFsRelation => fs.refresh()
http://git-wip-us.apache.org/repos/asf/spark/blob/31a01557/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index 31b6197..e92bbde 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -589,6 +589,13 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest {
}
}
}
+
+ test("self-join") {
+ val table = spark.table("normal_parquet")
+ val selfJoin = table.as("t1").join(table.as("t2"))
+ checkAnswer(selfJoin,
+ sql("SELECT * FROM normal_parquet x JOIN normal_parquet y"))
+ }
}
/**
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org