You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2016/08/20 15:30:05 UTC

spark git commit: [SPARK-17104][SQL] LogicalRelation.newInstance should follow the semantics of MultiInstanceRelation

Repository: spark
Updated Branches:
  refs/heads/master 3e5fdeb3f -> 31a015572


[SPARK-17104][SQL] LogicalRelation.newInstance should follow the semantics of MultiInstanceRelation

## What changes were proposed in this pull request?

Currently `LogicalRelation.newInstance()` simply creates another `LogicalRelation` object with the same parameters. However, the `newInstance()` method inherited from `MultiInstanceRelation` should return a copy of the object with unique expression ids. The current `LogicalRelation.newInstance()` can cause failures when doing a self-join.

## How was this patch tested?

Jenkins tests.

Author: Liang-Chi Hsieh <si...@tw.ibm.com>

Closes #14682 from viirya/fix-localrelation.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31a01557
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31a01557
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31a01557

Branch: refs/heads/master
Commit: 31a015572024046f4deaa6cec66bb6fab110f31d
Parents: 3e5fdeb
Author: Liang-Chi Hsieh <si...@tw.ibm.com>
Authored: Sat Aug 20 23:29:48 2016 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sat Aug 20 23:29:48 2016 +0800

----------------------------------------------------------------------
 .../sql/execution/datasources/LogicalRelation.scala      | 11 +++++++++--
 .../scala/org/apache/spark/sql/hive/parquetSuites.scala  |  7 +++++++
 2 files changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/31a01557/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
index 90711f2..2a8e147 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala
@@ -79,11 +79,18 @@ case class LogicalRelation(
   /** Used to lookup original attribute capitalization */
   val attributeMap: AttributeMap[AttributeReference] = AttributeMap(output.map(o => (o, o)))
 
-  def newInstance(): this.type =
+  /**
+   * Returns a new instance of this LogicalRelation. According to the semantics of
+   * MultiInstanceRelation, this method returns a copy of this object with
+   * unique expression ids. We respect the `expectedOutputAttributes` and create
+   * new instances of attributes in it.
+   */
+  override def newInstance(): this.type = {
     LogicalRelation(
       relation,
-      expectedOutputAttributes,
+      expectedOutputAttributes.map(_.map(_.newInstance())),
       metastoreTableIdentifier).asInstanceOf[this.type]
+  }
 
   override def refresh(): Unit = relation match {
     case fs: HadoopFsRelation => fs.refresh()

http://git-wip-us.apache.org/repos/asf/spark/blob/31a01557/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
index 31b6197..e92bbde 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/parquetSuites.scala
@@ -589,6 +589,13 @@ class ParquetMetastoreSuite extends ParquetPartitioningTest {
       }
     }
   }
+
+  test("self-join") {
+    val table = spark.table("normal_parquet")
+    val selfJoin = table.as("t1").join(table.as("t2"))
+    checkAnswer(selfJoin,
+      sql("SELECT * FROM normal_parquet x JOIN normal_parquet y"))
+  }
 }
 
 /**


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org