You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2018/02/01 18:48:38 UTC

spark git commit: [SPARK-23293][SQL] fix data source v2 self join

Repository: spark
Updated Branches:
  refs/heads/master f051f8340 -> 73da3b696


[SPARK-23293][SQL] fix data source v2 self join

## What changes were proposed in this pull request?

`DataSourceV2Relation` should extend `MultiInstanceRelation`, to take care of self-join.

## How was this patch tested?

a new test

Author: Wenchen Fan <we...@databricks.com>

Closes #20466 from cloud-fan/dsv2-selfjoin.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73da3b69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73da3b69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73da3b69

Branch: refs/heads/master
Commit: 73da3b6968630d9e2cafc742ccb6d4eb54957df4
Parents: f051f83
Author: Wenchen Fan <we...@databricks.com>
Authored: Thu Feb 1 10:48:34 2018 -0800
Committer: gatorsmile <ga...@gmail.com>
Committed: Thu Feb 1 10:48:34 2018 -0800

----------------------------------------------------------------------
 .../sql/execution/datasources/v2/DataSourceV2Relation.scala  | 8 +++++++-
 .../org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala  | 6 ++++++
 2 files changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/73da3b69/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
index 3d4c649..eebfa29 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala
@@ -17,13 +17,15 @@
 
 package org.apache.spark.sql.execution.datasources.v2
 
+import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
 import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics}
 import org.apache.spark.sql.sources.v2.reader._
 
 case class DataSourceV2Relation(
     fullOutput: Seq[AttributeReference],
-    reader: DataSourceReader) extends LeafNode with DataSourceReaderHolder {
+    reader: DataSourceReader)
+  extends LeafNode with MultiInstanceRelation with DataSourceReaderHolder {
 
   override def canEqual(other: Any): Boolean = other.isInstanceOf[DataSourceV2Relation]
 
@@ -33,6 +35,10 @@ case class DataSourceV2Relation(
     case _ =>
       Statistics(sizeInBytes = conf.defaultSizeInBytes)
   }
+
+  override def newInstance(): DataSourceV2Relation = {
+    copy(fullOutput = fullOutput.map(_.newInstance()))
+  }
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/73da3b69/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala
index 1c3ba78..23147ff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/v2/DataSourceV2Suite.scala
@@ -217,6 +217,12 @@ class DataSourceV2Suite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  test("SPARK-23293: data source v2 self join") {
+    val df = spark.read.format(classOf[SimpleDataSourceV2].getName).load()
+    val df2 = df.select(($"i" + 1).as("k"), $"j")
+    checkAnswer(df.join(df2, "j"), (0 until 10).map(i => Row(-i, i, i + 1)))
+  }
 }
 
 class SimpleDataSourceV2 extends DataSourceV2 with ReadSupport {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org