You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/06/24 14:36:07 UTC

spark git commit: [SPARK-21203][SQL] Fix wrong results of insertion of Array of Struct

Repository: spark
Updated Branches:
  refs/heads/master 7c7bc8fc0 -> 2e1586f60


[SPARK-21203][SQL] Fix wrong results of insertion of Array of Struct

### What changes were proposed in this pull request?
```SQL
CREATE TABLE `tab1`
(`custom_fields` ARRAY<STRUCT<`id`: BIGINT, `value`: STRING>>)
USING parquet

INSERT INTO `tab1`
SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b'))

SELECT custom_fields.id, custom_fields.value FROM tab1
```

The above query always returns the last struct of the array, because the rule `SimplifyCasts` incorrectly rewrites the query. The underlying cause is that we always use the same `GenericInternalRow` object when doing the cast.

### How was this patch tested?
Added a test case to `InsertSuite`.

Author: gatorsmile <ga...@gmail.com>

Closes #18412 from gatorsmile/castStruct.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e1586f6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e1586f6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e1586f6

Branch: refs/heads/master
Commit: 2e1586f60a77ea0adb6f3f68ba74323f0c242199
Parents: 7c7bc8f
Author: gatorsmile <ga...@gmail.com>
Authored: Sat Jun 24 22:35:59 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sat Jun 24 22:35:59 2017 +0800

----------------------------------------------------------------------
 .../spark/sql/catalyst/expressions/Cast.scala   |  4 ++--
 .../apache/spark/sql/sources/InsertSuite.scala  | 21 ++++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2e1586f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index a53ef42..43df19b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -482,15 +482,15 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
       case (fromField, toField) => cast(fromField.dataType, toField.dataType)
     }
     // TODO: Could be faster?
-    val newRow = new GenericInternalRow(from.fields.length)
     buildCast[InternalRow](_, row => {
+      val newRow = new GenericInternalRow(from.fields.length)
       var i = 0
       while (i < row.numFields) {
         newRow.update(i,
           if (row.isNullAt(i)) null else castFuncs(i)(row.get(i, from.apply(i).dataType)))
         i += 1
       }
-      newRow.copy()
+      newRow
     })
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2e1586f6/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 2eae66d..41abff2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -345,4 +345,25 @@ class InsertSuite extends DataSourceTest with SharedSQLContext {
       )
     }
   }
+
+  test("SPARK-21203 wrong results of insertion of Array of Struct") {
+    val tabName = "tab1"
+    withTable(tabName) {
+      spark.sql(
+        """
+          |CREATE TABLE `tab1`
+          |(`custom_fields` ARRAY<STRUCT<`id`: BIGINT, `value`: STRING>>)
+          |USING parquet
+        """.stripMargin)
+      spark.sql(
+        """
+          |INSERT INTO `tab1`
+          |SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b'))
+        """.stripMargin)
+
+      checkAnswer(
+        spark.sql("SELECT custom_fields.id, custom_fields.value FROM tab1"),
+        Row(Array(1, 2), Array("a", "b")))
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org