You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2017/06/24 14:36:07 UTC
spark git commit: [SPARK-21203][SQL] Fix wrong results of insertion
of Array of Struct
Repository: spark
Updated Branches:
refs/heads/master 7c7bc8fc0 -> 2e1586f60
[SPARK-21203][SQL] Fix wrong results of insertion of Array of Struct
### What changes were proposed in this pull request?
```SQL
CREATE TABLE `tab1`
(`custom_fields` ARRAY<STRUCT<`id`: BIGINT, `value`: STRING>>)
USING parquet
INSERT INTO `tab1`
SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b'))
SELECT custom_fields.id, custom_fields.value FROM tab1
```
The above query always returns the last struct of the array, because the rule `SimplifyCasts` incorrectly rewrites the query. The underlying cause is that we always reuse the same `GenericInternalRow` object when doing the cast.
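### How was this patch tested?
Added a regression test ("SPARK-21203 wrong results of insertion of Array of Struct") to `InsertSuite`.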
Author: gatorsmile <ga...@gmail.com>
Closes #18412 from gatorsmile/castStruct.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2e1586f6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2e1586f6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2e1586f6
Branch: refs/heads/master
Commit: 2e1586f60a77ea0adb6f3f68ba74323f0c242199
Parents: 7c7bc8f
Author: gatorsmile <ga...@gmail.com>
Authored: Sat Jun 24 22:35:59 2017 +0800
Committer: Wenchen Fan <we...@databricks.com>
Committed: Sat Jun 24 22:35:59 2017 +0800
----------------------------------------------------------------------
.../spark/sql/catalyst/expressions/Cast.scala | 4 ++--
.../apache/spark/sql/sources/InsertSuite.scala | 21 ++++++++++++++++++++
2 files changed, 23 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/2e1586f6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index a53ef42..43df19b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -482,15 +482,15 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String
case (fromField, toField) => cast(fromField.dataType, toField.dataType)
}
// TODO: Could be faster?
- val newRow = new GenericInternalRow(from.fields.length)
buildCast[InternalRow](_, row => {
+ val newRow = new GenericInternalRow(from.fields.length)
var i = 0
while (i < row.numFields) {
newRow.update(i,
if (row.isNullAt(i)) null else castFuncs(i)(row.get(i, from.apply(i).dataType)))
i += 1
}
- newRow.copy()
+ newRow
})
}
http://git-wip-us.apache.org/repos/asf/spark/blob/2e1586f6/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 2eae66d..41abff2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -345,4 +345,25 @@ class InsertSuite extends DataSourceTest with SharedSQLContext {
)
}
}
+
+ test("SPARK-21203 wrong results of insertion of Array of Struct") {
+ val tabName = "tab1"
+ withTable(tabName) {
+ spark.sql(
+ """
+ |CREATE TABLE `tab1`
+ |(`custom_fields` ARRAY<STRUCT<`id`: BIGINT, `value`: STRING>>)
+ |USING parquet
+ """.stripMargin)
+ spark.sql(
+ """
+ |INSERT INTO `tab1`
+ |SELECT ARRAY(named_struct('id', 1, 'value', 'a'), named_struct('id', 2, 'value', 'b'))
+ """.stripMargin)
+
+ checkAnswer(
+ spark.sql("SELECT custom_fields.id, custom_fields.value FROM tab1"),
+ Row(Array(1, 2), Array("a", "b")))
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org