You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by li...@apache.org on 2015/07/09 07:09:37 UTC
spark git commit: [SPARK-8928] [SQL] Makes CatalystSchemaConverter
sticking to 1.4.x- when handling Parquet LISTs in compatible mode
Repository: spark
Updated Branches:
refs/heads/master a240bf3b4 -> 3dab0da42
[SPARK-8928] [SQL] Makes CatalystSchemaConverter stick to the behavior of 1.4.x and prior versions when handling Parquet LISTs in compatible mode
This PR is based on #7209 authored by Sephiroth-Lin.
Author: Weizhong Lin <li...@huawei.com>
Closes #7304 from liancheng/spark-8928 and squashes the following commits:
75267fe [Cheng Lian] Makes CatalystSchemaConverter sticking to 1.4.x- when handling LISTs in compatible mode
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3dab0da4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3dab0da4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3dab0da4
Branch: refs/heads/master
Commit: 3dab0da42940a46f0c4aa4853bdb5c64c4cb2613
Parents: a240bf3
Author: Cheng Lian <li...@databricks.com>
Authored: Wed Jul 8 22:09:12 2015 -0700
Committer: Cheng Lian <li...@databricks.com>
Committed: Wed Jul 8 22:09:14 2015 -0700
----------------------------------------------------------------------
.../spark/sql/parquet/CatalystSchemaConverter.scala | 6 ++++--
.../org/apache/spark/sql/parquet/ParquetSchemaSuite.scala | 10 +++++-----
2 files changed, 9 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/3dab0da4/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala
index de3a72d..1ea6926 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/CatalystSchemaConverter.scala
@@ -461,7 +461,8 @@ private[parquet] class CatalystSchemaConverter(
field.name,
Types
.buildGroup(REPEATED)
- .addField(convertField(StructField("element", elementType, nullable)))
+ // "array_element" is the name chosen by parquet-hive (1.7.0 and prior versions)
+ .addField(convertField(StructField("array_element", elementType, nullable)))
.named(CatalystConverter.ARRAY_CONTAINS_NULL_BAG_SCHEMA_NAME))
// Spark 1.4.x and prior versions convert ArrayType with non-nullable elements into a 2-level
@@ -474,7 +475,8 @@ private[parquet] class CatalystSchemaConverter(
ConversionPatterns.listType(
repetition,
field.name,
- convertField(StructField("element", elementType, nullable), REPEATED))
+ // "array" is the name chosen by parquet-avro (1.7.0 and prior versions)
+ convertField(StructField("array", elementType, nullable), REPEATED))
// Spark 1.4.x and prior versions convert MapType into a 3-level group annotated by
// MAP_KEY_VALUE. This is covered by `convertGroupField(field: GroupType): DataType`.
http://git-wip-us.apache.org/repos/asf/spark/blob/3dab0da4/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
index 35d3c33..fa62939 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetSchemaSuite.scala
@@ -174,7 +174,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
"""
|message root {
| optional group _1 (LIST) {
- | repeated int32 element;
+ | repeated int32 array;
| }
|}
""".stripMargin)
@@ -198,7 +198,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
|message root {
| optional group _1 (LIST) {
| repeated group bag {
- | optional int32 element;
+ | optional int32 array_element;
| }
| }
|}
@@ -267,7 +267,7 @@ class ParquetSchemaInferenceSuite extends ParquetSchemaTest {
| optional binary _1 (UTF8);
| optional group _2 (LIST) {
| repeated group bag {
- | optional group element {
+ | optional group array_element {
| required int32 _1;
| required double _2;
| }
@@ -616,7 +616,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
"""message root {
| optional group f1 (LIST) {
| repeated group bag {
- | optional int32 element;
+ | optional int32 array_element;
| }
| }
|}
@@ -648,7 +648,7 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
nullable = true))),
"""message root {
| optional group f1 (LIST) {
- | repeated int32 element;
+ | repeated int32 array;
| }
|}
""".stripMargin)
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org