You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@spark.apache.org by "Yuming Wang (Jira)" <ji...@apache.org> on 2021/02/23 23:42:00 UTC
[jira] [Updated] (SPARK-34512) Disable validate default values when
parsing Avro schemas
[ https://issues.apache.org/jira/browse/SPARK-34512?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yuming Wang updated SPARK-34512:
--------------------------------
Description:
How to reproduce this issue:
{code:scala}
// Add this test to HiveSerDeReadWriteSuite
test("SPARK-34512") {
withTable("t1") {
hiveClient.runSqlHive(
"""
|CREATE TABLE t1
| ROW FORMAT SERDE
| 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
| STORED AS INPUTFORMAT
| 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
| OUTPUTFORMAT
| 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
| TBLPROPERTIES (
| 'avro.schema.literal'='{
| "namespace": "org.apache.spark.sql.hive.test",
| "name": "schema_with_default_value",
| "type": "record",
| "fields": [
| {
| "name": "ARRAY_WITH_DEFAULT",
| "type": {"type": "array", "items": "string"},
| "default": null
| }
| ]
| }')
|""".stripMargin)
spark.sql("select * from t1").show
}
}
{code}
{noformat}
org.apache.avro.AvroTypeException: Invalid default for field ARRAY_WITH_DEFAULT: null not a {"type":"array","items":"string"}
at org.apache.avro.Schema.validateDefault(Schema.java:1571)
at org.apache.avro.Schema.access$500(Schema.java:87)
at org.apache.avro.Schema$Field.<init>(Schema.java:544)
at org.apache.avro.Schema.parse(Schema.java:1678)
at org.apache.avro.Schema$Parser.parse(Schema.java:1425)
at org.apache.avro.Schema$Parser.parse(Schema.java:1413)
at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:268)
at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:111)
at org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187)
at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107)
at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83)
at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:450)
at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:437)
at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:281)
at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:263)
at org.apache.hadoop.hive.ql.metadata.Table.getColsInternal(Table.java:641)
at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:624)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:831)
at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:867)
at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356)
at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354)
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183)
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839)
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$runHive$1(HiveClientImpl.scala:820)
at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:291)
at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:224)
at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:223)
at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273)
at org.apache.spark.sql.hive.client.HiveClientImpl.runHive(HiveClientImpl.scala:800)
at org.apache.spark.sql.hive.client.HiveClientImpl.runSqlHive(HiveClientImpl.scala:787)
{noformat}
It worked before (in earlier Spark versions).
was:
How to reproduce this issue:
{code:scala}
{code}
> Disable validate default values when parsing Avro schemas
> ---------------------------------------------------------
>
> Key: SPARK-34512
> URL: https://issues.apache.org/jira/browse/SPARK-34512
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.2.0
> Reporter: Yuming Wang
> Priority: Major
>
> How to reproduce this issue:
> {code:scala}
> // Add this test to HiveSerDeReadWriteSuite
> test("SPARK-34512") {
> withTable("t1") {
> hiveClient.runSqlHive(
> """
> |CREATE TABLE t1
> | ROW FORMAT SERDE
> | 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> | STORED AS INPUTFORMAT
> | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> | OUTPUTFORMAT
> | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> | TBLPROPERTIES (
> | 'avro.schema.literal'='{
> | "namespace": "org.apache.spark.sql.hive.test",
> | "name": "schema_with_default_value",
> | "type": "record",
> | "fields": [
> | {
> | "name": "ARRAY_WITH_DEFAULT",
> | "type": {"type": "array", "items": "string"},
> | "default": null
> | }
> | ]
> | }')
> |""".stripMargin)
> spark.sql("select * from t1").show
> }
> }
> {code}
> {noformat}
> org.apache.avro.AvroTypeException: Invalid default for field ARRAY_WITH_DEFAULT: null not a {"type":"array","items":"string"}
> at org.apache.avro.Schema.validateDefault(Schema.java:1571)
> at org.apache.avro.Schema.access$500(Schema.java:87)
> at org.apache.avro.Schema$Field.<init>(Schema.java:544)
> at org.apache.avro.Schema.parse(Schema.java:1678)
> at org.apache.avro.Schema$Parser.parse(Schema.java:1425)
> at org.apache.avro.Schema$Parser.parse(Schema.java:1413)
> at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:268)
> at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:111)
> at org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187)
> at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107)
> at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83)
> at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533)
> at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:450)
> at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:437)
> at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:281)
> at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:263)
> at org.apache.hadoop.hive.ql.metadata.Table.getColsInternal(Table.java:641)
> at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:624)
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:831)
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:867)
> at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356)
> at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
> at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183)
> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
> at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$runHive$1(HiveClientImpl.scala:820)
> at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:291)
> at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:224)
> at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:223)
> at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273)
> at org.apache.spark.sql.hive.client.HiveClientImpl.runHive(HiveClientImpl.scala:800)
> at org.apache.spark.sql.hive.client.HiveClientImpl.runSqlHive(HiveClientImpl.scala:787)
> {noformat}
> It worked before (in earlier Spark versions).
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org