You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "Luis Gonzalez (JIRA)" <ji...@apache.org> on 2016/01/28 16:53:39 UTC
[jira] [Updated] (HIVE-12955) avro.schema.literal don't support
more than 50 fields.
[ https://issues.apache.org/jira/browse/HIVE-12955?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Luis Gonzalez updated HIVE-12955:
---------------------------------
Description:
Hi!
We think we have hit a bug. We have tested this in many ways for a couple of hours now, and there is apparently a problem with avro.schema.literal when you specify more than 50 fields. In our tests we have found that, regardless of the Avro file (table) we want to load, it fails with 60 fields. Some of the tables we use have more than 400 fields and have the same problem.
For instance if we launch the command
{code:shell}
hive> drop table tableName;
OK
Time taken: 0.162 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket.../path/to/avro';
OK
Time taken: 0.412 seconds
hive>
> select * from tableName limit 10;
OK
{code}
but when using the same AVRO file and more fields it fails
{code:shell}
hive> drop table tableName;
OK
Time taken: 0.146 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null},
> {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null},
> {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null},
> {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null},
> {"name": "Comments__c", "type":["null", "string"],"default":null},
> {"name": "Street_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Country_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "City_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null},
> {"name": "Timezone__c", "type":["null", "string"],"default":null},
> {"name": "Billing_Language__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket/path/to/avro';
OK
Time taken: 0.48 seconds
hive>
> select * from tableName limit 10;
OK
Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account, expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel
Time taken: 0.028 seconds
{code}
This problem does not occur when we store the Avro schema with 400 fields in a file in S3 and use the avro.schema.url property instead.
was:
Hi!
We think we have hit a bug. We have tested this in many ways for a couple of hours now, and there is apparently a problem with avro.schema.literal when you specify more than 50 fields. In our tests we have found that, regardless of the Avro file (table) we want to load, it fails with 60 fields. Some of the tables we use have more than 400 fields and have the same problem.
For instance if we launch the command
```
hive> drop table tableName;
OK
Time taken: 0.162 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket.../path/to/avro';
OK
Time taken: 0.412 seconds
hive>
> select * from tableName limit 10;
OK
```
but when using the same AVRO file and more fields it fails
```
hive> drop table tableName;
OK
Time taken: 0.146 seconds
hive>
> CREATE EXTERNAL TABLE tableName
> ROW FORMAT
> SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> WITH SERDEPROPERTIES ('avro.schema.literal'='
> { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [
> {"name": "Id", "type":["null", "string"],"default":null},
> {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> {"name": "Name", "type":["null", "string"],"default":null},
> {"name": "Type", "type":["null", "string"],"default":null},
> {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> {"name": "ParentId", "type":["null", "string"],"default":null},
> {"name": "Phone", "type":["null", "string"],"default":null},
> {"name": "Fax", "type":["null", "string"],"default":null},
> {"name": "AccountNumber", "type":["null", "string"],"default":null},
> {"name": "Website", "type":["null", "string"],"default":null},
> {"name": "Industry", "type":["null", "string"],"default":null},
> {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> {"name": "Description", "type":["null", "string"],"default":null},
> {"name": "OwnerId", "type":["null", "string"],"default":null},
> {"name": "CreatedDate", "type":["null", "string"],"default":null},
> {"name": "CreatedById", "type":["null", "string"],"default":null},
> {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> {"name": "Shortname__c", "type":["null", "string"],"default":null},
> {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},
> {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},
> {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null},
> {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},
> {"name": "Division__c", "type":["null", "string"],"default":null},
> {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null},
> {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null},
> {"name": "Relationship__c", "type":["null", "string"],"default":null},
> {"name": "Market_Country__c", "type":["null", "string"],"default":null},
> {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null},
> {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null},
> {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null},
> {"name": "Comments__c", "type":["null", "string"],"default":null},
> {"name": "Street_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Country_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "City_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null},
> {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null},
> {"name": "Timezone__c", "type":["null", "string"],"default":null},
> {"name": "Billing_Language__c", "type":["null", "string"],"default":null},
> {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
> ] }
> ')
> STORED AS
> INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> LOCATION 's3://bucket/path/to/avro';
OK
Time taken: 0.48 seconds
hive>
> select * from tableName limit 10;
OK
Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account, expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel
Time taken: 0.028 seconds
```
This problem does not occur when we store the Avro schema with 400 fields in a file in S3 and use the avro.schema.url property instead.
> avro.schema.literal don't support more than 50 fields.
> -------------------------------------------------------
>
> Key: HIVE-12955
> URL: https://issues.apache.org/jira/browse/HIVE-12955
> Project: Hive
> Issue Type: Bug
> Components: Hive
> Affects Versions: 1.0.0
> Reporter: Luis Gonzalez
> Priority: Minor
>
> Hi!
> We think we have hit a bug. We have tested this in many ways for a couple of hours now, and there is apparently a problem with avro.schema.literal when you specify more than 50 fields. In our tests we have found that, regardless of the Avro file (table) we want to load, it fails with 60 fields. Some of the tables we use have more than 400 fields and have the same problem.
> For instance if we launch the command
> {code:shell}
> hive> drop table tableName;
> OK
> Time taken: 0.162 seconds
> hive>
> > CREATE EXTERNAL TABLE tableName
> > ROW FORMAT
> > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> > WITH SERDEPROPERTIES ('avro.schema.literal'='
> > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [
> > {"name": "Id", "type":["null", "string"],"default":null},
> > {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> > {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> > {"name": "Name", "type":["null", "string"],"default":null},
> > {"name": "Type", "type":["null", "string"],"default":null},
> > {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> > {"name": "ParentId", "type":["null", "string"],"default":null},
> > {"name": "Phone", "type":["null", "string"],"default":null},
> > {"name": "Fax", "type":["null", "string"],"default":null},
> > {"name": "AccountNumber", "type":["null", "string"],"default":null},
> > {"name": "Website", "type":["null", "string"],"default":null},
> > {"name": "Industry", "type":["null", "string"],"default":null},
> > {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> > {"name": "Description", "type":["null", "string"],"default":null},
> > {"name": "OwnerId", "type":["null", "string"],"default":null},
> > {"name": "CreatedDate", "type":["null", "string"],"default":null},
> > {"name": "CreatedById", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> > {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> > {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> > {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},
> > {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> > {"name": "Shortname__c", "type":["null", "string"],"default":null},
> > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},
> > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> > {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},
> > {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null},
> > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},
> > {"name": "Division__c", "type":["null", "string"],"default":null},
> > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> > {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null},
> > {"name": "Relationship__c", "type":["null", "string"],"default":null},
> > {"name": "Market_Country__c", "type":["null", "string"],"default":null}
> > ] }
> > ')
> > STORED AS
> > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> > LOCATION 's3://bucket.../path/to/avro';
> OK
> Time taken: 0.412 seconds
> hive>
> > select * from tableName limit 10;
> OK
> {code}
> but when using the same AVRO file and more fields it fails
> {code:shell}
> hive> drop table tableName;
> OK
> Time taken: 0.146 seconds
> hive>
> > CREATE EXTERNAL TABLE tableName
> > ROW FORMAT
> > SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> > WITH SERDEPROPERTIES ('avro.schema.literal'='
> > { "namespace": "cdr.avro", "type": "record", "name": "Account", "fields": [
> > {"name": "Id", "type":["null", "string"],"default":null},
> > {"name": "IsDeleted", "type":["null", "boolean"],"default":null},
> > {"name": "MasterRecordId", "type":["null", "string"],"default":null},
> > {"name": "Name", "type":["null", "string"],"default":null},
> > {"name": "Type", "type":["null", "string"],"default":null},
> > {"name": "RecordTypeId", "type":["null", "string"],"default":null},
> > {"name": "ParentId", "type":["null", "string"],"default":null},
> > {"name": "Phone", "type":["null", "string"],"default":null},
> > {"name": "Fax", "type":["null", "string"],"default":null},
> > {"name": "AccountNumber", "type":["null", "string"],"default":null},
> > {"name": "Website", "type":["null", "string"],"default":null},
> > {"name": "Industry", "type":["null", "string"],"default":null},
> > {"name": "AnnualRevenue", "type":["null", "double"],"default":null},
> > {"name": "NumberOfEmployees", "type":["null", "int"],"default":null},
> > {"name": "Description", "type":["null", "string"],"default":null},
> > {"name": "OwnerId", "type":["null", "string"],"default":null},
> > {"name": "CreatedDate", "type":["null", "string"],"default":null},
> > {"name": "CreatedById", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedDate", "type":["null", "string"],"default":null},
> > {"name": "LastModifiedById", "type":["null", "string"],"default":null},
> > {"name": "SystemModstamp", "type":["null", "string"],"default":null},
> > {"name": "LastActivityDate", "type":["null", "string"],"default":null},
> > {"name": "IsPartner", "type":["null", "boolean"],"default":null},
> > {"name": "IsCustomerPortal", "type":["null", "boolean"],"default":null},
> > {"name": "JigsawCompanyId", "type":["null", "string"],"default":null},
> > {"name": "Invoice_Level__c", "type":["null", "string"],"default":null},
> > {"name": "IT_Developer_Fee__c", "type":["null", "boolean"],"default":null},
> > {"name": "Customer_Type__c", "type":["null", "string"],"default":null},
> > {"name": "Shortname__c", "type":["null", "string"],"default":null},
> > {"name": "Excluir_compensacion_por_desvio__c", "type":["null", "boolean"],"default":null},
> > {"name": "Commercial_Area__c", "type":["null", "string"],"default":null},
> > {"name": "Account_Status__c", "type":["null", "string"],"default":null},
> > {"name": "Active_Fiscal_Details__c", "type":["null", "boolean"],"default":null},
> > {"name": "Office_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Commercial_Brand__c", "type":["null", "string"],"default":null},
> > {"name": "Agreed_payment_method__c", "type":["null", "string"],"default":null},
> > {"name": "Division__c", "type":["null", "string"],"default":null},
> > {"name": "Inactive_Date__c", "type":["null", "string"],"default":null},
> > {"name": "SAP_Code__c", "type":["null", "string"],"default":null},
> > {"name": "Country_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Fiscal_Number_1__c", "type":["null", "string"],"default":null},
> > {"name": "Fiscal_Number_2__c", "type":["null", "string"],"default":null},
> > {"name": "Street_Fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "City_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Post_Code_fiscal__c", "type":["null", "string"],"default":null},
> > {"name": "Web_Prepayment__c", "type":["null", "boolean"],"default":null},
> > {"name": "Customer_Subtype__c", "type":["null", "string"],"default":null},
> > {"name": "Relationship__c", "type":["null", "string"],"default":null},
> > {"name": "Market_Country__c", "type":["null", "string"],"default":null},
> > {"name": "Customer_Service_Centre__c", "type":["null", "string"],"default":null},
> > {"name": "Acquisition_Channel_Type__c", "type":["null", "string"],"default":null},
> > {"name": "Acquisition_Channel_Description__c", "type":["null", "string"],"default":null},
> > {"name": "Comments__c", "type":["null", "string"],"default":null},
> > {"name": "Street_Commercial__c", "type":["null", "string"],"default":null},
> > {"name": "Country_Commercial__c", "type":["null", "string"],"default":null},
> > {"name": "City_Commercial__c", "type":["null", "string"],"default":null},
> > {"name": "Post_Code_Commercial__c", "type":["null", "string"],"default":null},
> > {"name": "Atlas_Branch_Number__c", "type":["null", "double"],"default":null},
> > {"name": "Timezone__c", "type":["null", "string"],"default":null},
> > {"name": "Billing_Language__c", "type":["null", "string"],"default":null},
> > {"name": "Fiscal_Name__c", "type":["null", "string"],"default":null}
> > ] }
> > ')
> > STORED AS
> > INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> > OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> > LOCATION 's3://bucket/path/to/avro';
> OK
> Time taken: 0.48 seconds
> hive>
> > select * from tableName limit 10;
> OK
> Failed with exception java.io.IOException:org.apache.avro.AvroTypeException: Found cdr.avro.Account, expecting org.apache.hadoop.hive.CannotDetermineSchemaSentinel
> Time taken: 0.028 seconds
> {code}
> This problem does not occur when we store the Avro schema with 400 fields in a file in S3 and use the avro.schema.url property instead.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)