You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "László Bodor (Jira)" <ji...@apache.org> on 2021/08/26 11:11:00 UTC

[jira] [Updated] (HIVE-25484) NULL cannot be inserted as a Parquet map

     [ https://issues.apache.org/jira/browse/HIVE-25484?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

László Bodor updated HIVE-25484:
--------------------------------
    Description: 
Not sure if this is a bug or a feature request, but I haven't been able to test NULL as a map in the scope of HIVE-23688:
{code}
CREATE TABLE parquet_map_type_string (
id int,
stringMap map<string, string>
) stored as parquet;

insert into parquet_map_type_string SELECT 1, MAP('k1', null, 'k2', 'v2'); -- NULL as value
insert into parquet_map_type_string (id) VALUES (2); -- NULL as map
{code}

leads to:
{code}
Caused by: java.lang.NullPointerException
	at org.apache.parquet.io.api.Binary$FromStringBinary.encodeUTF8(Binary.java:218)
	at org.apache.parquet.io.api.Binary$FromStringBinary.<init>(Binary.java:209)
	at org.apache.parquet.io.api.Binary.fromString(Binary.java:537)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$StringDataWriter.write(DataWritableWriter.java:474)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$MapDataWriter.write(DataWritableWriter.java:354)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$GroupDataWriter.write(DataWritableWriter.java:228)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$MessageDataWriter.write(DataWritableWriter.java:251)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.write(DataWritableWriter.java:115)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.write(DataWritableWriteSupport.java:76)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.write(DataWritableWriteSupport.java:35)
	at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:128)
	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:182)
	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:44)
	at org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper.write(ParquetRecordWriterWrapper.java:161)
	at org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper.write(ParquetRecordWriterWrapper.java:174)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:1160)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888)
	at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:94)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888)
	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:173)
	at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:155)
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:550)
{code}

  was:
{code}
CREATE TABLE parquet_map_type_string (
id int,
stringMap map<string, string>
) stored as parquet;

insert into parquet_map_type_string SELECT 1, MAP('k1', null, 'k2', 'v2'); -- NULL as value
insert into parquet_map_type_string (id) VALUES (2) -- NULL as map;
{code}

leads to:
{code}
Caused by: java.lang.NullPointerException
	at org.apache.parquet.io.api.Binary$FromStringBinary.encodeUTF8(Binary.java:218)
	at org.apache.parquet.io.api.Binary$FromStringBinary.<init>(Binary.java:209)
	at org.apache.parquet.io.api.Binary.fromString(Binary.java:537)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$StringDataWriter.write(DataWritableWriter.java:474)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$MapDataWriter.write(DataWritableWriter.java:354)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$GroupDataWriter.write(DataWritableWriter.java:228)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$MessageDataWriter.write(DataWritableWriter.java:251)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.write(DataWritableWriter.java:115)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.write(DataWritableWriteSupport.java:76)
	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.write(DataWritableWriteSupport.java:35)
	at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:128)
	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:182)
	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:44)
	at org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper.write(ParquetRecordWriterWrapper.java:161)
	at org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper.write(ParquetRecordWriterWrapper.java:174)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:1160)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888)
	at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:94)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888)
	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:173)
	at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:155)
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:550)
{code}


> NULL cannot be inserted as a Parquet map
> ----------------------------------------
>
>                 Key: HIVE-25484
>                 URL: https://issues.apache.org/jira/browse/HIVE-25484
>             Project: Hive
>          Issue Type: Bug
>            Reporter: László Bodor
>            Assignee: László Bodor
>            Priority: Major
>
> Not sure if this is a bug or a feature request, but I haven't been able to test NULL as a map in the scope of HIVE-23688:
> {code}
> CREATE TABLE parquet_map_type_string (
> id int,
> stringMap map<string, string>
> ) stored as parquet;
> insert into parquet_map_type_string SELECT 1, MAP('k1', null, 'k2', 'v2'); -- NULL as value
> insert into parquet_map_type_string (id) VALUES (2); -- NULL as map
> {code}
> leads to:
> {code}
> Caused by: java.lang.NullPointerException
> 	at org.apache.parquet.io.api.Binary$FromStringBinary.encodeUTF8(Binary.java:218)
> 	at org.apache.parquet.io.api.Binary$FromStringBinary.<init>(Binary.java:209)
> 	at org.apache.parquet.io.api.Binary.fromString(Binary.java:537)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$StringDataWriter.write(DataWritableWriter.java:474)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$MapDataWriter.write(DataWritableWriter.java:354)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$GroupDataWriter.write(DataWritableWriter.java:228)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter$MessageDataWriter.write(DataWritableWriter.java:251)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriter.write(DataWritableWriter.java:115)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.write(DataWritableWriteSupport.java:76)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport.write(DataWritableWriteSupport.java:35)
> 	at org.apache.parquet.hadoop.InternalParquetRecordWriter.write(InternalParquetRecordWriter.java:128)
> 	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:182)
> 	at org.apache.parquet.hadoop.ParquetRecordWriter.write(ParquetRecordWriter.java:44)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper.write(ParquetRecordWriterWrapper.java:161)
> 	at org.apache.hadoop.hive.ql.io.parquet.write.ParquetRecordWriterWrapper.write(ParquetRecordWriterWrapper.java:174)
> 	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:1160)
> 	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888)
> 	at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:94)
> 	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:888)
> 	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:173)
> 	at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:155)
> 	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:550)
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)