Posted to issues@hive.apache.org by "László Pintér (Jira)" <ji...@apache.org> on 2022/06/13 09:14:00 UTC

[jira] [Assigned] (HIVE-26318) Select on migrated iceberg table fails with NPE

     [ https://issues.apache.org/jira/browse/HIVE-26318?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

László Pintér reassigned HIVE-26318:
------------------------------------


> Select on migrated iceberg table fails with NPE
> -----------------------------------------------
>
>                 Key: HIVE-26318
>                 URL: https://issues.apache.org/jira/browse/HIVE-26318
>             Project: Hive
>          Issue Type: Bug
>            Reporter: László Pintér
>            Assignee: László Pintér
>            Priority: Major
>
> Enable vectorization:
> {code:sql}
> set hive.vectorized.execution.enabled=true;
> {code}
> Create a Hive table with the following schema:
> {code:sql}
> CREATE EXTERNAL TABLE tbl_complex (
> a int, 
> arrayofprimitives array<string>, 
> arrayofarrays array<array<string>>,
> arrayofmaps array<map<string, string>>,
> arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
> mapofprimitives map<string, string>,
> mapofarrays map<string, array<string>>,
> mapofmaps map<string, map<string, string>>,
> mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
> structofprimitives struct<something:string, somewhere:string>, 
> structofarrays struct<names:array<string>, birthdays:array<string>>, 
> structofmaps struct<map1:map<string, string>, map2:map<string, string>>
> ) STORED AS PARQUET;
> {code}
> Insert some data:
> {code:sql}
> INSERT INTO tbl_complex VALUES (
>         1, 
>         array('a','b','c'), 
>         array(array('a'), array('b', 'c')), 
>         array(map('a','b'), map('e','f')), 
>         array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), 
>         named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')), 
>         map('a', 'b'), 
>         map('a', array('b','c')), 
>         map('a', map('b','c')), 
>         map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')), 
>         named_struct('something', 'a', 'somewhere', 'b'), 
>         named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')), 
>         named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')) 
>  )
> {code}
> Migrate the table to Iceberg:
> {code:sql}
> ALTER TABLE tbl_complex SET TBLPROPERTIES ('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler');
> {code}
> Run a simple query: 
> {code:sql}
> SELECT * FROM tbl_complex ORDER BY a;
> {code}
> It will fail with:
> {code:txt}
> TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_1655110825475_0001_3_00_000000_1:java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: java.lang.NullPointerException
> 	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348)
> 	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276)
> 	at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69)
> 	at java.security.AccessController.doPrivileged(Native Method)
> 	at javax.security.auth.Subject.doAs(Subject.java:422)
> 	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39)
> 	at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
> 	at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
> 	at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
> 	at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> 	at java.lang.Thread.run(Thread.java:748)
> Caused by: java.lang.RuntimeException: java.io.IOException: java.lang.NullPointerException
> 	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:200)
> 	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:139)
> 	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:105)
> 	at org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:164)
> 	at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83)
> 	at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:706)
> 	at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:665)
> 	at org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150)
> 	at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114)
> 	at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520)
> 	at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173)
> 	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292)
> 	... 16 more
> Caused by: java.io.IOException: java.lang.NullPointerException
> 	at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
> 	at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
> 	at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:458)
> 	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:197)
> 	... 27 more
> Caused by: java.lang.NullPointerException
> 	at org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:101)
> 	at org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:38)
> 	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:52)
> 	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155)
> 	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:83)
> 	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155)
> 	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitFields(TypeWithSchemaVisitor.java:169)
> 	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:47)
> 	at org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.parquetRecordReader(HiveVectorizedReader.java:203)
> 	at org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.reader(HiveVectorizedReader.java:138)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> 	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> 	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> 	at java.lang.reflect.Method.invoke(Method.java:498)
> 	at org.apache.iceberg.common.DynMethods$UnboundMethod.invokeChecked(DynMethods.java:65)
> 	at org.apache.iceberg.common.DynMethods$UnboundMethod.invoke(DynMethods.java:77)
> 	at org.apache.iceberg.common.DynMethods$StaticMethod.invoke(DynMethods.java:196)
> 	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.newParquetIterable(IcebergInputFormat.java:417)
> 	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.openTask(IcebergInputFormat.java:336)
> 	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.open(IcebergInputFormat.java:353)
> 	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextTask(IcebergInputFormat.java:263)
> 	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.initialize(IcebergInputFormat.java:259)
> 	at org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader.<init>(AbstractMapredIcebergRecordReader.java:40)
> 	at org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.<init>(HiveIcebergVectorizedRecordReader.java:41)
> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
> 	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
> 	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
> 	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
> 	at org.apache.iceberg.common.DynConstructors$Ctor.newInstanceChecked(DynConstructors.java:60)
> 	at org.apache.iceberg.common.DynConstructors$Ctor.newInstance(DynConstructors.java:73)
> 	at org.apache.iceberg.mr.hive.HiveIcebergInputFormat.getRecordReader(HiveIcebergInputFormat.java:163)
> 	at org.apache.hadoop.hive.ql.io.RecordReaderWrapper.create(RecordReaderWrapper.java:72)
> 	at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:449)
> 	... 28 more
> {code}
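> The NPE is thrown from ParquetSchemaFieldNameVisitor.primitive while the vectorized Parquet reader is being set up. A plausible cause (an assumption, not verified here) is that the data files were written by Hive before the migration and therefore carry no Iceberg field IDs in their Parquet schema, so a lookup keyed on those IDs returns null and is then dereferenced. A minimal, self-contained sketch of that failure pattern, using hypothetical names (icebergFieldsById, parquetFieldId) rather than the actual visitor internals:
> {code:java}
> import java.util.HashMap;
> import java.util.Map;
>
> public class MissingFieldIdSketch {
>   public static void main(String[] args) {
>     // Hypothetical stand-in for the visitor's lookup of Iceberg fields by ID.
>     Map<Integer, String> icebergFieldsById = new HashMap<>();
>     icebergFieldsById.put(1, "a");
>     icebergFieldsById.put(2, "arrayofprimitives");
>
>     // Parquet files written by Hive before the migration carry no Iceberg
>     // field IDs, so the ID resolved for a column can be null.
>     Integer parquetFieldId = null;
>
>     // The lookup returns null and the subsequent dereference throws a
>     // NullPointerException, analogous to the one in the stack trace above.
>     String fieldName = icebergFieldsById.get(parquetFieldId);
>     System.out.println(fieldName.length());
>   }
> }
> {code}
> If this is indeed the cause, disabling vectorization (set hive.vectorized.execution.enabled=false;) should bypass the HiveVectorizedReader path shown in the trace and may serve as a temporary workaround.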



--
This message was sent by Atlassian Jira
(v8.20.7#820007)