You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "László Pintér (Jira)" <ji...@apache.org> on 2022/06/13 09:14:00 UTC

[jira] [Created] (HIVE-26318) Select on migrated iceberg table fails with NPE

László Pintér created HIVE-26318:
------------------------------------

             Summary: Select on migrated iceberg table fails with NPE
                 Key: HIVE-26318
                 URL: https://issues.apache.org/jira/browse/HIVE-26318
             Project: Hive
          Issue Type: Bug
            Reporter: László Pintér
            Assignee: László Pintér


Enable vectorization:

{code:sql}
set hive.vectorized.execution.enabled=true;
{code}

Create a Hive table with the following schema:
{code:sql}
CREATE EXTERNAL TABLE tbl_complex (
a int, 
arrayofprimitives array<string>, 
arrayofarrays array<array<string>>,
arrayofmaps array<map<string, string>>,
arrayofstructs array<struct<something:string, someone:string, somewhere:string>>,
mapofprimitives map<string, string>,
mapofarrays map<string, array<string>>,
mapofmaps map<string, map<string, string>>,
mapofstructs map<string, struct<something:string, someone:string, somewhere:string>>,
structofprimitives struct<something:string, somewhere:string>, 
structofarrays struct<names:array<string>, birthdays:array<string>>, 
structofmaps struct<map1:map<string, string>, map2:map<string, string>>
) STORED AS PARQUET;
{code}

Insert some data:
{code:sql}
INSERT INTO tbl_complex VALUES (
        1, 
        array('a','b','c'), 
        array(array('a'), array('b', 'c')), 
        array(map('a','b'), map('e','f')), 
        array(named_struct('something', 'a', 'someone', 'b', 'somewhere', 'c'), 
        named_struct('something', 'e', 'someone', 'f', 'somewhere', 'g')), 
        map('a', 'b'), 
        map('a', array('b','c')), 
        map('a', map('b','c')), 
        map('a', named_struct('something', 'b', 'someone', 'c', 'somewhere', 'd')), 
        named_struct('something', 'a', 'somewhere', 'b'), 
        named_struct('names', array('a', 'b'), 'birthdays', array('c', 'd', 'e')), 
        named_struct('map1', map('a', 'b'), 'map2', map('c', 'd')) 
 )
{code}

Migrate the table to Iceberg:

{code:sql}
ALTER TABLE tbl_complex SET TBLPROPERTIES ('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler');
{code}

Run a simple query: 

{code:sql}
SELECT * FROM tbl_complex ORDER BY a;
{code}

It will fail with:

{code:txt}
TaskAttempt 1 failed, info=[Error: Error while running task ( failure ) : attempt_1655110825475_0001_3_00_000000_1:java.lang.RuntimeException: java.lang.RuntimeException: java.io.IOException: java.lang.NullPointerException
	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:348)
	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:276)
	at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:381)
	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:82)
	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:69)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:69)
	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:39)
	at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
	at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
	at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
	at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: java.io.IOException: java.lang.NullPointerException
	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:200)
	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.<init>(TezGroupedSplitsInputFormat.java:139)
	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.getRecordReader(TezGroupedSplitsInputFormat.java:105)
	at org.apache.tez.mapreduce.lib.MRReaderMapred.setupOldRecordReader(MRReaderMapred.java:164)
	at org.apache.tez.mapreduce.lib.MRReaderMapred.setSplit(MRReaderMapred.java:83)
	at org.apache.tez.mapreduce.input.MRInput.initFromEventInternal(MRInput.java:706)
	at org.apache.tez.mapreduce.input.MRInput.initFromEvent(MRInput.java:665)
	at org.apache.tez.mapreduce.input.MRInputLegacy.checkAndAwaitRecordReaderInitialization(MRInputLegacy.java:150)
	at org.apache.tez.mapreduce.input.MRInputLegacy.init(MRInputLegacy.java:114)
	at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.getMRInput(MapRecordProcessor.java:520)
	at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.init(MapRecordProcessor.java:173)
	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:292)
	... 16 more
Caused by: java.io.IOException: java.lang.NullPointerException
	at org.apache.hadoop.hive.io.HiveIOExceptionHandlerChain.handleRecordReaderCreationException(HiveIOExceptionHandlerChain.java:97)
	at org.apache.hadoop.hive.io.HiveIOExceptionHandlerUtil.handleRecordReaderCreationException(HiveIOExceptionHandlerUtil.java:57)
	at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:458)
	at org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat$TezGroupedSplitsRecordReader.initNextRecordReader(TezGroupedSplitsInputFormat.java:197)
	... 27 more
Caused by: java.lang.NullPointerException
	at org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:101)
	at org.apache.iceberg.mr.hive.vector.ParquetSchemaFieldNameVisitor.primitive(ParquetSchemaFieldNameVisitor.java:38)
	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:52)
	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155)
	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:83)
	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitField(TypeWithSchemaVisitor.java:155)
	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visitFields(TypeWithSchemaVisitor.java:169)
	at org.apache.iceberg.parquet.TypeWithSchemaVisitor.visit(TypeWithSchemaVisitor.java:47)
	at org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.parquetRecordReader(HiveVectorizedReader.java:203)
	at org.apache.iceberg.mr.hive.vector.HiveVectorizedReader.reader(HiveVectorizedReader.java:138)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.iceberg.common.DynMethods$UnboundMethod.invokeChecked(DynMethods.java:65)
	at org.apache.iceberg.common.DynMethods$UnboundMethod.invoke(DynMethods.java:77)
	at org.apache.iceberg.common.DynMethods$StaticMethod.invoke(DynMethods.java:196)
	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.newParquetIterable(IcebergInputFormat.java:417)
	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.openTask(IcebergInputFormat.java:336)
	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.open(IcebergInputFormat.java:353)
	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.nextTask(IcebergInputFormat.java:263)
	at org.apache.iceberg.mr.mapreduce.IcebergInputFormat$IcebergRecordReader.initialize(IcebergInputFormat.java:259)
	at org.apache.iceberg.mr.mapred.AbstractMapredIcebergRecordReader.<init>(AbstractMapredIcebergRecordReader.java:40)
	at org.apache.iceberg.mr.hive.vector.HiveIcebergVectorizedRecordReader.<init>(HiveIcebergVectorizedRecordReader.java:41)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.iceberg.common.DynConstructors$Ctor.newInstanceChecked(DynConstructors.java:60)
	at org.apache.iceberg.common.DynConstructors$Ctor.newInstance(DynConstructors.java:73)
	at org.apache.iceberg.mr.hive.HiveIcebergInputFormat.getRecordReader(HiveIcebergInputFormat.java:163)
	at org.apache.hadoop.hive.ql.io.RecordReaderWrapper.create(RecordReaderWrapper.java:72)
	at org.apache.hadoop.hive.ql.io.HiveInputFormat.getRecordReader(HiveInputFormat.java:449)
	... 28 more
{code}







--
This message was sent by Atlassian Jira
(v8.20.7#820007)