You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hive.apache.org by "范宜臻 (Jira)" <ji...@apache.org> on 2020/06/14 12:43:00 UTC

[jira] [Updated] (HIVE-23688) Vectorization: ArrayIndexOutOfBoundsException For map type column which includes null value

     [ https://issues.apache.org/jira/browse/HIVE-23688?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

范宜臻 updated HIVE-23688:
-----------------------
    Description: 
how to reproduce:
{code:java}
set hive.vectorized.execution.enabled=true; 

CREATE TABLE parquet_map_type (id int,stringMap map<string, string>) 
stored as parquet; 

insert overwrite table parquet_map_type SELECT 1, MAP('k1', null, 'k2', 'bar'); 

select id, stringMap['k1'] from parquet_map_type group by 1,2;
{code}
query explain:
{code:java}
Stage-0
  Fetch Operator
    limit:-1
    Stage-1
      Reducer 2 vectorized
      File Output Operator [FS_12]
        Group By Operator [GBY_11] (rows=5 width=2)
          Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
        <-Map 1 [SIMPLE_EDGE] vectorized
          SHUFFLE [RS_10]
            PartitionCols:_col0, _col1
            Group By Operator [GBY_9] (rows=10 width=2)
              Output:["_col0","_col1"],keys:_col0, _col1
              Select Operator [SEL_8] (rows=10 width=2)
                Output:["_col0","_col1"]
                TableScan [TS_0] (rows=10 width=2)
                  temp@parquet_map_type_fyz,parquet_map_type_fyz,Tbl:COMPLETE,Col:NONE,Output:["id","stringmap"]
{code}
runtime error:
{code:java}
Vertex failed, vertexName=Map 1, vertexId=vertex_1592040015150_0001_3_00, diagnostics=[Task failed, taskId=task_1592040015150_0001_3_00_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_1592040015150_0001_3_00_000000_0:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row 
	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
	at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
	at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
	at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
	at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
	at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
	at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row 
	at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:101)
	at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:76)
	at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:403)
	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:267)
	... 16 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row 
	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:970)
	at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:92)
	... 19 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating id
	at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:149)
	at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:918)
	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:172)
	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:809)
	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:842)
	... 20 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
	at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setElement(BytesColumnVector.java:506)
	at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexBaseScalar.evaluate(VectorUDFMapIndexBaseScalar.java:83)
	at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146)
	... 24 more
{code}

> Vectorization: ArrayIndexOutOfBoundsException For map type column which includes null value
> -------------------------------------------------------------------------------------------
>
>                 Key: HIVE-23688
>                 URL: https://issues.apache.org/jira/browse/HIVE-23688
>             Project: Hive
>          Issue Type: Bug
>          Components: Parquet, Vectorization
>    Affects Versions: All Versions
>            Reporter: 范宜臻
>            Assignee: 范宜臻
>            Priority: Critical
>
> how to reproduce:
> {code:java}
> set hive.vectorized.execution.enabled=true; 
> CREATE TABLE parquet_map_type (id int,stringMap map<string, string>) 
> stored as parquet; 
> insert overwrite table parquet_map_type SELECT 1, MAP('k1', null, 'k2', 'bar'); 
> select id, stringMap['k1'] from parquet_map_type group by 1,2;
> {code}
> query explain:
> {code:java}
> Stage-0
>   Fetch Operator
>     limit:-1
>     Stage-1
>       Reducer 2 vectorized
>       File Output Operator [FS_12]
>         Group By Operator [GBY_11] (rows=5 width=2)
>           Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
>         <-Map 1 [SIMPLE_EDGE] vectorized
>           SHUFFLE [RS_10]
>             PartitionCols:_col0, _col1
>             Group By Operator [GBY_9] (rows=10 width=2)
>               Output:["_col0","_col1"],keys:_col0, _col1
>               Select Operator [SEL_8] (rows=10 width=2)
>                 Output:["_col0","_col1"]
>                 TableScan [TS_0] (rows=10 width=2)
>                   temp@parquet_map_type_fyz,parquet_map_type_fyz,Tbl:COMPLETE,Col:NONE,Output:["id","stringmap"]
> {code}
> runtime error:
> {code:java}
> Vertex failed, vertexName=Map 1, vertexId=vertex_1592040015150_0001_3_00, diagnostics=[Task failed, taskId=task_1592040015150_0001_3_00_000000, diagnostics=[TaskAttempt 0 failed, info=[Error: Error while running task ( failure ) : attempt_1592040015150_0001_3_00_000000_0:java.lang.RuntimeException: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row 
> 	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:296)
> 	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
> 	at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:374)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:73)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:61)
> 	at java.security.AccessController.doPrivileged(Native Method)
> 	at javax.security.auth.Subject.doAs(Subject.java:422)
> 	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1682)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:61)
> 	at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:37)
> 	at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
> 	at com.google.common.util.concurrent.TrustedListenableFutureTask$TrustedFutureInterruptibleTask.runInterruptibly(TrustedListenableFutureTask.java:108)
> 	at com.google.common.util.concurrent.InterruptibleTask.run(InterruptibleTask.java:41)
> 	at com.google.common.util.concurrent.TrustedListenableFutureTask.run(TrustedListenableFutureTask.java:77)
> 	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> 	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> 	at java.lang.Thread.run(Thread.java:748)
> Caused by: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row 
> 	at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:101)
> 	at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.pushRecord(MapRecordSource.java:76)
> 	at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.run(MapRecordProcessor.java:403)
> 	at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:267)
> 	... 16 more
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row 
> 	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:970)
> 	at org.apache.hadoop.hive.ql.exec.tez.MapRecordSource.processRow(MapRecordSource.java:92)
> 	... 19 more
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating id
> 	at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:149)
> 	at org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:918)
> 	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:172)
> 	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:809)
> 	at org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:842)
> 	... 20 more
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
> 	at org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector.setElement(BytesColumnVector.java:506)
> 	at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFMapIndexBaseScalar.evaluate(VectorUDFMapIndexBaseScalar.java:83)
> 	at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146)
> 	... 24 more
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)