You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Nemon Lou (JIRA)" <ji...@apache.org> on 2017/01/16 12:19:26 UTC
[jira] [Created] (HIVE-15638) ArrayIndexOutOfBoundsException when
output Columns for UDTF are pruned
Nemon Lou created HIVE-15638:
--------------------------------
Summary: ArrayIndexOutOfBoundsException when output Columns for UDTF are pruned
Key: HIVE-15638
URL: https://issues.apache.org/jira/browse/HIVE-15638
Project: Hive
Issue Type: Bug
Components: Query Planning
Affects Versions: 2.1.0, 1.3.0
Reporter: Nemon Lou
{noformat}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ArrayIndexOutOfBoundsException: 151
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314)
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183)
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142)
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202)
at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64)
at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:364)
at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:200)
at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:186)
at org.apache.hadoop.hive.ql.exec.MapOperator.toErrorMessage(MapOperator.java:525)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:494)
at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160)
at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:180)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1710)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:174)
]
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:499)
at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160)
... 8 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ArrayIndexOutOfBoundsException: 151
at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:416)
at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:878)
at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:149)
at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:489)
... 9 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 151
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314)
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183)
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142)
at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202)
at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64)
at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:94)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:77)
at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:65)
at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.populateCachedDistributionKeys(ReduceSinkOperator.java:443)
at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:350)
... 13 more
{noformat}
Steps to reproduce:
DDL:
{noformat}
create table tb_a(data_dt string,key string,src string,data_id string,tag_id string, entity_src string);
create table tb_b(pos_tagging string,src string,data_id string);
create table tb_c(key string,start_time string,data_dt string);
insert into tb_a values('20160901','CPI','04','data_id','tag_id','entity_src');
insert into tb_b values('pos_tagging','04','data_id');
insert into tb_c values('data_id','start_time_0000','20160901');
create function hwrl as 'HotwordRelationUDTF' using jar 'hdfs:///tmp/nemon/udf/hotword.jar';
{noformat}
UDTF source file:
{code}
import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
/**
 * Test UDTF used to reproduce HIVE-15638 (ArrayIndexOutOfBoundsException when
 * output columns of a UDTF are pruned).
 *
 * <p>Takes exactly six primitive arguments and, for every input row, forwards a
 * single constant ten-column row:
 * (data_dt, data_src, data_id, word_type, primary_word, primary_nature,
 * primary_offset, related_word, related_nature, related_offset).
 * The eight name/type columns are strings; the two offset columns are ints.
 */
public class HotwordRelationUDTF extends GenericUDTF {

    /** Exact number of input arguments this function accepts. */
    private int argsNumber = 6;

    /**
     * Registers one output column of the given name with a Java-string
     * object inspector.
     */
    private static void addStringField(ArrayList<String> fieldNames,
            ArrayList<ObjectInspector> fieldOIs, String name) {
        fieldNames.add(name);
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    /**
     * Validates that exactly {@link #argsNumber} primitive arguments were
     * supplied and declares the ten-column output schema.
     *
     * @param args object inspectors for the call-site arguments
     * @return struct inspector describing the ten output columns
     * @throws UDFArgumentException if the argument count or a category is wrong
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args)
            throws UDFArgumentException {
        if (args.length != argsNumber) {
            // Build a readable list of the arguments actually passed.
            // FIX: use StringBuilder instead of String += in a loop.
            StringBuilder passed = new StringBuilder();
            for (int i = 0; i < args.length; i++) {
                if (i > 0) {
                    passed.append(',');
                }
                passed.append(args[i].toString());
            }
            // FIX: error message now names this class instead of the
            // unrelated "OrgIdentifyUDTF"/"ProductHotWordUDTF".
            throw new UDFArgumentLengthException(
                    " HotwordRelationUDTF ("
                            + passed
                            + ") has wrong arguments. "
                            + "The function HotwordRelationUDTF(data_dt,data_src,data_id,word_type,primary_word,txt_For_Handle)"
                            + " must have exactly " + argsNumber
                            + " arguments.");
        }
        for (int i = 0; i < argsNumber; i++) {
            if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
                // FIX: report the index of the offending argument rather
                // than the hard-coded index 1.
                throw new UDFArgumentTypeException(i,
                        "Only primitive type arguments are accepted but "
                                + args[i].getTypeName() + " is passed");
            }
        }
        ArrayList<String> fieldNames = new ArrayList<String>();
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        addStringField(fieldNames, fieldOIs, "data_dt");
        addStringField(fieldNames, fieldOIs, "data_src");
        addStringField(fieldNames, fieldOIs, "data_id");
        addStringField(fieldNames, fieldOIs, "word_type");
        addStringField(fieldNames, fieldOIs, "primary_word");
        addStringField(fieldNames, fieldOIs, "primary_nature");
        fieldNames.add("primary_offset");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        addStringField(fieldNames, fieldOIs, "related_word");
        addStringField(fieldNames, fieldOIs, "related_nature");
        fieldNames.add("related_offset");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(
                fieldNames, fieldOIs);
    }

    /**
     * Emits one hard-coded ten-column row per input row; the input values
     * are deliberately ignored (this UDTF only exists to reproduce the bug).
     *
     * @param args the (ignored) input column values
     * @throws HiveException if forwarding the row fails
     */
    @Override
    public void process(Object[] args) throws HiveException {
        ArrayList<Object> result = new ArrayList<Object>();
        result.add("20160901");
        result.add("data_src");
        result.add("data_id");
        result.add("word_type");
        result.add("primary_word");
        result.add("primary_nature");
        result.add(6);
        result.add("related_word");
        result.add("related_nature");
        result.add(0);
        Object[] ret = result.toArray(new Object[] {});
        forward(ret);
    }

    /** Nothing to release. */
    @Override
    public void close() throws HiveException {
    }
}
{code}
query:
{noformat}
set hive.auto.convert.join=false;
select substring(c.start_time,1,10) create_date, tt.data_id,tt.word_type,tt.primary_word,tt.primary_nature,tt.primary_offset,tt.related_word,tt.related_nature,tt.related_offset
from (
select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging)
as (data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset)
from (
select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging
from tb_a a, tb_b b
where a.key like 'CP%'
and a.data_dt='20160901'
and a.data_id=b.data_id
and b.src='04'
) t
) tt, (select key,start_time from tb_c where data_dt='20160901') c
where tt.data_id=c.key
;
{noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)