Posted to dev@hive.apache.org by "Nemon Lou (JIRA)" <ji...@apache.org> on 2017/01/16 12:19:26 UTC

[jira] [Created] (HIVE-15638) ArrayIndexOutOfBoundsException when output Columns for UDTF are pruned

Nemon Lou created HIVE-15638:
--------------------------------

             Summary: ArrayIndexOutOfBoundsException when output Columns for UDTF are pruned 
                 Key: HIVE-15638
                 URL: https://issues.apache.org/jira/browse/HIVE-15638
             Project: Hive
          Issue Type: Bug
          Components: Query Planning
    Affects Versions: 2.1.0, 1.3.0
            Reporter: Nemon Lou


{noformat}
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row [Error getting row data with exception java.lang.ArrayIndexOutOfBoundsException: 151
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314)
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183)
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142)
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202)
	at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64)
	at org.apache.hadoop.hive.serde2.SerDeUtils.buildJSONString(SerDeUtils.java:364)
	at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:200)
	at org.apache.hadoop.hive.serde2.SerDeUtils.getJSONString(SerDeUtils.java:186)
	at org.apache.hadoop.hive.ql.exec.MapOperator.toErrorMessage(MapOperator.java:525)
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:494)
	at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160)
	at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
	at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:453)
	at org.apache.hadoop.mapred.MapTask.run(MapTask.java:343)
	at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:180)
	at java.security.AccessController.doPrivileged(Native Method)
	at javax.security.auth.Subject.doAs(Subject.java:422)
	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1710)
	at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:174)
 ]
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:499)
	at org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:160)
	... 8 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: java.lang.ArrayIndexOutOfBoundsException: 151
	at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:416)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:878)
	at org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:130)
	at org.apache.hadoop.hive.ql.exec.MapOperator$MapOpCtx.forward(MapOperator.java:149)
	at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:489)
	... 9 more
Caused by: java.lang.ArrayIndexOutOfBoundsException: 151
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.readVInt(LazyBinaryUtils.java:314)
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.checkObjectByteInfo(LazyBinaryUtils.java:183)
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.parse(LazyBinaryStruct.java:142)
	at org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct.getField(LazyBinaryStruct.java:202)
	at org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector.getStructFieldData(LazyBinaryStructObjectInspector.java:64)
	at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator._evaluate(ExprNodeColumnEvaluator.java:94)
	at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:77)
	at org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator.evaluate(ExprNodeEvaluator.java:65)
	at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.populateCachedDistributionKeys(ReduceSinkOperator.java:443)
	at org.apache.hadoop.hive.ql.exec.ReduceSinkOperator.process(ReduceSinkOperator.java:350)
	... 13 more
{noformat}

Steps to reproduce:
DDL:
{noformat}
create table tb_a(data_dt string,key string,src string,data_id string,tag_id string, entity_src string);
create table tb_b(pos_tagging string,src string,data_id string);
create table tb_c(key string,start_time string,data_dt string);
insert into tb_a values('20160901','CPI','04','data_id','tag_id','entity_src');
insert into tb_b values('pos_tagging','04','data_id');
insert into tb_c values('data_id','start_time_0000','20160901');
create function hwrl as 'HotwordRelationUDTF' using jar 'hdfs:///tmp/nemon/udf/hotword.jar';
{noformat}

UDTF source (note that it takes six arguments but declares ten output columns):
{code}
import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Minimal reproduction UDTF: takes six primitive arguments and forwards a
 * single constant row with ten output columns (eight strings, two ints).
 */
public class HotwordRelationUDTF extends GenericUDTF {
  private int argsNumber = 6;

  @Override
  public StructObjectInspector initialize(ObjectInspector[] args)
          throws UDFArgumentException{
    if (args.length != argsNumber) {
      StringBuilder log = new StringBuilder();
      for (int i = 0; i < args.length; i++) {
        log.append(args[i].toString()).append(",");
      }
      throw new UDFArgumentLengthException(
              "HotwordRelationUDTF(" + log
                      + ") has the wrong number of arguments: "
                      + "HotwordRelationUDTF(data_dt,data_src,data_id,word_type,primary_word,txt_For_Handle)"
                      + " takes exactly " + argsNumber + " arguments.");
    }

    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();

    for (int i = 0; i < argsNumber; i++){
      if (args[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
        throw new UDFArgumentTypeException(i,
                "Only primitive type arguments are accepted but "
                        + args[i].getTypeName() + " is passed");
      }
    }

    fieldNames.add("data_dt");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldNames.add("data_src");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldNames.add("data_id");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldNames.add("word_type");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

    fieldNames.add("primary_word");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldNames.add("primary_nature");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldNames.add("primary_offset");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    fieldNames.add("related_word");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldNames.add("related_nature");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    fieldNames.add("related_offset");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

    return ObjectInspectorFactory.getStandardStructObjectInspector(
            fieldNames, fieldOIs);
  }


  @Override
  public void process(Object[] args) throws HiveException {
    // Ignore the inputs and forward a single constant ten-column row;
    // this is all that is needed to reproduce the pruning bug.
    ArrayList<Object> result = new ArrayList<Object>();

    result.add("20160901");
    result.add("data_src");
    result.add("data_id");
    result.add("word_type");
    result.add("primary_word");
    result.add("primary_nature");
    result.add(6);
    result.add("related_word");
    result.add("related_nature");
    result.add(0);
    Object[] ret = result.toArray(new Object[0]);
    forward(ret);

  }


  @Override
  public void close() throws HiveException {

  }

}
{code}
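
For comparison, an unpruned sanity check (untested sketch): it references all ten output columns, so if the pruning is indeed the trigger, this form should run without the exception:
{noformat}
-- untested sketch: every UDTF output column is referenced, so nothing is pruned;
-- the sixth argument is a stand-in literal, since the UDTF ignores its inputs
select hwrl(data_dt, src, data_id, tag_id, entity_src, 'pos_tagging')
as (data_dt, data_src, data_id, word_type, primary_word, primary_nature,
    primary_offset, related_word, related_nature, related_offset)
from tb_a;
{noformat}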

The failing query (note that the outer SELECT references only eight of the ten UDTF output columns, so data_dt and data_src get pruned):
{noformat}
set hive.auto.convert.join=false;
select substring(c.start_time,1,10) create_date, tt.data_id,tt.word_type,tt.primary_word,tt.primary_nature,tt.primary_offset,tt.related_word,tt.related_nature,tt.related_offset 
from (
select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging)
as (data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset)
from (
select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging
from tb_a a, tb_b b
where a.key like 'CP%' 
and a.data_dt='20160901'
and a.data_id=b.data_id
and b.src='04'
) t

) tt, (select key,start_time from tb_c where data_dt='20160901') c 
where tt.data_id=c.key 
;
{noformat}
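
And a possible workaround sketch (also untested): add the two pruned columns, tt.data_dt and tt.data_src, back to the outer SELECT so that no UDTF output column is pruned:
{noformat}
-- untested workaround sketch: the outer SELECT now references all ten output columns
select substring(c.start_time,1,10) create_date,
       tt.data_dt, tt.data_src,
       tt.data_id, tt.word_type, tt.primary_word, tt.primary_nature,
       tt.primary_offset, tt.related_word, tt.related_nature, tt.related_offset
from (
  select hwrl(data_dt,src,data_id,tag_id,entity_src,pos_tagging)
  as (data_dt,data_src,data_id,word_type,primary_word,primary_nature,primary_offset,related_word,related_nature,related_offset)
  from (
    select a.data_dt,a.src,a.data_id,a.tag_id,a.entity_src,b.pos_tagging
    from tb_a a, tb_b b
    where a.key like 'CP%'
      and a.data_dt='20160901'
      and a.data_id=b.data_id
      and b.src='04'
  ) t
) tt, (select key,start_time from tb_c where data_dt='20160901') c
where tt.data_id=c.key;
{noformat}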


