Posted to user@hive.apache.org by Dileep Kumar <di...@gmail.com> on 2013/03/05 02:37:19 UTC

Hive insert into RCFILE issue with timestamp columns

Hi All,

I am using the schema from the Impala VM and trying to create a dynamically
partitioned table on date_dim.
The new table is called date_dim_i, and its schema is defined as:
create table date_dim_i
(
    d_date_sk                 int,
    d_date_id                 string,
    d_date                    timestamp,
    d_month_seq               int,
    d_week_seq                int,
    d_quarter_seq             int,
    d_dow                     int,
    d_moy                     int,
    d_dom                     int,
    d_qoy                     int,
    d_fy_year                 int,
    d_fy_quarter_seq          int,
    d_fy_week_seq             int,
    d_day_name                string,
    d_quarter_name            string,
    d_holiday                 string,
    d_weekend                 string,
    d_following_holiday       string,
    d_first_dom               int,
    d_last_dom                int,
    d_same_day_ly             int,
    d_same_day_lq             int,
    d_current_day             string,
    d_current_week            string,
    d_current_month           string,
    d_current_quarter         string,
    d_current_year            string
)
PARTITIONED BY (d_year int)
stored as RCFILE;

Then I run an INSERT OVERWRITE as:
insert overwrite table date_dim_i
PARTITION (d_year)
select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
d_quarter_seq, d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq,
d_fy_week_seq, d_day_name, d_quarter_name, d_holiday, d_weekend,
d_following_holiday, d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq,
d_current_day, d_current_week, d_current_month, d_current_quarter,
d_current_year, d_year
from date_dim;
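
For reference: a dynamic-partition insert like this one normally needs
dynamic partitioning enabled for the session. A minimal sketch of the usual
settings, assuming an otherwise default Hive configuration:

-- allow dynamic partitions, and let every partition column be dynamic
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;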

The date_dim table schema is as follows:
create external table date_dim
(
    d_date_sk                 int,
    d_date_id                 string,
    d_date                    timestamp,
    d_month_seq               int,
    d_week_seq                int,
    d_quarter_seq             int,
    d_year                    int,
    d_dow                     int,
    d_moy                     int,
    d_dom                     int,
    d_qoy                     int,
    d_fy_year                 int,
    d_fy_quarter_seq          int,
    d_fy_week_seq             int,
    d_day_name                string,
    d_quarter_name            string,
    d_holiday                 string,
    d_weekend                 string,
    d_following_holiday       string,
    d_first_dom               int,
    d_last_dom                int,
    d_same_day_ly             int,
    d_same_day_lq             int,
    d_current_day             string,
    d_current_week            string,
    d_current_month           string,
    d_current_quarter         string,
    d_current_year            string
)
row format delimited fields terminated by '|'
location '/hive/tpcds/date_dim';





It fails with the following exception:

Error: java.lang.RuntimeException:
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
processing row
{"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
        at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
        at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime
Error while processing row
{"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
        at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
        at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
        ... 8 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
evaluating d_date
        at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
        at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
        at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
        at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
        at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
        at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
        at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
        ... 9 more
Caused by: java.lang.IllegalArgumentException: Timestamp format must be
yyyy-mm-dd hh:mm:ss[.fffffffff]
        at java.sql.Timestamp.valueOf(Timestamp.java:185)

Please suggest what could be wrong here, as the datatypes are exactly the
same in both cases.


Thanks !

Re: Hive insert into RCFILE issue with timestamp columns

Posted by Mark Grover <gr...@gmail.com>.
Dileep,
Can you use a more contemporary timestamp? Something after Jan 1, 1970
GMT, say Jan 1st, 2013?
Let us know what you see.
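
For instance, a quick check along these lines (just a sketch; the literal
value is arbitrary) would show whether timestamp parsing itself works:

select cast('2013-01-01 00:00:00' as timestamp) from date_dim limit 1;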

On Tue, Mar 5, 2013 at 2:56 PM, Dileep Kumar <di...@gmail.com> wrote:
> --hdfs dfs -mkdir /hive/tpcds/date_ts
>
> create external table date_ts
> (
>     d_date                    timestamp
> )
> row format delimited fields terminated by '|'
> location '/hive/tpcds/date_ts';
>
> [cloudera@localhost tmp-work]$ hive -e "select * from date_ts"
> Logging initialized using configuration in
> file:/etc/hive/conf.dist/hive-log4j.properties
> Hive history
> file=/tmp/cloudera/hive_job_log_cloudera_201303052251_950655265.txt
> OK
> Failed with exception
> java.io.IOException:java.lang.IllegalArgumentException: Timestamp format
> must be yyyy-mm-dd hh:mm:ss[.fffffffff]
> Time taken: 3.556 seconds
> [cloudera@localhost tmp-work]$ hdfs dfs -cat /hive/tpcds/date_ts/*
> 2415022|AAAAAAAAOKJNECAA|1900-01-02
> 02:00:21.000000000|0|1|1|1900|1|1|2|1|1900|1|1|Monday|1900Q1|N|N|Y|2415021|2415020|2414657|2414930|N|N|N|N|N|
>
>
>
>
>
> On Mon, Mar 4, 2013 at 6:00 PM, Dileep Kumar <di...@gmail.com>
> wrote:
>>
>> No.
>> Here are the errors:
>> Task with the most failures(4):
>> -----
>> Task ID:
>>   task_1361599885844_0013_m_000000
>>
>> URL:
>>
>> http://localhost.localdomain:50030/taskdetails.jsp?jobid=job_1361599885844_0013&tipid=task_1361599885844_0013_m_000000
>> -----
>> Diagnostic Messages for this Task:
>> Error: java.lang.RuntimeException:
>> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
>> processing row
>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>         at
>> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
>>         at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>>         at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
>>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
>>         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
>>         at java.security.AccessController.doPrivileged(Native Method)
>>         at javax.security.auth.Subject.doAs(Subject.java:396)
>>         at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
>>         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
>> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime
>> Error while processing row
>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>         at
>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
>>         at
>> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
>>         ... 8 more
>> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
>> evaluating d_date
>>         at
>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>         at
>> org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>         at
>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
>>         ... 9 more
>> Caused by: java.lang.IllegalArgumentException: Timestamp format must be
>> yyyy-mm-dd hh:mm:ss[.fffffffff]
>>         at java.sql.Timestamp.valueOf(Timestamp.java:185)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.LazyTimestamp.init(LazyTimestamp.java:74)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.LazyStruct.uncheckedGetField(LazyStruct.java:219)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.LazyStruct.getField(LazyStruct.java:192)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector.getStructFieldData(LazySimpleStructObjectInspector.java:188)
>>         at
>> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.evaluate(ExprNodeColumnEvaluator.java:98)
>>         at
>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:76)
>>         ... 15 more
>>
>>
>> FAILED: Execution Error, return code 2 from
>> org.apache.hadoop.hive.ql.exec.MapRedTask
>> MapReduce Jobs Launched:
>> Job 0: Map: 1   HDFS Read: 0 HDFS Write: 0 FAIL
>> Total MapReduce CPU Time Spent: 0 msec
>>
>>
>>
>> On Mon, Mar 4, 2013 at 5:51 PM, Mark Grover <gr...@gmail.com>
>> wrote:
>>>
>>> Hi Dilip,
>>> Are you able to run this query successfully?
>>>
>>> select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
>>> d_quarter_seq, d_dow, d_moy, d_dom, d_qoy, d_fy_year,
>>> d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name,
>>> d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom,
>>> d_same_day_ly, d_same_day_lq, d_current_day, d_current_week,
>>> d_current_month, d_current_quarter, d_current_year, d_year
>>> from date_dim
>>>
>>> On Mon, Mar 4, 2013 at 5:37 PM, Dileep Kumar <di...@gmail.com>
>>> wrote:
>>> > Hi All,
>>> >
>>> > I am using the schema in the Impala VM and trying to create a dynamic
>>> > partitioned table on date_dim.
>>> > New table is called date_dim_i and schema for that is defined as:
>>> > create table date_dim_i
>>> > (
>>> >     d_date_sk                 int,
>>> >     d_date_id                 string,
>>> >     d_date                    timestamp,
>>> >     d_month_seq               int,
>>> >     d_week_seq                int,
>>> >     d_quarter_seq             int,
>>> >     d_dow                     int,
>>> >     d_moy                     int,
>>> >     d_dom                     int,
>>> >     d_qoy                     int,
>>> >     d_fy_year                 int,
>>> >     d_fy_quarter_seq          int,
>>> >     d_fy_week_seq             int,
>>> >     d_day_name                string,
>>> >     d_quarter_name            string,
>>> >     d_holiday                 string,
>>> >     d_weekend                 string,
>>> >     d_following_holiday       string,
>>> >     d_first_dom               int,
>>> >     d_last_dom                int,
>>> >     d_same_day_ly             int,
>>> >     d_same_day_lq             int,
>>> >     d_current_day             string,
>>> >     d_current_week            string,
>>> >     d_current_month           string,
>>> >     d_current_quarter         string,
>>> >     d_current_year            string
>>> > )
>>> > PARTITIONED BY (d_year int)
>>> > stored as RCFILE;
>>> >
>>> > Then I do insert overwrite as:
>>> > insert overwrite table date_dim_i
>>> > PARTITION (d_year)
>>> > select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
>>> > d_quarter_seq,
>>> > d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq, d_fy_week_seq,
>>> > d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday,
>>> > d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq, d_current_day,
>>> > d_current_week, d_current_month, d_current_quarter, d_current_year,
>>> > d_year
>>> > from date_dim;
>>> >
>>> > The date_dim table schema is as :
>>> > create external table date_dim
>>> > (
>>> >     d_date_sk                 int,
>>> >     d_date_id                 string,
>>> >     d_date                    timestamp,
>>> >     d_month_seq               int,
>>> >     d_week_seq                int,
>>> >     d_quarter_seq             int,
>>> >     d_year                    int,
>>> >     d_dow                     int,
>>> >     d_moy                     int,
>>> >     d_dom                     int,
>>> >     d_qoy                     int,
>>> >     d_fy_year                 int,
>>> >     d_fy_quarter_seq          int,
>>> >     d_fy_week_seq             int,
>>> >     d_day_name                string,
>>> >     d_quarter_name            string,
>>> >     d_holiday                 string,
>>> >     d_weekend                 string,
>>> >     d_following_holiday       string,
>>> >     d_first_dom               int,
>>> >     d_last_dom                int,
>>> >     d_same_day_ly             int,
>>> >     d_same_day_lq             int,
>>> >     d_current_day             string,
>>> >     d_current_week            string,
>>> >     d_current_month           string,
>>> >     d_current_quarter         string,
>>> >     d_current_year            string
>>> > )
>>> > row format delimited fields terminated by '|'
>>> > location '/hive/tpcds/date_dim';
>>> >
>>> >
>>> >
>>> >
>>> >
>>> > It fails with following exception:
>>> >
>>> > Error: java.lang.RuntimeException:
>>> > org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
>>> > while
>>> > processing row
>>> >
>>> > {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>> >
>>> > 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
>>> >
>>> >         at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>>> >
>>> >         at
>>> > org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
>>> >
>>> >         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
>>> >
>>> >         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
>>> >
>>> >         at java.security.AccessController.doPrivileged(Native Method)
>>> >
>>> >         at javax.security.auth.Subject.doAs(Subject.java:396)
>>> >
>>> >         at
>>> >
>>> > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
>>> >
>>> >         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
>>> >
>>> > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive
>>> > Runtime
>>> > Error while processing row
>>> >
>>> > {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>> >
>>> > 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>> >
>>> >         at
>>> >
>>> > org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
>>> >
>>> >         ... 8 more
>>> >
>>> > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
>>> > evaluating d_date
>>> >
>>> >         at
>>> >
>>> > org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>> >
>>> >         at
>>> >
>>> > org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>> >
>>> >         at
>>> >
>>> > org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
>>> >
>>> >         ... 9 more
>>> >
>>> > Caused by: java.lang.IllegalArgumentException: Timestamp format must be
>>> > yyyy-mm-dd hh:mm:ss[.fffffffff]
>>> >
>>> >         at java.sql.Timestamp.valueOf(Timestamp.java:185)
>>> >
>>> >
>>> > Please suggest what could be wrong here as datatypes are exact same in
>>> > both
>>> > cases.
>>> >
>>> >
>>> > Thanks !
>>
>>
>

Re: Hive insert into RCFILE issue with timestamp columns

Posted by Sékine Coulibaly <sc...@gmail.com>.
Prasad,

Isn't the fractional part of the TIMESTAMP type supposed to be optional, as
per the error message:

Failed with exception
java.io.IOException:java.lang.IllegalArgumentException: Timestamp format
must be yyyy-mm-dd hh:mm:ss[.fffffffff]

Should we understand that the 9 digits of the fractional part are mandatory?
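
The brackets would normally mean the fractional part is optional, so I would
expect both of these to parse (a sketch; the literal values are arbitrary):

select cast('2013-01-01 00:00:00' as timestamp),
       cast('2013-01-01 00:00:00.123456789' as timestamp)
from date_dim limit 1;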

Thanks




2013/3/6 Prasad Mujumdar <pr...@cloudera.com>

> Dilip,
>
>    Looks like you are using the data from the original schema for this new
> table that has single timestamp column. When I tried with just the
> timestamp from your data, the query runs fine. I guess the original issue
> you hit on the data that didn't have fraction part (1969-12-31 19:00:00, no
> .fffff).
>
> thanks
> Prasad
>
>
> On Tue, Mar 5, 2013 at 2:56 PM, Dileep Kumar <di...@gmail.com>wrote:
>
>> --hdfs dfs -mkdir /hive/tpcds/date_ts
>>
>> create external table date_ts
>> (
>>     d_date                    timestamp
>> )
>> row format delimited fields terminated by '|'
>> location '/hive/tpcds/date_ts';
>>
>> [cloudera@localhost tmp-work]$ hive -e "select * from date_ts"
>> Logging initialized using configuration in
>> file:/etc/hive/conf.dist/hive-log4j.properties
>> Hive history
>> file=/tmp/cloudera/hive_job_log_cloudera_201303052251_950655265.txt
>> OK
>> Failed with exception
>> java.io.IOException:java.lang.IllegalArgumentException: Timestamp format
>> must be yyyy-mm-dd hh:mm:ss[.fffffffff]
>> Time taken: 3.556 seconds
>> [cloudera@localhost tmp-work]$ hdfs dfs -cat /hive/tpcds/date_ts/*
>> 2415022|AAAAAAAAOKJNECAA|1900-01-02
>> 02:00:21.000000000|0|1|1|1900|1|1|2|1|1900|1|1|Monday|1900Q1|N|N|Y|2415021|2415020|2414657|2414930|N|N|N|N|N|
>>
>>
>>
>>
>>
>> On Mon, Mar 4, 2013 at 6:00 PM, Dileep Kumar <di...@gmail.com>wrote:
>>
>>> No.
>>> Here are the errors:
>>> Task with the most failures(4):
>>> -----
>>> Task ID:
>>>   task_1361599885844_0013_m_000000
>>>
>>> URL:
>>>
>>> http://localhost.localdomain:50030/taskdetails.jsp?jobid=job_1361599885844_0013&tipid=task_1361599885844_0013_m_000000
>>> -----
>>> Diagnostic Messages for this Task:
>>> Error: java.lang.RuntimeException:
>>> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
>>> processing row
>>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>>         at
>>> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
>>>         at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>>>         at
>>> org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
>>>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
>>>         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
>>>         at java.security.AccessController.doPrivileged(Native Method)
>>>         at javax.security.auth.Subject.doAs(Subject.java:396)
>>>         at
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
>>>         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
>>> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive
>>> Runtime Error while processing row
>>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>>         at
>>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
>>>         ... 8 more
>>> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
>>> evaluating d_date
>>>         at
>>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
>>>         ... 9 more
>>> Caused by: java.lang.IllegalArgumentException: Timestamp format must be
>>> yyyy-mm-dd hh:mm:ss[.fffffffff]
>>>         at java.sql.Timestamp.valueOf(Timestamp.java:185)
>>>         at
>>> org.apache.hadoop.hive.serde2.lazy.LazyTimestamp.init(LazyTimestamp.java:74)
>>>         at
>>> org.apache.hadoop.hive.serde2.lazy.LazyStruct.uncheckedGetField(LazyStruct.java:219)
>>>         at
>>> org.apache.hadoop.hive.serde2.lazy.LazyStruct.getField(LazyStruct.java:192)
>>>         at
>>> org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector.getStructFieldData(LazySimpleStructObjectInspector.java:188)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.evaluate(ExprNodeColumnEvaluator.java:98)
>>>         at
>>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:76)
>>>         ... 15 more
>>>
>>>
>>> FAILED: Execution Error, return code 2 from
>>> org.apache.hadoop.hive.ql.exec.MapRedTask
>>> MapReduce Jobs Launched:
>>> Job 0: Map: 1   HDFS Read: 0 HDFS Write: 0 FAIL
>>> Total MapReduce CPU Time Spent: 0 msec
>>>
>>>
>>>
>>> On Mon, Mar 4, 2013 at 5:51 PM, Mark Grover <grover.markgrover@gmail.com
>>> > wrote:
>>>
>>>> Hi Dilip,
>>>> Are you able to run this query successfully?
>>>>
>>>> select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
>>>> d_quarter_seq, d_dow, d_moy, d_dom, d_qoy, d_fy_year,
>>>> d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name,
>>>> d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom,
>>>> d_same_day_ly, d_same_day_lq, d_current_day, d_current_week,
>>>> d_current_month, d_current_quarter, d_current_year, d_year
>>>> from date_dim
>>>>
>>>> On Mon, Mar 4, 2013 at 5:37 PM, Dileep Kumar <di...@gmail.com>
>>>> wrote:
>>>> > Hi All,
>>>> >
>>>> > I am using the schema in the Impala VM and trying to create a dynamic
>>>> > partitioned table on date_dim.
>>>> > New table is called date_dim_i and schema for that is defined as:
>>>> > create table date_dim_i
>>>> > (
>>>> >     d_date_sk                 int,
>>>> >     d_date_id                 string,
>>>> >     d_date                    timestamp,
>>>> >     d_month_seq               int,
>>>> >     d_week_seq                int,
>>>> >     d_quarter_seq             int,
>>>> >     d_dow                     int,
>>>> >     d_moy                     int,
>>>> >     d_dom                     int,
>>>> >     d_qoy                     int,
>>>> >     d_fy_year                 int,
>>>> >     d_fy_quarter_seq          int,
>>>> >     d_fy_week_seq             int,
>>>> >     d_day_name                string,
>>>> >     d_quarter_name            string,
>>>> >     d_holiday                 string,
>>>> >     d_weekend                 string,
>>>> >     d_following_holiday       string,
>>>> >     d_first_dom               int,
>>>> >     d_last_dom                int,
>>>> >     d_same_day_ly             int,
>>>> >     d_same_day_lq             int,
>>>> >     d_current_day             string,
>>>> >     d_current_week            string,
>>>> >     d_current_month           string,
>>>> >     d_current_quarter         string,
>>>> >     d_current_year            string
>>>> > )
>>>> > PARTITIONED BY (d_year int)
>>>> > stored as RCFILE;
>>>> >
>>>> > Then I do insert overwrite as:
>>>> > insert overwrite table date_dim_i
>>>> > PARTITION (d_year)
>>>> > select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
>>>> d_quarter_seq,
>>>> > d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq,
>>>> d_fy_week_seq,
>>>> > d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday,
>>>> > d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq, d_current_day,
>>>> > d_current_week, d_current_month, d_current_quarter, d_current_year,
>>>> d_year
>>>> > from date_dim;
>>>> >
>>>> > The date_dim table schema is as :
>>>> > create external table date_dim
>>>> > (
>>>> >     d_date_sk                 int,
>>>> >     d_date_id                 string,
>>>> >     d_date                    timestamp,
>>>> >     d_month_seq               int,
>>>> >     d_week_seq                int,
>>>> >     d_quarter_seq             int,
>>>> >     d_year                    int,
>>>> >     d_dow                     int,
>>>> >     d_moy                     int,
>>>> >     d_dom                     int,
>>>> >     d_qoy                     int,
>>>> >     d_fy_year                 int,
>>>> >     d_fy_quarter_seq          int,
>>>> >     d_fy_week_seq             int,
>>>> >     d_day_name                string,
>>>> >     d_quarter_name            string,
>>>> >     d_holiday                 string,
>>>> >     d_weekend                 string,
>>>> >     d_following_holiday       string,
>>>> >     d_first_dom               int,
>>>> >     d_last_dom                int,
>>>> >     d_same_day_ly             int,
>>>> >     d_same_day_lq             int,
>>>> >     d_current_day             string,
>>>> >     d_current_week            string,
>>>> >     d_current_month           string,
>>>> >     d_current_quarter         string,
>>>> >     d_current_year            string
>>>> > )
>>>> > row format delimited fields terminated by '|'
>>>> > location '/hive/tpcds/date_dim';
>>>> >
>>>> >
>>>> >
>>>> >
>>>> >
>>>> > It fails with following exception:
>>>> >
>>>> > Error: java.lang.RuntimeException:
>>>> > org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
>>>> while
>>>> > processing row
>>>> >
>>>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>>> >
>>>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>>> >
>>>> >         at
>>>> > org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
>>>> >
>>>> >         at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>>>> >
>>>> >         at
>>>> org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
>>>> >
>>>> >         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
>>>> >
>>>> >         at
>>>> org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
>>>> >
>>>> >         at java.security.AccessController.doPrivileged(Native Method)
>>>> >
>>>> >         at javax.security.auth.Subject.doAs(Subject.java:396)
>>>> >
>>>> >         at
>>>> >
>>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
>>>> >
>>>> >         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
>>>> >
>>>> > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive
>>>> Runtime
>>>> > Error while processing row
>>>> >
>>>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>>> >
>>>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>>> >
>>>> >         at
>>>> >
>>>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
>>>> >
>>>> >         at
>>>> > org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
>>>> >
>>>> >         ... 8 more
>>>> >
>>>> > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
>>>> > evaluating d_date
>>>> >
>>>> >         at
>>>> >
>>>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
>>>> >
>>>> >         at
>>>> > org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>>> >
>>>> >         at
>>>> > org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>>> >
>>>> >         at
>>>> >
>>>> org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
>>>> >
>>>> >         at
>>>> > org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>>> >
>>>> >         at
>>>> > org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>>> >
>>>> >         at
>>>> >
>>>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
>>>> >
>>>> >         ... 9 more
>>>> >
>>>> > Caused by: java.lang.IllegalArgumentException: Timestamp format must
>>>> be
>>>> > yyyy-mm-dd hh:mm:ss[.fffffffff]
>>>> >
>>>> >         at java.sql.Timestamp.valueOf(Timestamp.java:185)
>>>> >
>>>> >
>>>> > Please suggest what could be wrong here as datatypes are exact same
>>>> in both
>>>> > cases.
>>>> >
>>>> >
>>>> > Thanks !
>>>>
>>>
>>>
>>
>

Re: Hive insert into RCFILE issue with timestamp columns

Posted by Prasad Mujumdar <pr...@cloudera.com>.
Dilip,

   It looks like you are using the data file from the original schema for this
new table, which has a single timestamp column. When I tried with just the
timestamp value from your data, the query ran fine. I suspect the original
issue you hit was on data that didn't have a fractional part (1969-12-31
19:00:00, no .fffff).
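
Put differently, for the fields to line up with that pipe-delimited file, the
test table needs the same column layout as date_dim. A minimal sketch
(date_ts_full is a hypothetical name):

-- copy date_dim's full column layout so the pipe-delimited fields line up,
-- then read only the timestamp column
create external table date_ts_full like date_dim
location '/hive/tpcds/date_ts';

select d_date from date_ts_full limit 5;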

thanks
Prasad

On Tue, Mar 5, 2013 at 2:56 PM, Dileep Kumar <di...@gmail.com>wrote:

> --hdfs dfs -mkdir /hive/tpcds/date_ts
>
> create external table date_ts
> (
>     d_date                    timestamp
> )
> row format delimited fields terminated by '|'
> location '/hive/tpcds/date_ts';
>
> [cloudera@localhost tmp-work]$ hive -e "select * from date_ts"
> Logging initialized using configuration in
> file:/etc/hive/conf.dist/hive-log4j.properties
> Hive history
> file=/tmp/cloudera/hive_job_log_cloudera_201303052251_950655265.txt
> OK
> Failed with exception
> java.io.IOException:java.lang.IllegalArgumentException: Timestamp format
> must be yyyy-mm-dd hh:mm:ss[.fffffffff]
> Time taken: 3.556 seconds
> [cloudera@localhost tmp-work]$ hdfs dfs -cat /hive/tpcds/date_ts/*
> 2415022|AAAAAAAAOKJNECAA|1900-01-02
> 02:00:21.000000000|0|1|1|1900|1|1|2|1|1900|1|1|Monday|1900Q1|N|N|Y|2415021|2415020|2414657|2414930|N|N|N|N|N|
>
>
>
>
>
> On Mon, Mar 4, 2013 at 6:00 PM, Dileep Kumar <di...@gmail.com>wrote:
>
>> No.
>> Here are the errors:
>> Task with the most failures(4):
>> -----
>> Task ID:
>>   task_1361599885844_0013_m_000000
>>
>> URL:
>>
>> http://localhost.localdomain:50030/taskdetails.jsp?jobid=job_1361599885844_0013&tipid=task_1361599885844_0013_m_000000
>> -----
>> Diagnostic Messages for this Task:
>> Error: java.lang.RuntimeException:
>> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
>> processing row
>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>         at
>> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
>>         at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>>         at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
>>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
>>         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
>>         at java.security.AccessController.doPrivileged(Native Method)
>>         at javax.security.auth.Subject.doAs(Subject.java:396)
>>         at
>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
>>         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
>> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime
>> Error while processing row
>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>         at
>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
>>         at
>> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
>>         ... 8 more
>> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
>> evaluating d_date
>>         at
>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>         at
>> org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>         at
>> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>         at
>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
>>         ... 9 more
>> Caused by: java.lang.IllegalArgumentException: Timestamp format must be
>> yyyy-mm-dd hh:mm:ss[.fffffffff]
>>         at java.sql.Timestamp.valueOf(Timestamp.java:185)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.LazyTimestamp.init(LazyTimestamp.java:74)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.LazyStruct.uncheckedGetField(LazyStruct.java:219)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.LazyStruct.getField(LazyStruct.java:192)
>>         at
>> org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector.getStructFieldData(LazySimpleStructObjectInspector.java:188)
>>         at
>> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.evaluate(ExprNodeColumnEvaluator.java:98)
>>         at
>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:76)
>>         ... 15 more
>>
>>
>> FAILED: Execution Error, return code 2 from
>> org.apache.hadoop.hive.ql.exec.MapRedTask
>> MapReduce Jobs Launched:
>> Job 0: Map: 1   HDFS Read: 0 HDFS Write: 0 FAIL
>> Total MapReduce CPU Time Spent: 0 msec
>>
>>
>>
>> On Mon, Mar 4, 2013 at 5:51 PM, Mark Grover <gr...@gmail.com>wrote:
>>
>>> Hi Dilip,
>>> Are you able to run this query successfully?
>>>
>>> select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
>>> d_quarter_seq, d_dow, d_moy, d_dom, d_qoy, d_fy_year,
>>> d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name,
>>> d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom,
>>> d_same_day_ly, d_same_day_lq, d_current_day, d_current_week,
>>> d_current_month, d_current_quarter, d_current_year, d_year
>>> from date_dim
>>>
>>> On Mon, Mar 4, 2013 at 5:37 PM, Dileep Kumar <di...@gmail.com>
>>> wrote:
>>> > Hi All,
>>> >
>>> > I am using the schema in the Impala VM and trying to create a dynamic
>>> > partitioned table on date_dim.
>>> > New table is called date_dim_i and schema for that is defined as:
>>> > create table date_dim_i
>>> > (
>>> >     d_date_sk                 int,
>>> >     d_date_id                 string,
>>> >     d_date                    timestamp,
>>> >     d_month_seq               int,
>>> >     d_week_seq                int,
>>> >     d_quarter_seq             int,
>>> >     d_dow                     int,
>>> >     d_moy                     int,
>>> >     d_dom                     int,
>>> >     d_qoy                     int,
>>> >     d_fy_year                 int,
>>> >     d_fy_quarter_seq          int,
>>> >     d_fy_week_seq             int,
>>> >     d_day_name                string,
>>> >     d_quarter_name            string,
>>> >     d_holiday                 string,
>>> >     d_weekend                 string,
>>> >     d_following_holiday       string,
>>> >     d_first_dom               int,
>>> >     d_last_dom                int,
>>> >     d_same_day_ly             int,
>>> >     d_same_day_lq             int,
>>> >     d_current_day             string,
>>> >     d_current_week            string,
>>> >     d_current_month           string,
>>> >     d_current_quarter         string,
>>> >     d_current_year            string
>>> > )
>>> > PARTITIONED BY (d_year int)
>>> > stored as RCFILE;
>>> >
>>> > Then I do insert overwrite as:
>>> > insert overwrite table date_dim_i
>>> > PARTITION (d_year)
>>> > select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
>>> d_quarter_seq,
>>> > d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq, d_fy_week_seq,
>>> > d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday,
>>> > d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq, d_current_day,
>>> > d_current_week, d_current_month, d_current_quarter, d_current_year,
>>> d_year
>>> > from date_dim;
>>> >
>>> > The date_dim table schema is as :
>>> > create external table date_dim
>>> > (
>>> >     d_date_sk                 int,
>>> >     d_date_id                 string,
>>> >     d_date                    timestamp,
>>> >     d_month_seq               int,
>>> >     d_week_seq                int,
>>> >     d_quarter_seq             int,
>>> >     d_year                    int,
>>> >     d_dow                     int,
>>> >     d_moy                     int,
>>> >     d_dom                     int,
>>> >     d_qoy                     int,
>>> >     d_fy_year                 int,
>>> >     d_fy_quarter_seq          int,
>>> >     d_fy_week_seq             int,
>>> >     d_day_name                string,
>>> >     d_quarter_name            string,
>>> >     d_holiday                 string,
>>> >     d_weekend                 string,
>>> >     d_following_holiday       string,
>>> >     d_first_dom               int,
>>> >     d_last_dom                int,
>>> >     d_same_day_ly             int,
>>> >     d_same_day_lq             int,
>>> >     d_current_day             string,
>>> >     d_current_week            string,
>>> >     d_current_month           string,
>>> >     d_current_quarter         string,
>>> >     d_current_year            string
>>> > )
>>> > row format delimited fields terminated by '|'
>>> > location '/hive/tpcds/date_dim';
>>> >
>>> >
>>> >
>>> >
>>> >
>>> > It fails with following exception:
>>> >
>>> > Error: java.lang.RuntimeException:
>>> > org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
>>> while
>>> > processing row
>>> >
>>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>> >
>>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
>>> >
>>> >         at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>>> >
>>> >         at
>>> org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
>>> >
>>> >         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
>>> >
>>> >         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
>>> >
>>> >         at java.security.AccessController.doPrivileged(Native Method)
>>> >
>>> >         at javax.security.auth.Subject.doAs(Subject.java:396)
>>> >
>>> >         at
>>> >
>>> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
>>> >
>>> >         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
>>> >
>>> > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive
>>> Runtime
>>> > Error while processing row
>>> >
>>> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
>>> >
>>> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>>> >
>>> >         at
>>> >
>>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
>>> >
>>> >         ... 8 more
>>> >
>>> > Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
>>> > evaluating d_date
>>> >
>>> >         at
>>> >
>>> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>> >
>>> >         at
>>> >
>>> org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>>> >
>>> >         at
>>> > org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>>> >
>>> >         at
>>> >
>>> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
>>> >
>>> >         ... 9 more
>>> >
>>> > Caused by: java.lang.IllegalArgumentException: Timestamp format must be
>>> > yyyy-mm-dd hh:mm:ss[.fffffffff]
>>> >
>>> >         at java.sql.Timestamp.valueOf(Timestamp.java:185)
>>> >
>>> >
>>> > Please suggest what could be wrong here as datatypes are exact same in
>>> both
>>> > cases.
>>> >
>>> >
>>> > Thanks !
>>>
>>
>>
>

Re: Hive insert into RCFILE issue with timestamp columns

Posted by Dileep Kumar <di...@gmail.com>.
--hdfs dfs -mkdir /hive/tpcds/date_ts

create external table date_ts
(
    d_date                    timestamp
)
row format delimited fields terminated by '|'
location '/hive/tpcds/date_ts';

[cloudera@localhost tmp-work]$ hive -e "select * from date_ts"
Logging initialized using configuration in
file:/etc/hive/conf.dist/hive-log4j.properties
Hive history
file=/tmp/cloudera/hive_job_log_cloudera_201303052251_950655265.txt
OK
Failed with exception
java.io.IOException:java.lang.IllegalArgumentException: Timestamp format
must be yyyy-mm-dd hh:mm:ss[.fffffffff]
Time taken: 3.556 seconds
[cloudera@localhost tmp-work]$ hdfs dfs -cat /hive/tpcds/date_ts/*
2415022|AAAAAAAAOKJNECAA|1900-01-02
02:00:21.000000000|0|1|1|1900|1|1|2|1|1900|1|1|Monday|1900Q1|N|N|Y|2415021|2415020|2414657|2414930|N|N|N|N|N|





On Mon, Mar 4, 2013 at 6:00 PM, Dileep Kumar <di...@gmail.com>wrote:

> No.
> Here are the errors:
> Task with the most failures(4):
> -----
> Task ID:
>   task_1361599885844_0013_m_000000
>
> URL:
>
> http://localhost.localdomain:50030/taskdetails.jsp?jobid=job_1361599885844_0013&tipid=task_1361599885844_0013_m_000000
> -----
> Diagnostic Messages for this Task:
> Error: java.lang.RuntimeException:
> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
> processing row
> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>         at
> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
>         at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
>         at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
>         at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
>         at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
>         at java.security.AccessController.doPrivileged(Native Method)
>         at javax.security.auth.Subject.doAs(Subject.java:396)
>         at
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
>         at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime
> Error while processing row
> {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31
> 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
>         at
> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
>         at
> org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
>         ... 8 more
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error
> evaluating d_date
>         at
> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
>         at
> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>         at
> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>         at
> org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
>         at
> org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
>         at
> org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
>         at
> org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
>         ... 9 more
> Caused by: java.lang.IllegalArgumentException: Timestamp format must be
> yyyy-mm-dd hh:mm:ss[.fffffffff]
>         at java.sql.Timestamp.valueOf(Timestamp.java:185)
>         at
> org.apache.hadoop.hive.serde2.lazy.LazyTimestamp.init(LazyTimestamp.java:74)
>         at
> org.apache.hadoop.hive.serde2.lazy.LazyStruct.uncheckedGetField(LazyStruct.java:219)
>         at
> org.apache.hadoop.hive.serde2.lazy.LazyStruct.getField(LazyStruct.java:192)
>         at
> org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector.getStructFieldData(LazySimpleStructObjectInspector.java:188)
>         at
> org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.evaluate(ExprNodeColumnEvaluator.java:98)
>         at
> org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:76)
>         ... 15 more
>
>
> FAILED: Execution Error, return code 2 from
> org.apache.hadoop.hive.ql.exec.MapRedTask
> MapReduce Jobs Launched:
> Job 0: Map: 1   HDFS Read: 0 HDFS Write: 0 FAIL
> Total MapReduce CPU Time Spent: 0 msec
>
>
>
> On Mon, Mar 4, 2013 at 5:51 PM, Mark Grover <gr...@gmail.com>wrote:
>
>> Hi Dilip,
>> Are you able to run this query successfully?
>>
>> select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
>> d_quarter_seq, d_dow, d_moy, d_dom, d_qoy, d_fy_year,
>> d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name,
>> d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom,
>> d_same_day_ly, d_same_day_lq, d_current_day, d_current_week,
>> d_current_month, d_current_quarter, d_current_year, d_year
>> from date_dim
>>
>> On Mon, Mar 4, 2013 at 5:37 PM, Dileep Kumar <di...@gmail.com>
>> wrote:
>> > Hi All,
>> >
>> > I am using the schema in the Impala VM and trying to create a dynamic
>> > partitioned table on date_dim.
>> > New table is called date_dim_i and schema for that is defined as:
>> > create table date_dim_i
>> > (
>> >     d_date_sk                 int,
>> >     d_date_id                 string,
>> >     d_date                    timestamp,
>> >     d_month_seq               int,
>> >     d_week_seq                int,
>> >     d_quarter_seq             int,
>> > [...]

Re: Hive insert into RCFILE issue with timestamp columns

Posted by Dileep Kumar <di...@gmail.com>.
No.
Here are the errors:
Task with the most failures(4):
-----
Task ID:
  task_1361599885844_0013_m_000000

URL:

http://localhost.localdomain:50030/taskdetails.jsp?jobid=job_1361599885844_0013&tipid=task_1361599885844_0013_m_000000
-----
Diagnostic Messages for this Task:
Error: java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
        at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:161)
        at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
        at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:399)
        at org.apache.hadoop.mapred.MapTask.run(MapTask.java:334)
        at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:152)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:396)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1332)
        at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:147)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while processing row {"d_date_sk":2415022,"d_date_id":"AAAAAAAAOKJNECAA","d_date":"1969-12-31 19:00:00","d_month_seq":0,"d_week_seq":1,"d_quarter_seq":1,"d_year":1900,"d_dow":1,"d_moy":1,"d_dom":2,"d_qoy":1,"d_fy_year":1900,"d_fy_quarter_seq":1,"d_fy_week_seq":1,"d_day_name":"Monday","d_quarter_name":"1900Q1","d_holiday":"N","d_weekend":"N","d_following_holiday":"Y","d_first_dom":2415021,"d_last_dom":2415020,"d_same_day_ly":2414657,"d_same_day_lq":2414930,"d_current_day":"N","d_current_week":"N","d_current_month":"N","d_current_quarter":"N","d_current_year":"N"}
        at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:548)
        at org.apache.hadoop.hive.ql.exec.ExecMapper.map(ExecMapper.java:143)
        ... 8 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating d_date
        at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:80)
        at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
        at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
        at org.apache.hadoop.hive.ql.exec.TableScanOperator.processOp(TableScanOperator.java:83)
        at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
        at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:762)
        at org.apache.hadoop.hive.ql.exec.MapOperator.process(MapOperator.java:529)
        ... 9 more
Caused by: java.lang.IllegalArgumentException: Timestamp format must be yyyy-mm-dd hh:mm:ss[.fffffffff]
        at java.sql.Timestamp.valueOf(Timestamp.java:185)
        at org.apache.hadoop.hive.serde2.lazy.LazyTimestamp.init(LazyTimestamp.java:74)
        at org.apache.hadoop.hive.serde2.lazy.LazyStruct.uncheckedGetField(LazyStruct.java:219)
        at org.apache.hadoop.hive.serde2.lazy.LazyStruct.getField(LazyStruct.java:192)
        at org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector.getStructFieldData(LazySimpleStructObjectInspector.java:188)
        at org.apache.hadoop.hive.ql.exec.ExprNodeColumnEvaluator.evaluate(ExprNodeColumnEvaluator.java:98)
        at org.apache.hadoop.hive.ql.exec.SelectOperator.processOp(SelectOperator.java:76)
        ... 15 more


FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.MapRedTask
MapReduce Jobs Launched:
Job 0: Map: 1   HDFS Read: 0 HDFS Write: 0 FAIL
Total MapReduce CPU Time Spent: 0 msec
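
The deepest "Caused by" is the telling one: the lazy SerDe hands the raw
'|'-delimited field to java.sql.Timestamp.valueOf, which accepts only the
strict yyyy-mm-dd hh:mm:ss[.fffffffff] pattern, so a single d_date field
that deviates from it fails the whole map task. Note also that the printed
d_date, "1969-12-31 19:00:00", is the Unix epoch rendered in US Eastern
time, which suggests the raw field never carried a full timestamp at all;
TPC-DS generates d_date as a bare date such as 1900-01-02, which
Timestamp.valueOf rejects. A minimal sketch for confirming that, assuming a
hypothetical probe table date_dim_str that maps only the first three
'|'-delimited fields (the delimited SerDe ignores trailing fields) and
reads d_date as a plain string so the scan itself cannot throw:

create external table date_dim_str
(
    d_date_sk   int,
    d_date_id   string,
    d_date      string
)
row format delimited fields terminated by '|'
location '/hive/tpcds/date_dim';

-- Surface values that java.sql.Timestamp.valueOf would reject:
select d_date_sk, d_date
from date_dim_str
where d_date is null
   or not d_date rlike '^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}(\\.\\d{1,9})?$'
limit 20;

If that query returns rows, the source files are at fault, not the RCFile
table or the dynamic-partition insert.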



On Mon, Mar 4, 2013 at 5:51 PM, Mark Grover <gr...@gmail.com> wrote:

> Hi Dilip,
> Are you able to run this query successfully?
>
> select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
> d_quarter_seq, d_dow, d_moy, d_dom, d_qoy, d_fy_year,
> d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name,
> d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom,
> d_same_day_ly, d_same_day_lq, d_current_day, d_current_week,
> d_current_month, d_current_quarter, d_current_year, d_year
> from date_dim

Re: Hive insert into RCFILE issue with timestamp columns

Posted by Mark Grover <gr...@gmail.com>.
Hi Dilip,
Are you able to run this query successfully?

select d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq,
d_quarter_seq, d_dow, d_moy, d_dom, d_qoy, d_fy_year,
d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name,
d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom,
d_same_day_ly, d_same_day_lq, d_current_day, d_current_week,
d_current_month, d_current_quarter, d_current_year, d_year
from date_dim
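
This bare select isolates the read path: it forces Hive to deserialize
every d_date out of the '|'-delimited files with no RCFile writing or
dynamic partitioning involved, so if it raises the same Timestamp exception
the source data, not the new table, is to blame. A hedged follow-on sketch,
reusing the hypothetical date_dim_str probe table from above and assuming
cast-to-timestamp yields NULL on unparseable input rather than throwing
(its behavior in recent Hive releases; worth verifying on this build):

-- Malformed d_date values come through as NULL instead of failing the task:
select d_date_sk, cast(d_date as timestamp) as d_date_ts
from date_dim_str
limit 10;

Widening date_dim_str to all 28 columns and casting d_date the same way
during the insert would be one way to load past bad rows, at the cost of
NULL timestamps wherever the data is malformed.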
