You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "Rahul Challapalli (JIRA)" <ji...@apache.org> on 2015/11/28 08:19:11 UTC

[jira] [Updated] (DRILL-4141) Hive Plugin : Timestamp value in an RCfile (Snappy compression) is wrongly interpreted

     [ https://issues.apache.org/jira/browse/DRILL-4141?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Rahul Challapalli updated DRILL-4141:
-------------------------------------
    Attachment: fewtypes_null.tbl.gz

{code}
-- Source table: an external Hive table over pipe-delimited text.
-- The literal string "null" in the data files is read back as SQL NULL
-- (see serialization.null.format below).
DROP TABLE IF EXISTS fewtypes_null_compressed_gz;
CREATE EXTERNAL TABLE fewtypes_null_compressed_gz (
  int_col       INT,
  bigint_col    BIGINT,
  date_col      DATE,
  time_col      STRING,     -- declared as STRING, not a time type
  timestamp_col TIMESTAMP,
  interval_col  STRING,     -- declared as STRING, not an interval type
  varchar_col   STRING,
  float_col     FLOAT,
  double_col    DOUBLE,
  bool_col      BOOLEAN
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY "|"
LOCATION '/drill/testdata/hive_storage/fewtypes_null.tbl.gz'
TBLPROPERTIES ("serialization.null.format"="null");

-- Target table: an external Hive table stored in RCFile format.
-- The column list is the same as fewtypes_null_compressed_gz.
DROP TABLE IF EXISTS fewtypes_null_compressed_rc_snappy;
CREATE EXTERNAL TABLE fewtypes_null_compressed_rc_snappy (
  int_col       INT,
  bigint_col    BIGINT,
  date_col      DATE,
  time_col      STRING,     -- declared as STRING, not a time type
  timestamp_col TIMESTAMP,  -- the column whose value is misread in DRILL-4141
  interval_col  STRING,     -- declared as STRING, not an interval type
  varchar_col   STRING,
  float_col     FLOAT,
  double_col    DOUBLE,
  bool_col      BOOLEAN
)
STORED AS RCFILE
LOCATION '/drill/testdata/hive_storage/fewtypes_null_rc_snappy';

-- Session settings for the statements that follow: enable compressed query
-- output, use block-level compression, and select the Snappy codec.
-- Fix: the codec line previously read "SET SET mapred.output...", so the
-- intended property was never assigned.
SET hive.exec.compress.output=true;
SET mapred.output.compression.type=BLOCK;
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;

-- Replace the contents of fewtypes_null_compressed_rc_snappy with every row
-- and column of fewtypes_null_compressed_gz.
INSERT OVERWRITE TABLE fewtypes_null_compressed_rc_snappy
SELECT *
FROM fewtypes_null_compressed_gz;
{code}

> Hive Plugin :  Timestamp  value in an RCfile (Snappy compression) is wrongly interpreted
> ----------------------------------------------------------------------------------------
>
>                 Key: DRILL-4141
>                 URL: https://issues.apache.org/jira/browse/DRILL-4141
>             Project: Apache Drill
>          Issue Type: Bug
>          Components: Storage - Hive
>            Reporter: Rahul Challapalli
>            Priority: Critical
>         Attachments: fewtypes_null.tbl.gz
>
>
> git.commit.id.abbrev=f7a0d38
> The query below should return "1996-02-28 17:32:01.0". However, it returns an incorrect value that is 8 hours later:
> {code}
> select timestamp_col from hive.fewtypes_null_compressed_rc_snappy where int_col=20;
> +------------------------+
> |     timestamp_col      |
> +------------------------+
> | 1996-02-29 01:32:01.0  |
> +------------------------+
> 1 row selected (0.715 seconds)
> {code}
> This is not related to the timezone of the sqlline client, because the query below, run from the same client, returns the correct result. The only difference is that this time the data is stored in a sequence file with Snappy compression.
> {code}
> select timestamp_col from hive.fewtypes_null_compressed_seq_snappy where int_col=20;
> +------------------------+
> |     timestamp_col      |
> +------------------------+
> | 1996-02-28 17:32:01.0  |
> +------------------------+
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)