You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafodion.apache.org by "Suresh Subbiah (JIRA)" <ji...@apache.org> on 2015/09/02 21:40:46 UTC

[jira] [Closed] (TRAFODION-1255) LP Bug: 1460923 - Following load with truncate, secondary indexes have incorrect number of rows.

     [ https://issues.apache.org/jira/browse/TRAFODION-1255?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Suresh Subbiah closed TRAFODION-1255.
-------------------------------------

> LP Bug: 1460923 - Following load with truncate, secondary indexes have incorrect number of rows.
> -----------------------------------------------------------------------------------------------
>
>                 Key: TRAFODION-1255
>                 URL: https://issues.apache.org/jira/browse/TRAFODION-1255
>             Project: Apache Trafodion
>          Issue Type: Bug
>          Components: sql-exe
>            Reporter: Julie Thai
>            Assignee: Suresh Subbiah
>            Priority: Critical
>             Fix For: 2.0-incubating
>
>
> Target table with 2 secondary indexes. On cluster with 20150528_0830 build, following a load-with-truncate-table, the secondary indexes have an incorrect row count.
> SQL>load into cdr
> select * from hive.hive.ext_cdr where col7 <= 65535;
> UTIL_OUTPUT
> --------------------------------------------------------------------------------------------------------------------------------
> Task: LOAD             Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  CLEANUP         Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  CLEANUP         Status: Ended      Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  PREPARATION     Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
>        Rows Processed: 499439
> Task:  PREPARATION     Status: Ended      ET: 00:01:46.041                      
> Task:  COMPLETION      Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  COMPLETION      Status: Ended      ET: 00:00:07.676                      
> --- SQL operation complete.
> SQL>select count(*) from cdr;
> (EXPR)
> --------------------
>               499439
> --- 1 row(s) selected.
> DBROOT>select count(*) from table(index_table cdr_col4_idx);
> (EXPR)
> --------------------
>               499439
> --- 1 row(s) selected.
> DBROOT>select count(*) from table(index_table cdr_col7_idx);
> (EXPR)
> --------------------
>               499439
> --- 1 row(s) selected.
> SQL>load with truncate table into cdr
> select * from hive.hive.ext_cdr where col7 > 65535;
> UTIL_OUTPUT
> --------------------------------------------------------------------------------------------------------------------------------
> Task: LOAD             Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  PURGE DATA      Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  PURGE DATA      Status: Ended      Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  CLEANUP         Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  CLEANUP         Status: Ended      Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  PREPARATION     Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
>        Rows Processed: 9500561
> Task:  PREPARATION     Status: Ended      ET: 00:27:44.180                      
> Task:  COMPLETION      Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
> Task:  COMPLETION      Status: Ended      ET: 00:00:05.800                      
> --- SQL operation complete.
> SQL>select count(*) from cdr;
> (EXPR)
> --------------------
>              9500561
> --- 1 row(s) selected.
> DBROOT>select count(*) from table(index_table cdr_col4_idx);
> (EXPR)
> --------------------
>             47502805
> --- 1 row(s) selected.
> DBROOT>select count(*) from table(index_table cdr_col7_idx);
> (EXPR)
> --------------------
>             47502805
> --- 1 row(s) selected.
> To reproduce:
> 1. hdfs dfs -mkdir /bulkload/cdr
> 2. hdfs dfs -put cdr.tbl.000[1-4] /bulkload/cdr/.
> The data files are large; contact julie.y.thai@hp.com for the data files or a subset.
> 3. In hive,
> drop table ext_cdr;
> create table ext_cdr
> (
> col1 string,
> col2 string,
> col3 bigint,
> col4 bigint,
> col5 string,
> col6 string,
> col7 int,
> col8 int,
> col9 string,
> col10 string
> )
> row format delimited fields terminated by '|'
> location '/bulkload/cdr'
> ;
> 4. In trafci,
> CREATE TABLE CDR
> (
> COl1           CHAR(6) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
> , COL2        CHAR(2) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
> , COL3         LARGEINT NO DEFAULT NOT NULL NOT DROPPABLE
> , COL4         LARGEINT DEFAULT NULL
> , COL5        CHAR(14) CHARACTER SET ISO88591 COLLATE DEFAULT NOT NULL
> , COL6        CHAR(14) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
> , COL7         INT DEFAULT NULL
> , COL8         INT DEFAULT NULL
> , COL9        CHAR(15) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
> , COL10      CHAR(16) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
> )
> STORE BY (COL3, COL5)
> DIVISION BY (SUBSTRING(COL5, 1, 6))
> SALT USING 5 PARTITIONS ON (COL3)
> HBASE_OPTIONS
> (
> DATA_BLOCK_ENCODING = 'FAST_DIFF',
> COMPRESSION = 'SNAPPY');
> create index cdr_col4_idx on cdr(col4)
> salt like table
> HBASE_OPTIONS
> (
> DATA_BLOCK_ENCODING = 'FAST_DIFF',
> COMPRESSION = 'SNAPPY');
> create index cdr_col7_idx on cdr(col7)
> salt like table
> HBASE_OPTIONS
> (
> DATA_BLOCK_ENCODING = 'FAST_DIFF',
> COMPRESSION = 'SNAPPY');
> load into cdr select * from hive.hive.ext_cdr where col7 <= 65535;      --expect 499439 rows processed
> select count(*) from cdr;   
> select count(*) from table(index_table cdr_col4_idx);
> select count(*) from table(index_table cdr_col7_idx);
> load with truncate table into cdr select * from hive.hive.ext_cdr where col7 > 65535;    --expect 9500561 rows processed
> select count(*) from cdr;  
> select count(*) from table(index_table cdr_col4_idx);
> select count(*) from table(index_table cdr_col7_idx);



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)