You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafodion.apache.org by "Alice Chen (JIRA)" <ji...@apache.org> on 2015/07/22 20:20:35 UTC
[jira] [Created] (TRAFODION-1255) LP Bug: 1460923 - Following load with truncate, secondary indexes has incorrect number of rows.

Alice Chen created TRAFODION-1255:
-------------------------------------

             Summary: LP Bug: 1460923 - Following load with truncate, secondary indexes has incorrect number of rows.
                 Key: TRAFODION-1255
                 URL: https://issues.apache.org/jira/browse/TRAFODION-1255
             Project: Apache Trafodion
          Issue Type: Bug
          Components: sql-exe
            Reporter: Julie Thai
            Assignee: Suresh Subbiah
            Priority: Critical
             Fix For: 2.0-incubating


Target table with 2 secondary indexes. On cluster with 20150528_0830 build, following a load-with-truncate-table, secondary indexes has incorrect row count.


SQL>load into cdr
select * from hive.hive.ext_cdr where col7 <= 65535;
UTIL_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------
Task: LOAD             Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  CLEANUP         Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  CLEANUP         Status: Ended      Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  PREPARATION     Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
       Rows Processed: 499439
Task:  PREPARATION     Status: Ended      ET: 00:01:46.041                      
Task:  COMPLETION      Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  COMPLETION      Status: Ended      ET: 00:00:07.676                      

--- SQL operation complete.
SQL>select count(*) from cdr;
(EXPR)
--------------------
              499439

--- 1 row(s) selected.

DBROOT>select count(*) from table(index_table cdr_col4_idx);
(EXPR)
--------------------
              499439

--- 1 row(s) selected.

DBROOT>select count(*) from table(index_table cdr_col7_idx);
(EXPR)
--------------------
              499439

--- 1 row(s) selected.

SQL>load with truncate table into cdr
select * from hive.hive.ext_cdr where col7 > 65535;
UTIL_OUTPUT
--------------------------------------------------------------------------------------------------------------------------------
Task: LOAD             Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  PURGE DATA      Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  PURGE DATA      Status: Ended      Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  CLEANUP         Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  CLEANUP         Status: Ended      Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  PREPARATION     Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
       Rows Processed: 9500561
Task:  PREPARATION     Status: Ended      ET: 00:27:44.180                      
Task:  COMPLETION      Status: Started    Object: TRAFODION.BULKLOAD_LOAD.CDR
Task:  COMPLETION      Status: Ended      ET: 00:00:05.800                      

--- SQL operation complete.

SQL>select count(*) from cdr;
(EXPR)
--------------------
             9500561

--- 1 row(s) selected.

DBROOT>select count(*) from table(index_table cdr_col4_idx);
(EXPR)
--------------------
            47502805

--- 1 row(s) selected.

DBROOT>select count(*) from table(index_table cdr_col7_idx);
(EXPR)
--------------------
            47502805

--- 1 row(s) selected.


To reproduce:
1. hdfs dfs -mkdir /bulkload/cdr
2. hdfs dfs -put cdr.tbl.000[1-4] /bulkload/cdr/.
data files are large, contact julie.y.thai@hp.com for data files or subset.
3. In hive,
drop table ext_cdr;
create table ext_cdr
(
col1 string,
col2 string,
col3 bigint,
col4 bigint,
col5 string,
col6 string,
col7 int,
col8 int,
col9 string,
col10 string
)
row format delimited fields terminated by '|'
location '/bulkload/cdr'
;
4. in trafci, 
CREATE TABLE CDR
(
COl1           CHAR(6) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL2        CHAR(2) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL3         LARGEINT NO DEFAULT NOT NULL NOT DROPPABLE
, COL4         LARGEINT DEFAULT NULL
, COL5        CHAR(14) CHARACTER SET ISO88591 COLLATE DEFAULT NOT NULL
, COL6        CHAR(14) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL7         INT DEFAULT NULL
, COL8         INT DEFAULT NULL
, COL9        CHAR(15) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
, COL10      CHAR(16) CHARACTER SET ISO88591 COLLATE DEFAULT DEFAULT NULL
)
STORE BY (COL3, COL5)
DIVISION BY (SUBSTRING(COL5, 1, 6))
SALT USING 5 PARTITIONS ON (COL3)
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY');

create index cdr_col4_idx on cdr(col4)
salt like table
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY');

create index cdr_col7_idx on cdr(col7)
salt like table
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
COMPRESSION = 'SNAPPY');

load into cdr select * from hive.hive.ext_cdr where col7 <= 65535;      --expect 499439 rows processed
select count(*) from cdr;   
select count(*) from table(index_table cdr_col4_idx);
select count(*) from table(index_table cdr_col7_idx);
load with truncate table into cdr select * from hive.hive.ext_cdr where col7 > 65535;    --expect 9500561 rows processed
select count(*) from cdr;  
select count(*) from table(index_table cdr_col4_idx);
select count(*) from table(index_table cdr_col7_idx);



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)