You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@trafodion.apache.org by "David Wayne Birdsall (JIRA)" <ji...@apache.org> on 2017/07/05 17:22:00 UTC
[jira] [Resolved] (TRAFODION-2662) Incremental UPDATE STATS fails
on very large sample tables
[ https://issues.apache.org/jira/browse/TRAFODION-2662?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
David Wayne Birdsall resolved TRAFODION-2662.
---------------------------------------------
Resolution: Fixed
> Incremental UPDATE STATS fails on very large sample tables
> ----------------------------------------------------------
>
> Key: TRAFODION-2662
> URL: https://issues.apache.org/jira/browse/TRAFODION-2662
> Project: Apache Trafodion
> Issue Type: Bug
> Components: sql-cmp
> Affects Versions: 2.2-incubating
> Environment: Large clusters
> Reporter: David Wayne Birdsall
> Assignee: David Wayne Birdsall
> Fix For: 2.2-incubating
>
>
> A user attempted to use the incremental UPDATE STATISTICS feature on a 90 billion row table. The persistent sample table was created using a sampling rate of 5 per cent, and so it had about 4.5 billion rows. The incremental UPDATE STATISTICS command failed with an error like the following:
> >>Update statistics for table traf1 on existing columns incremental where C2 >= '2017-06-10 00:00:00';
> *** ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly due to an invalid WHERE clause.
> *** ERROR[15001] A syntax error occurred at or before:
> SELECT FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE C2 >= '2
> ^ (12 characters from start of SQL statement)
> *** ERROR[8822] The statement was not prepared.
> --- SQL operation failed with errors.
> The script below reproduces the problem on a workstation. The script creates a table, puts a few rows in it, creates a sample table, then doctors the sample table metadata to make UPDATE STATISTICS think it has 4.5 billion rows. The failure then occurs.
> ?section setup
> drop table if exists traf1;
> CREATE TABLE traf1
> (
> C1 CHAR(32) CHARACTER SET ISO88591 COLLATE
> DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
> , C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT
> DROPPABLE NOT SERIALIZED
> , C3 CHAR(32) CHARACTER SET ISO88591 COLLATE
> DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
> , C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8
> COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
> , C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8
> COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
> , C6 SMALLINT DEFAULT 1 NOT SERIALIZED
> , PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC)
> )
> SALT USING 4 PARTITIONS
> ON (C1)
> ATTRIBUTES ALIGNED FORMAT
> HBASE_OPTIONS
> (
> DATA_BLOCK_ENCODING = 'FAST_DIFF',
> BLOCKSIZE = '262144',
> MEMSTORE_FLUSH_SIZE = '1073741824'
> )
> ;
> insert into traf1 values ('abcdef',current_timestamp - interval '20' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcdef01',current_timestamp - interval '20' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('012345',current_timestamp - interval '20' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('01234567',current_timestamp - interval '20' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def01234',current_timestamp - interval '20' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('adbf01',current_timestamp - interval '20' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('021357',current_timestamp - interval '20' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values ('abcdef',current_timestamp - interval '10' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcddf01',current_timestamp - interval '10' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('01234345',current_timestamp - interval '10' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('034567',current_timestamp - interval '10' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def8201234',current_timestamp - interval '10' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18);
> insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day,
> '0123456789abcdef01','www.C4edin.com','a bit of try twice',2);
> insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('02111357',current_timestamp - interval '10' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values ('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is fun',21);
> insert into traf1 values ('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of it is fun',39);
> insert into traf1 values ('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of stuff',14);
> insert into traf1 values ('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit of more stuff',8);
> insert into traf1 values ('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is fun',21);
> insert into traf1 values ('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit of it is fun',39);
> ?section repro
> UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT PERSISTENT;
> -- doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO
> -- from sb_persistent_samples to make UPDATE STATS think the sample table
> -- has 4.5 billion rows
> update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000, ACTUAL_SAMPLE_ROWS = 4500000000
> where table_uid = (select object_uid from "_MD_".objects where object_name = 'TRAF1');
> cqd ustat_log 'reproULOG.txt';
> update statistics log on;
> Update statistics for table traf1 on existing columns incremental where C2 >= '2017-06-10 00:00:00';
> exit;
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)