You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@flink.apache.org by "Jane Chan (Jira)" <ji...@apache.org> on 2022/04/20 07:45:00 UTC

[jira] [Updated] (FLINK-27316) Prevent users from changing bucket number

     [ https://issues.apache.org/jira/browse/FLINK-27316?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Jane Chan updated FLINK-27316:
------------------------------
    Description: 
Before we support this feature, we should throw a meaningful exception to prevent data corruption which is caused by
{code:sql}
 ALTER TABLE ... SET ('bucket' = '...');
 ALTER TABLE ... RESET ('bucket');{code}
 
h3. How to reproduce
{code:sql}
-- Suppose we defined a managed table like
CREATE TABLE IF NOT EXISTS managed_table (
  f0 INT,
  f1 STRING) WITH (
    'path' = '...'
    'bucket' = '3');

-- then write some data
INSERT INTO managed_table
VALUES (1, 'Sense and Sensibility),
(2, 'Pride and Prejudice), 
(3, 'Emma'), 
(4, 'Mansfield Park'), 
(5, 'Northanger Abbey'),
(6, 'The Mad Woman in the Attic'),
(7, 'Little Woman');

-- change bucket number
ALTER TABLE managed_table SET ('bucket' = '5');

-- write some data again
INSERT INTO managed_table 
VALUES (1, 'Sense and Sensibility'), 
(2, 'Pride and Prejudice'), 
(3, 'Emma'), 
(4, 'Mansfield Park'), 
(5, 'Northanger Abbey'), 
(6, 'The Mad Woman in the Attic'), 
(7, 'Little Woman'), 
(8, 'Jane Eyre');

-- change bucket number again
ALTER TABLE managed_table SET ('bucket' = '1')

-- then write some record with '-D' as rowkind
-- E.g. changelogRow("-D", 7, "Little Woman"),
-- changelogRow("-D", 2, "Pride and Prejudice"), 
-- changelogRow("-D", 3, "Emma"), 
-- changelogRow("-D", 4, "Mansfield Park"), 
-- changelogRow("-D", 5, "Northanger Abbey"), 
-- changelogRow("-D", 6, "The Mad Woman in the Attic"), 
-- changelogRow("-D", 8, "Jane Eyre"), 
-- changelogRow("-D", 1, "Sense and Sensibility"), 
-- changelogRow("-D", 1, "Sense and Sensibility") 

CREATE TABLE helper_source (
  f0 INT, 
  f1 STRING) WITH (
  'connector' = 'values', 
  'data-id' = '${register-id}', 
  'bounded' = 'false', 
  'changelog-mode' = 'I,UA,UB,D'
); 

INSERT INTO managed_table SELECT * FROM helper_source;

-- then read the snapshot

SELECT * FROM managed_table
{code}

  was:
Before we support this feature, we should throw a meaningful exception to prevent data corruption which is caused by
{code:sql}
 ALTER TABLE ... SET ('bucket' = '...');
 ALTER TABLE ... RESET ('bucket');{code}
 
h3. How to reproduce
{code:sql}

-- Suppose we defined a managed table like
CREATE TABLE IF NOT EXISTS managed_table (
  f0 INT,
  f1 STRING) WITH (
    'path' = '...'
    'bucket' = '3');

-- then write some data
INSERT INTO managed_table
VALUES (1, 'Sense and Sensibility),
(2, 'Pride and Prejudice), 
(3, 'Emma'), 
(4, 'Mansfield Park'), 
(5, 'Northanger Abbey'),
(6, 'The Mad Woman in the Attic'),
(7, 'Little Woman');

-- change bucket number
ALTER TABLE managed_table SET ('bucket' = '5');

-- write some data again
INSERT INTO managed_table 
VALUES (1, 'Sense and Sensibility'), 
(2, 'Pride and Prejudice'), 
(3, 'Emma'), 
(4, 'Mansfield Park'), 
(5, 'Northanger Abbey'), 
(6, 'The Mad Woman in the Attic'), 
(7, 'Little Woman'), 
(8, 'Jane Eyre');

-- change bucket number again
ALTER TABLE managed_table SET ('bucket' = '1')

-- then write some record with '-D' as changelog mode
-- E.g. changelogRow("-D", 7, "Little Woman"), changelogRow("-D", 2, "Pride and Prejudice"), changelogRow("-D", 3, "Emma"), changelogRow("-D", 4, "Mansfield Park"), changelogRow("-D", 5, "Northanger Abbey"), changelogRow("-D", 6, "The Mad Woman in the Attic"), changelogRow("-D", 8, "Jane Eyre"), changelogRow("-D", 1, "Sense and Sensibility"), changelogRow("-D", 1, "Sense and Sensibility") CREATE TABLE helper_source (
  f0 INT, 
  f1 STRING) WITH (
  'connector' = 'values', 
  'data-id' = '${register-id}', 
  'bounded' = 'false', 
  'changelog-mode' = 'I,UA,UB,D'
); 

INSERT INTO managed_table SELECT * FROM helper_source;

-- then read the snapshot

SELECT * FROM managed_table
{code}


> Prevent users from changing bucket number
> -----------------------------------------
>
>                 Key: FLINK-27316
>                 URL: https://issues.apache.org/jira/browse/FLINK-27316
>             Project: Flink
>          Issue Type: Sub-task
>          Components: Table Store
>    Affects Versions: table-store-0.1.0
>            Reporter: Jane Chan
>            Priority: Major
>             Fix For: table-store-0.1.0
>
>
> Before we support this feature, we should throw a meaningful exception to prevent data corruption which is caused by
> {code:sql}
>  ALTER TABLE ... SET ('bucket' = '...');
>  ALTER TABLE ... RESET ('bucket');{code}
>  
> h3. How to reproduce
> {code:sql}
> -- Suppose we defined a managed table like
> CREATE TABLE IF NOT EXISTS managed_table (
>   f0 INT,
>   f1 STRING) WITH (
>     'path' = '...'
>     'bucket' = '3');
> -- then write some data
> INSERT INTO managed_table
> VALUES (1, 'Sense and Sensibility),
> (2, 'Pride and Prejudice), 
> (3, 'Emma'), 
> (4, 'Mansfield Park'), 
> (5, 'Northanger Abbey'),
> (6, 'The Mad Woman in the Attic'),
> (7, 'Little Woman');
> -- change bucket number
> ALTER TABLE managed_table SET ('bucket' = '5');
> -- write some data again
> INSERT INTO managed_table 
> VALUES (1, 'Sense and Sensibility'), 
> (2, 'Pride and Prejudice'), 
> (3, 'Emma'), 
> (4, 'Mansfield Park'), 
> (5, 'Northanger Abbey'), 
> (6, 'The Mad Woman in the Attic'), 
> (7, 'Little Woman'), 
> (8, 'Jane Eyre');
> -- change bucket number again
> ALTER TABLE managed_table SET ('bucket' = '1')
> -- then write some record with '-D' as rowkind
> -- E.g. changelogRow("-D", 7, "Little Woman"),
> -- changelogRow("-D", 2, "Pride and Prejudice"), 
> -- changelogRow("-D", 3, "Emma"), 
> -- changelogRow("-D", 4, "Mansfield Park"), 
> -- changelogRow("-D", 5, "Northanger Abbey"), 
> -- changelogRow("-D", 6, "The Mad Woman in the Attic"), 
> -- changelogRow("-D", 8, "Jane Eyre"), 
> -- changelogRow("-D", 1, "Sense and Sensibility"), 
> -- changelogRow("-D", 1, "Sense and Sensibility") 
> CREATE TABLE helper_source (
>   f0 INT, 
>   f1 STRING) WITH (
>   'connector' = 'values', 
>   'data-id' = '${register-id}', 
>   'bounded' = 'false', 
>   'changelog-mode' = 'I,UA,UB,D'
> ); 
> INSERT INTO managed_table SELECT * FROM helper_source;
> -- then read the snapshot
> SELECT * FROM managed_table
> {code}



--
This message was sent by Atlassian Jira
(v8.20.7#820007)