You are viewing a plain text version of this content. The canonical link for it is here.
Posted to gitbox@hive.apache.org by "kasakrisz (via GitHub)" <gi...@apache.org> on 2023/05/18 13:37:04 UTC

[GitHub] [hive] kasakrisz commented on a diff in pull request #4296: HIVE-27267: choose correct partition columns from bigTableRS

kasakrisz commented on code in PR #4296:
URL: https://github.com/apache/hive/pull/4296#discussion_r1197826881


##########
ql/src/test/queries/clientpositive/bucket_map_join_tez3.q:
##########
@@ -0,0 +1,139 @@
+-- Test for HIVE-27267
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+drop table if exists test_external_source;
+create external table test_external_source (date_col date, string_col string, decimal_col decimal(38,0)) stored as orc tblproperties ('external.table.purge'='true');
+insert into table test_external_source values
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2022-09-01', 'pipeline', '50000000000000000006008686831'),
+('2022-08-30', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992621067'),
+('2022-08-30', 'pipeline', '50000000000000000005992621067');
+
+drop table if exists test_external_target;
+create external table test_external_target (date_col date, string_col string, decimal_col decimal(38,0)) stored as orc tblproperties ('external.table.purge'='true');
+insert into table test_external_target values
+('2017-05-17', 'pipeline', '50000000000000000000441610525'),
+('2018-12-20', 'pipeline', '50000000000000000001048981030'),
+('2020-06-30', 'pipeline', '50000000000000000002332575516'),
+('2021-08-16', 'pipeline', '50000000000000000003897973989'),
+('2017-06-06', 'pipeline', '50000000000000000000449148729'),
+('2017-09-08', 'pipeline', '50000000000000000000525378314'),
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2018-05-03', 'pipeline', '50000000000000000000750826355'),
+('2020-01-10', 'pipeline', '50000000000000000001816579677'),
+('2021-11-01', 'pipeline', '50000000000000000004269423714'),
+('2017-11-07', 'pipeline', '50000000000000000000585901787'),
+('2019-10-15', 'pipeline', '50000000000000000001598843430'),
+('2020-04-01', 'pipeline', '50000000000000000002035795461'),
+('2020-02-24', 'pipeline', '50000000000000000001932600185'),
+('2020-04-27', 'pipeline', '50000000000000000002108160849'),
+('2016-07-05', 'pipeline', '50000000000000000000054405114'),
+('2020-06-02', 'pipeline', '50000000000000000002234387967'),
+('2020-08-21', 'pipeline', '50000000000000000002529168758'),
+('2021-02-17', 'pipeline', '50000000000000000003158511687');
+
+drop table if exists target_table;
+drop table if exists source_table;
+create table target_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default');
+create table source_table(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets stored as orc tblproperties ('bucketing_version'='2', 'transactional'='true', 'transactional_properties'='default');
+
+insert into table target_table select * from test_external_target;
+insert into table source_table select * from test_external_source;

Review Comment:
   Does creating the external tables first and then copying the data into ACID tables have an impact on the repro?
   If not, please remove the external tables from this q file and use ACID tables directly to simplify the test case.



##########
ql/src/test/queries/clientpositive/bucket_map_join_tez3.q:
##########
@@ -0,0 +1,139 @@
+-- Test for HIVE-27267
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+drop table if exists test_external_source;
+create external table test_external_source (date_col date, string_col string, decimal_col decimal(38,0)) stored as orc tblproperties ('external.table.purge'='true');
+insert into table test_external_source values
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2022-09-01', 'pipeline', '50000000000000000006008686831'),
+('2022-08-30', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992620837'),
+('2022-09-01', 'pipeline', '50000000000000000005992621067'),
+('2022-08-30', 'pipeline', '50000000000000000005992621067');
+
+drop table if exists test_external_target;
+create external table test_external_target (date_col date, string_col string, decimal_col decimal(38,0)) stored as orc tblproperties ('external.table.purge'='true');
+insert into table test_external_target values
+('2017-05-17', 'pipeline', '50000000000000000000441610525'),
+('2018-12-20', 'pipeline', '50000000000000000001048981030'),
+('2020-06-30', 'pipeline', '50000000000000000002332575516'),
+('2021-08-16', 'pipeline', '50000000000000000003897973989'),
+('2017-06-06', 'pipeline', '50000000000000000000449148729'),
+('2017-09-08', 'pipeline', '50000000000000000000525378314'),
+('2022-08-30', 'pipeline', '50000000000000000005905545593'),
+('2022-08-16', 'pipeline', '50000000000000000005905545593'),
+('2018-05-03', 'pipeline', '50000000000000000000750826355'),
+('2020-01-10', 'pipeline', '50000000000000000001816579677'),
+('2021-11-01', 'pipeline', '50000000000000000004269423714'),
+('2017-11-07', 'pipeline', '50000000000000000000585901787'),
+('2019-10-15', 'pipeline', '50000000000000000001598843430'),
+('2020-04-01', 'pipeline', '50000000000000000002035795461'),
+('2020-02-24', 'pipeline', '50000000000000000001932600185'),
+('2020-04-27', 'pipeline', '50000000000000000002108160849'),
+('2016-07-05', 'pipeline', '50000000000000000000054405114'),
+('2020-06-02', 'pipeline', '50000000000000000002234387967'),
+('2020-08-21', 'pipeline', '50000000000000000002529168758'),
+('2021-02-17', 'pipeline', '50000000000000000003158511687');
+
+drop table if exists target_table;
+drop table if exists source_table;

Review Comment:
   nit: normally the Qtest framework should drop all database objects between q file runs, so these drops are not necessary.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscribe@hive.apache.org
For additional commands, e-mail: gitbox-help@hive.apache.org