You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by yj...@apache.org on 2016/07/27 02:23:36 UTC
incubator-hawq git commit: HAWQ-898. Revert copy checkinstall-good
case as cases transactions and row_types need this to prepare data
Repository: incubator-hawq
Updated Branches:
refs/heads/master b59603125 -> 0bb0a2074
HAWQ-898. Revert copy checkinstall-good case as cases transactions and row_types need this to prepare data
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/0bb0a207
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/0bb0a207
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/0bb0a207
Branch: refs/heads/master
Commit: 0bb0a207467ade1eb5d8b52a2465d23c1e803fda
Parents: b596031
Author: YI JIN <yj...@pivotal.io>
Authored: Wed Jul 27 12:23:23 2016 +1000
Committer: YI JIN <yj...@pivotal.io>
Committed: Wed Jul 27 12:23:23 2016 +1000
----------------------------------------------------------------------
src/test/regress/input/copy.source | 224 ++++++++++++++++++++++++++++++
src/test/regress/known_good_schedule | 1 +
src/test/regress/output/copy.source | 214 ++++++++++++++++++++++++++++
3 files changed, 439 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0bb0a207/src/test/regress/input/copy.source
----------------------------------------------------------------------
diff --git a/src/test/regress/input/copy.source b/src/test/regress/input/copy.source
new file mode 100755
index 0000000..ccab99b
--- /dev/null
+++ b/src/test/regress/input/copy.source
@@ -0,0 +1,224 @@
+--
+-- COPY
+--
+-- CLASS POPULATION
+-- (any resemblance to real life is purely coincidental)
+--
+COPY aggtest FROM '@abs_srcdir@/data/agg.data';
+
+COPY onek FROM '@abs_srcdir@/data/onek.data';
+
+COPY onek TO '@abs_builddir@/results/onek.data';
+
+TRUNCATE onek;
+
+COPY onek FROM '@abs_builddir@/results/onek.data';
+
+COPY tenk1 FROM '@abs_srcdir@/data/tenk.data';
+
+COPY slow_emp4000 FROM '@abs_srcdir@/data/rect.data';
+
+COPY person FROM '@abs_srcdir@/data/person.data';
+
+COPY emp FROM '@abs_srcdir@/data/emp.data';
+
+COPY student FROM '@abs_srcdir@/data/student.data';
+
+COPY stud_emp FROM '@abs_srcdir@/data/stud_emp.data';
+
+COPY road FROM '@abs_srcdir@/data/streets.data';
+
+COPY real_city FROM '@abs_srcdir@/data/real_city.data';
+
+COPY hash_i4_heap FROM '@abs_srcdir@/data/hash.data';
+
+COPY hash_name_heap FROM '@abs_srcdir@/data/hash.data';
+
+COPY hash_txt_heap FROM '@abs_srcdir@/data/hash.data';
+
+COPY hash_f8_heap FROM '@abs_srcdir@/data/hash.data';
+
+-- the data in this file has a lot of duplicates in the index key
+-- fields, leading to long bucket chains and lots of table expansion.
+-- this is therefore a stress test of the bucket overflow code (unlike
+-- the data in hash.data, which has unique index keys).
+--
+-- COPY hash_ovfl_heap FROM '@abs_srcdir@/data/hashovfl.data';
+
+COPY bt_i4_heap FROM '@abs_srcdir@/data/desc.data';
+
+COPY bt_name_heap FROM '@abs_srcdir@/data/hash.data';
+
+COPY bt_txt_heap FROM '@abs_srcdir@/data/desc.data';
+
+COPY bt_f8_heap FROM '@abs_srcdir@/data/hash.data';
+
+COPY array_op_test FROM '@abs_srcdir@/data/array.data';
+
+COPY array_index_op_test FROM '@abs_srcdir@/data/array.data';
+
+--- test copying in CSV mode with various styles
+--- of embedded line ending characters
+
+create temp table copytest (
+ style text,
+ test text,
+ filler int);
+
+insert into copytest values('DOS',E'abc\r\ndef',1);
+insert into copytest values('Unix',E'abc\ndef',2);
+insert into copytest values('Mac',E'abc\rdef',3);
+insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4);
+
+copy copytest to '@abs_builddir@/results/copytest.csv' csv;
+
+create temp table copytest2 (like copytest);
+
+copy copytest2 from '@abs_builddir@/results/copytest.csv' csv;
+
+select * from copytest except select * from copytest2 order by 1,2,3;
+
+truncate copytest2;
+
+--- same test but with an escape char different from quote char
+
+copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
+
+copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
+
+select * from copytest except select * from copytest2 order by 1,2,3;
+
+
+-- test header line feature
+
+create temp table copytest3 (
+ c1 int,
+ "col with , comma" text,
+ "col with "" quote" int) distributed by (c1);
+
+copy copytest3 from stdin csv header;
+this is just a line full of junk that would error out if parsed
+1,a,1
+2,b,2
+\.
+
+copy copytest3 to stdout csv header;
+-- copy with error table
+CREATE TABLE number (a INT) DISTRIBUTED BY (a);
+
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+these are invalid line should be insert into error table.
+a
+b
+c
+d
+e
+f
+g
+h
+\.
+
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum;
+select * from number; --should be empty
+\d err_copy
+
+DROP TABLE err_copy;
+
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+these are invalid line should be insert into error table.
+a
+1
+b
+2
+c
+3
+d
+4
+e
+5
+f
+6
+g
+7
+h
+\.
+
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum;
+select count(*) from number; --should be 7
+DROP TABLE err_copy;
+
+TRUNCATE number;
+
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+these are invalid line should be insert into error table.
+a
+1
+b
+2
+c
+3
+d
+4
+e
+5
+f
+6
+g
+7
+h
+i
+\.
+
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; -- should not exist
+select count(*) from number; --should be empty
+
+TRUNCATE number;
+CREATE TABLE err_copy (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) distributed randomly;
+
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+these are invalid line should be insert into error table.
+a
+1
+b
+2
+c
+3
+d
+4
+e
+5
+f
+6
+g
+7
+h
+\.
+
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum;
+select count(*) from number; --should be 7
+DROP TABLE err_copy;
+
+-- invalid error table schema
+TRUNCATE number;
+create table invalid_error_table1 (a int) distributed randomly;
+create table invalid_error_table3 (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea)
+ distributed by (cmdtime);
+
+COPY number FROM STDIN LOG ERRORS INTO invalid_error_table1 SEGMENT REJECT LIMIT 10 ROWS; -- should fail
+these are invalid line should be insert into error table.
+1
+\.
+
+;
+
+COPY number FROM STDIN LOG ERRORS INTO invalid_error_table3 SEGMENT REJECT LIMIT 10 ROWS; -- should fail
+these are invalid line should be insert into error table.
+1
+\.
+
+;
+
+DROP TABLE invalid_error_table1;
+DROP TABLE invalid_error_table3;
+
+DROP TABLE number;
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0bb0a207/src/test/regress/known_good_schedule
----------------------------------------------------------------------
diff --git a/src/test/regress/known_good_schedule b/src/test/regress/known_good_schedule
index dcb6a7a..8c9091f 100755
--- a/src/test/regress/known_good_schedule
+++ b/src/test/regress/known_good_schedule
@@ -44,6 +44,7 @@ ignore: horology
ignore: create_type
test: create_table_test
ignore: create_function_2
+test: copy
ignore: copyselect
ignore: constraints
ignore: triggers
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/0bb0a207/src/test/regress/output/copy.source
----------------------------------------------------------------------
diff --git a/src/test/regress/output/copy.source b/src/test/regress/output/copy.source
new file mode 100755
index 0000000..d250166
--- /dev/null
+++ b/src/test/regress/output/copy.source
@@ -0,0 +1,214 @@
+--
+-- COPY
+--
+-- CLASS POPULATION
+-- (any resemblance to real life is purely coincidental)
+--
+COPY aggtest FROM '@abs_srcdir@/data/agg.data';
+COPY onek FROM '@abs_srcdir@/data/onek.data';
+COPY onek TO '@abs_builddir@/results/onek.data';
+TRUNCATE onek;
+COPY onek FROM '@abs_builddir@/results/onek.data';
+COPY tenk1 FROM '@abs_srcdir@/data/tenk.data';
+COPY slow_emp4000 FROM '@abs_srcdir@/data/rect.data';
+COPY person FROM '@abs_srcdir@/data/person.data';
+COPY emp FROM '@abs_srcdir@/data/emp.data';
+COPY student FROM '@abs_srcdir@/data/student.data';
+COPY stud_emp FROM '@abs_srcdir@/data/stud_emp.data';
+COPY road FROM '@abs_srcdir@/data/streets.data';
+COPY real_city FROM '@abs_srcdir@/data/real_city.data';
+COPY hash_i4_heap FROM '@abs_srcdir@/data/hash.data';
+COPY hash_name_heap FROM '@abs_srcdir@/data/hash.data';
+COPY hash_txt_heap FROM '@abs_srcdir@/data/hash.data';
+COPY hash_f8_heap FROM '@abs_srcdir@/data/hash.data';
+-- the data in this file has a lot of duplicates in the index key
+-- fields, leading to long bucket chains and lots of table expansion.
+-- this is therefore a stress test of the bucket overflow code (unlike
+-- the data in hash.data, which has unique index keys).
+--
+-- COPY hash_ovfl_heap FROM '@abs_srcdir@/data/hashovfl.data';
+COPY bt_i4_heap FROM '@abs_srcdir@/data/desc.data';
+COPY bt_name_heap FROM '@abs_srcdir@/data/hash.data';
+COPY bt_txt_heap FROM '@abs_srcdir@/data/desc.data';
+COPY bt_f8_heap FROM '@abs_srcdir@/data/hash.data';
+COPY array_op_test FROM '@abs_srcdir@/data/array.data';
+COPY array_index_op_test FROM '@abs_srcdir@/data/array.data';
+--- test copying in CSV mode with various styles
+--- of embedded line ending characters
+create temp table copytest (
+ style text,
+ test text,
+ filler int);
+insert into copytest values('DOS',E'abc\r\ndef',1);
+insert into copytest values('Unix',E'abc\ndef',2);
+insert into copytest values('Mac',E'abc\rdef',3);
+insert into copytest values(E'esc\\ape',E'a\\r\\\r\\\n\\nb',4);
+copy copytest to '@abs_builddir@/results/copytest.csv' csv;
+create temp table copytest2 (like copytest);
+copy copytest2 from '@abs_builddir@/results/copytest.csv' csv;
+select * from copytest except select * from copytest2 order by 1,2,3;
+ style | test | filler
+-------+------+--------
+(0 rows)
+
+truncate copytest2;
+--- same test but with an escape char different from quote char
+copy copytest to '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
+copy copytest2 from '@abs_builddir@/results/copytest.csv' csv quote '''' escape E'\\';
+select * from copytest except select * from copytest2 order by 1,2,3;
+ style | test | filler
+-------+------+--------
+(0 rows)
+
+-- test header line feature
+create temp table copytest3 (
+ c1 int,
+ "col with , comma" text,
+ "col with "" quote" int) distributed by (c1);
+copy copytest3 from stdin csv header;
+copy copytest3 to stdout csv header;
+c1,"col with , comma","col with "" quote"
+1,a,1
+2,b,2
+-- copy with error table
+CREATE TABLE number (a INT) DISTRIBUTED BY (a);
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+NOTICE: Error table "err_copy" does not exist. Auto generating an error table with the same name
+WARNING: The error table was created in the same transaction as this operation. It will get dropped if transaction rolls back even if bad rows are present
+HINT: To avoid this create the error table ahead of time using: CREATE TABLE <name> (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea)
+NOTICE: Found 9 data formatting errors (9 or more input rows). Errors logged into error table "err_copy"
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum;
+ relname | filename | linenum | bytenum | errmsg | rawdata | rawbytes
+---------+----------+---------+---------+---------------------------------------------------------------------------------------------------------+-----------------------------------------------------------+----------
+ number | <stdin> | 1 | | invalid input syntax for integer: "these are invalid line should be insert into error table.", column a | these are invalid line should be insert into error table. |
+ number | <stdin> | 2 | | invalid input syntax for integer: "a", column a | a |
+ number | <stdin> | 3 | | invalid input syntax for integer: "b", column a | b |
+ number | <stdin> | 4 | | invalid input syntax for integer: "c", column a | c |
+ number | <stdin> | 5 | | invalid input syntax for integer: "d", column a | d |
+ number | <stdin> | 6 | | invalid input syntax for integer: "e", column a | e |
+ number | <stdin> | 7 | | invalid input syntax for integer: "f", column a | f |
+ number | <stdin> | 8 | | invalid input syntax for integer: "g", column a | g |
+ number | <stdin> | 9 | | invalid input syntax for integer: "h", column a | h |
+(9 rows)
+
+select * from number; --should be empty
+ a
+---
+(0 rows)
+
+\d err_copy
+ Append-Only Table "public.err_copy"
+ Column | Type | Modifiers
+----------+--------------------------+-----------
+ cmdtime | timestamp with time zone |
+ relname | text |
+ filename | text |
+ linenum | integer |
+ bytenum | integer |
+ errmsg | text |
+ rawdata | text |
+ rawbytes | bytea |
+Compression Type: None
+Compression Level: 0
+Block Size: 32768
+Checksum: f
+Distributed randomly
+
+DROP TABLE err_copy;
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+NOTICE: Error table "err_copy" does not exist. Auto generating an error table with the same name
+WARNING: The error table was created in the same transaction as this operation. It will get dropped if transaction rolls back even if bad rows are present
+HINT: To avoid this create the error table ahead of time using: CREATE TABLE <name> (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea)
+NOTICE: Found 9 data formatting errors (9 or more input rows). Errors logged into error table "err_copy"
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum;
+ relname | filename | linenum | bytenum | errmsg | rawdata | rawbytes
+---------+----------+---------+---------+---------------------------------------------------------------------------------------------------------+-----------------------------------------------------------+----------
+ number | <stdin> | 1 | | invalid input syntax for integer: "these are invalid line should be insert into error table.", column a | these are invalid line should be insert into error table. |
+ number | <stdin> | 2 | | invalid input syntax for integer: "a", column a | a |
+ number | <stdin> | 4 | | invalid input syntax for integer: "b", column a | b |
+ number | <stdin> | 6 | | invalid input syntax for integer: "c", column a | c |
+ number | <stdin> | 8 | | invalid input syntax for integer: "d", column a | d |
+ number | <stdin> | 10 | | invalid input syntax for integer: "e", column a | e |
+ number | <stdin> | 12 | | invalid input syntax for integer: "f", column a | f |
+ number | <stdin> | 14 | | invalid input syntax for integer: "g", column a | g |
+ number | <stdin> | 16 | | invalid input syntax for integer: "h", column a | h |
+(9 rows)
+
+select count(*) from number; --should be 7
+ count
+-------
+ 7
+(1 row)
+
+DROP TABLE err_copy;
+TRUNCATE number;
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+NOTICE: Error table "err_copy" does not exist. Auto generating an error table with the same name
+WARNING: The error table was created in the same transaction as this operation. It will get dropped if transaction rolls back even if bad rows are present
+HINT: To avoid this create the error table ahead of time using: CREATE TABLE <name> (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea)
+ERROR: Segment reject limit reached. Aborting operation. Last error was: invalid input syntax for integer: "i", column a
+CONTEXT: COPY number, line 17, column a
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum; -- should not exist
+ERROR: relation "err_copy" does not exist
+LINE 1: ...name,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy o...
+ ^
+select count(*) from number; --should be empty
+ count
+-------
+ 0
+(1 row)
+
+TRUNCATE number;
+CREATE TABLE err_copy (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea) distributed randomly;
+COPY number FROM STDIN LOG ERRORS INTO err_copy SEGMENT REJECT LIMIT 10 ROWS;
+NOTICE: Found 9 data formatting errors (9 or more input rows). Errors logged into error table "err_copy"
+select relname,filename,linenum,bytenum,errmsg,rawdata,rawbytes from err_copy order by linenum;
+ relname | filename | linenum | bytenum | errmsg | rawdata | rawbytes
+---------+----------+---------+---------+---------------------------------------------------------------------------------------------------------+-----------------------------------------------------------+----------
+ number | <stdin> | 1 | | invalid input syntax for integer: "these are invalid line should be insert into error table.", column a | these are invalid line should be insert into error table. |
+ number | <stdin> | 2 | | invalid input syntax for integer: "a", column a | a |
+ number | <stdin> | 4 | | invalid input syntax for integer: "b", column a | b |
+ number | <stdin> | 6 | | invalid input syntax for integer: "c", column a | c |
+ number | <stdin> | 8 | | invalid input syntax for integer: "d", column a | d |
+ number | <stdin> | 10 | | invalid input syntax for integer: "e", column a | e |
+ number | <stdin> | 12 | | invalid input syntax for integer: "f", column a | f |
+ number | <stdin> | 14 | | invalid input syntax for integer: "g", column a | g |
+ number | <stdin> | 16 | | invalid input syntax for integer: "h", column a | h |
+(9 rows)
+
+select count(*) from number; --should be 7
+ count
+-------
+ 7
+(1 row)
+
+DROP TABLE err_copy;
+-- invalid error table schema
+TRUNCATE number;
+create table invalid_error_table1 (a int) distributed randomly;
+create table invalid_error_table3 (cmdtime timestamp with time zone, relname text, filename text, linenum integer, bytenum integer, errmsg text, rawdata text, rawbytes bytea)
+ distributed by (cmdtime);
+
+COPY number FROM STDIN LOG ERRORS INTO invalid_error_table1 SEGMENT REJECT LIMIT 10 ROWS; -- should fail
+ERROR: Relation "invalid_error_table1" already exists and is not of a valid error table format (expected 8 attributes, found 1)
+these are invalid line should be insert into error table.
+1
+\.
+invalid command \.
+;
+ERROR: syntax error at or near "these"
+LINE 1: these are invalid line should be insert into error table.
+ ^
+COPY number FROM STDIN LOG ERRORS INTO invalid_error_table3 SEGMENT REJECT LIMIT 10 ROWS; -- should fail
+ERROR: Relation "invalid_error_table3" already exists and is not of a valid error table format. It appears to not distributed randomly
+these are invalid line should be insert into error table.
+1
+\.
+invalid command \.
+;
+ERROR: syntax error at or near "these"
+LINE 1: these are invalid line should be insert into error table.
+ ^
+DROP TABLE invalid_error_table1;
+DROP TABLE invalid_error_table3;
+DROP TABLE number;