You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2016/09/02 02:54:07 UTC
[1/3] incubator-hawq git commit: HAWQ-991. Implement behavior2 in usage2 of hawq register.
Repository: incubator-hawq
Updated Branches:
refs/heads/master 25a4ab540 -> 1fce139d5
HAWQ-991. Implement behavior2 in usage2 of hawq register.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/1fce139d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/1fce139d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/1fce139d
Branch: refs/heads/master
Commit: 1fce139d5d64986e4226ede903570b67b9d8c94c
Parents: af12c6b
Author: xunzhang <xu...@gmail.com>
Authored: Fri Sep 2 02:03:35 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Fri Sep 2 10:53:27 2016 +0800
----------------------------------------------------------------------
.../feature/ManagementTool/files_incomplete.yml | 29 ++++++++++++++++
src/test/feature/ManagementTool/incorrect1.yml | 22 ------------
src/test/feature/ManagementTool/incorrect2.yml | 22 ------------
src/test/feature/ManagementTool/incorrect3.yml | 22 ------------
src/test/feature/ManagementTool/incorrect4.yml | 22 ------------
src/test/feature/ManagementTool/incorrect5.yml | 22 ------------
src/test/feature/ManagementTool/incorrect6.yml | 23 -------------
src/test/feature/ManagementTool/incorrect7.yml | 29 ----------------
src/test/feature/ManagementTool/incorrect8.yml | 28 ---------------
.../ManagementTool/missing_bucketnum.yml | 28 +++++++++++++++
.../feature/ManagementTool/missing_checksum.yml | 22 ++++++++++++
.../feature/ManagementTool/missing_filesize.yml | 22 ++++++++++++
.../feature/ManagementTool/missing_pagesize.yml | 22 ++++++++++++
.../ManagementTool/missing_rowgroupsize.yml | 22 ++++++++++++
.../ManagementTool/test_hawq_register.cpp | 36 +++++++++++++++-----
.../feature/ManagementTool/wrong_dfs_url.yml | 23 +++++++++++++
.../feature/ManagementTool/wrong_schema.yml | 22 ++++++++++++
tools/bin/hawqregister | 21 +++++++++---
18 files changed, 235 insertions(+), 202 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/files_incomplete.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/files_incomplete.yml b/src/test/feature/ManagementTool/files_incomplete.yml
new file mode 100755
index 0000000..d9a9185
--- /dev/null
+++ b/src/test/feature/ManagementTool/files_incomplete.yml
@@ -0,0 +1,29 @@
+AO_FileLocations:
+ Blocksize: 32768
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ Files:
+ - path: /hawq_default/16385/16387/17015/1
+ size: 16
+ - path: /hawq_default/16385/16387/17015/2
+ size: 0
+ - path: /hawq_default/16385/16387/17015/3
+ size: 0
+ - path: /hawq_default/16385/16387/17015/4
+ size: 16
+ - path: /hawq_default/16385/16387/17015/5
+ size: 16
+AO_Schema:
+- name: i
+ type: int4
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Aug 31 2016 11:10:07
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED BY (i)
+Encoding: UTF8
+FileFormat: AO
+TableName: public.t10
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect1.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect1.yml b/src/test/feature/ManagementTool/incorrect1.yml
deleted file mode 100755
index cfb2983..0000000
--- a/src/test/feature/ManagementTool/incorrect1.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Jul 25 2016 13:00:28
-DFS_URL: hdfs://localhost:8020
-Distribution_Policy: DISTRIBUTED RANDOMLY
-Encoding: UTF8
-FileFormat: Parquet
-Parquet_FileLocations:
- Checksum: false
- CompressionLevel: 0
- CompressionType: null
- EnableDictionary: false
- Files:
- - path: /hawq_default/16385/16387/35983/1
- size: 945
- RowGroupSize: 8388608
-Parquet_Schema:
-- name: i
- type: int4
-TableName: public.hawqregister
-Version: 1.0.0
-Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect2.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect2.yml b/src/test/feature/ManagementTool/incorrect2.yml
deleted file mode 100755
index cfb2983..0000000
--- a/src/test/feature/ManagementTool/incorrect2.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Jul 25 2016 13:00:28
-DFS_URL: hdfs://localhost:8020
-Distribution_Policy: DISTRIBUTED RANDOMLY
-Encoding: UTF8
-FileFormat: Parquet
-Parquet_FileLocations:
- Checksum: false
- CompressionLevel: 0
- CompressionType: null
- EnableDictionary: false
- Files:
- - path: /hawq_default/16385/16387/35983/1
- size: 945
- RowGroupSize: 8388608
-Parquet_Schema:
-- name: i
- type: int4
-TableName: public.hawqregister
-Version: 1.0.0
-Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect3.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect3.yml b/src/test/feature/ManagementTool/incorrect3.yml
deleted file mode 100755
index ff56f4e..0000000
--- a/src/test/feature/ManagementTool/incorrect3.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Jul 25 2016 13:00:28
-DFS_URL: hdfs://localhost:8020
-Distribution_Policy: DISTRIBUTED RANDOMLY
-Encoding: UTF8
-FileFormat: Parquet
-Parquet_FileLocations:
- Checksum: false
- CompressionLevel: 0
- CompressionType: null
- EnableDictionary: false
- Files:
- - path: /hawq_default/16385/16387/35983/1
- PageSize: 1048576
- RowGroupSize: 8388608
-Parquet_Schema:
-- name: i
- type: int4
-TableName: public.hawqregister
-Version: 1.0.0
-Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect4.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect4.yml b/src/test/feature/ManagementTool/incorrect4.yml
deleted file mode 100755
index aec3a7f..0000000
--- a/src/test/feature/ManagementTool/incorrect4.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Jul 25 2016 13:00:28
-DFS_URL: hdfs://localhost:8020
-Distribution_Policy: DISTRIBUTED RANDOMLY
-Encoding: UTF8
-FileFormat: Parquet
-Parquet_FileLocations:
- Checksum: false
- CompressionLevel: 0
- CompressionType: null
- EnableDictionary: false
- Files:
- - path: /hawq_default/16385/16387/35983/1
- size: 945
- PageSize: 1048576
- RowGroupSize: 8388608
-Parquet_Schema:
-- name: i
-TableName: public.hawqregister
-Version: 1.0.0
-Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect5.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect5.yml b/src/test/feature/ManagementTool/incorrect5.yml
deleted file mode 100755
index 540744b..0000000
--- a/src/test/feature/ManagementTool/incorrect5.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-AO_FileLocations:
- Blocksize: 32768
- CompressionLevel: 0
- CompressionType: null
- Files:
- - path: /hawq_default/16385/16387/16518/1
- size: 32
-AO_Schema:
-- name: x
- type: float8
-- name: y
- type: float8
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Aug 12 2016 10:49:35
-DFS_URL: hdfs://localhost:8020
-Distribution_Policy: DISTRIBUTED RANDOMLY
-Encoding: UTF8
-FileFormat: AO
-TableName: public.t1
-Version: 1.0.0
-Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect6.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect6.yml b/src/test/feature/ManagementTool/incorrect6.yml
deleted file mode 100755
index f42d0f7..0000000
--- a/src/test/feature/ManagementTool/incorrect6.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-AO_FileLocations:
- Blocksize: 32768
- Checksum: false
- CompressionLevel: 0
- CompressionType: null
- Files:
- - path: /hawq_default/16385/16387/16518/1
- size: 32
-AO_Schema:
-- name: x
- type: float8
-- name: y
- type: float8
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Aug 12 2016 10:49:35
-DFS_URL: hdfs://localhost:80
-Distribution_Policy: DISTRIBUTED RANDOMLY
-Encoding: UTF8
-FileFormat: AO
-TableName: public.t1
-Version: 1.0.0
-Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect7.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect7.yml b/src/test/feature/ManagementTool/incorrect7.yml
deleted file mode 100644
index d9a9185..0000000
--- a/src/test/feature/ManagementTool/incorrect7.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-AO_FileLocations:
- Blocksize: 32768
- Checksum: false
- CompressionLevel: 0
- CompressionType: null
- Files:
- - path: /hawq_default/16385/16387/17015/1
- size: 16
- - path: /hawq_default/16385/16387/17015/2
- size: 0
- - path: /hawq_default/16385/16387/17015/3
- size: 0
- - path: /hawq_default/16385/16387/17015/4
- size: 16
- - path: /hawq_default/16385/16387/17015/5
- size: 16
-AO_Schema:
-- name: i
- type: int4
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Aug 31 2016 11:10:07
-DFS_URL: hdfs://localhost:8020
-Distribution_Policy: DISTRIBUTED BY (i)
-Encoding: UTF8
-FileFormat: AO
-TableName: public.t10
-Version: 1.0.0
-Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/incorrect8.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect8.yml b/src/test/feature/ManagementTool/incorrect8.yml
deleted file mode 100644
index 32d3b0d..0000000
--- a/src/test/feature/ManagementTool/incorrect8.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-AO_FileLocations:
- Blocksize: 32768
- Checksum: false
- CompressionLevel: 0
- CompressionType: null
- Files:
- - path: /hawq_default/16385/16387/17015/1
- size: 16
- - path: /hawq_default/16385/16387/17015/2
- size: 0
- - path: /hawq_default/16385/16387/17015/3
- size: 0
- - path: /hawq_default/16385/16387/17015/4
- size: 16
- - path: /hawq_default/16385/16387/17015/5
- size: 16
-AO_Schema:
-- name: i
- type: int4
-DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
- dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
- compiled on Aug 31 2016 11:10:07
-DFS_URL: hdfs://localhost:8020
-Distribution_Policy: DISTRIBUTED BY (i)
-Encoding: UTF8
-FileFormat: AO
-TableName: public.t10
-Version: 1.0.0
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/missing_bucketnum.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/missing_bucketnum.yml b/src/test/feature/ManagementTool/missing_bucketnum.yml
new file mode 100755
index 0000000..32d3b0d
--- /dev/null
+++ b/src/test/feature/ManagementTool/missing_bucketnum.yml
@@ -0,0 +1,28 @@
+AO_FileLocations:
+ Blocksize: 32768
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ Files:
+ - path: /hawq_default/16385/16387/17015/1
+ size: 16
+ - path: /hawq_default/16385/16387/17015/2
+ size: 0
+ - path: /hawq_default/16385/16387/17015/3
+ size: 0
+ - path: /hawq_default/16385/16387/17015/4
+ size: 16
+ - path: /hawq_default/16385/16387/17015/5
+ size: 16
+AO_Schema:
+- name: i
+ type: int4
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Aug 31 2016 11:10:07
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED BY (i)
+Encoding: UTF8
+FileFormat: AO
+TableName: public.t10
+Version: 1.0.0
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/missing_checksum.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/missing_checksum.yml b/src/test/feature/ManagementTool/missing_checksum.yml
new file mode 100755
index 0000000..540744b
--- /dev/null
+++ b/src/test/feature/ManagementTool/missing_checksum.yml
@@ -0,0 +1,22 @@
+AO_FileLocations:
+ Blocksize: 32768
+ CompressionLevel: 0
+ CompressionType: null
+ Files:
+ - path: /hawq_default/16385/16387/16518/1
+ size: 32
+AO_Schema:
+- name: x
+ type: float8
+- name: y
+ type: float8
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Aug 12 2016 10:49:35
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED RANDOMLY
+Encoding: UTF8
+FileFormat: AO
+TableName: public.t1
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/missing_filesize.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/missing_filesize.yml b/src/test/feature/ManagementTool/missing_filesize.yml
new file mode 100755
index 0000000..ff56f4e
--- /dev/null
+++ b/src/test/feature/ManagementTool/missing_filesize.yml
@@ -0,0 +1,22 @@
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Jul 25 2016 13:00:28
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED RANDOMLY
+Encoding: UTF8
+FileFormat: Parquet
+Parquet_FileLocations:
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ EnableDictionary: false
+ Files:
+ - path: /hawq_default/16385/16387/35983/1
+ PageSize: 1048576
+ RowGroupSize: 8388608
+Parquet_Schema:
+- name: i
+ type: int4
+TableName: public.hawqregister
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/missing_pagesize.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/missing_pagesize.yml b/src/test/feature/ManagementTool/missing_pagesize.yml
new file mode 100755
index 0000000..cfb2983
--- /dev/null
+++ b/src/test/feature/ManagementTool/missing_pagesize.yml
@@ -0,0 +1,22 @@
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Jul 25 2016 13:00:28
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED RANDOMLY
+Encoding: UTF8
+FileFormat: Parquet
+Parquet_FileLocations:
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ EnableDictionary: false
+ Files:
+ - path: /hawq_default/16385/16387/35983/1
+ size: 945
+ RowGroupSize: 8388608
+Parquet_Schema:
+- name: i
+ type: int4
+TableName: public.hawqregister
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/missing_rowgroupsize.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/missing_rowgroupsize.yml b/src/test/feature/ManagementTool/missing_rowgroupsize.yml
new file mode 100755
index 0000000..0b65b31
--- /dev/null
+++ b/src/test/feature/ManagementTool/missing_rowgroupsize.yml
@@ -0,0 +1,22 @@
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Jul 25 2016 13:00:28
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED RANDOMLY
+Encoding: UTF8
+FileFormat: Parquet
+Parquet_FileLocations:
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ EnableDictionary: false
+ Files:
+ - path: /hawq_default/16385/16387/35983/1
+ size: 945
+ PageSize: 1048576
+Parquet_Schema:
+- name: i
+ type: int4
+TableName: public.hawqregister
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp
index 97489b8..0952bd5 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -335,17 +335,37 @@ TEST_F(TestHawqRegister, TestEmptyTable) {
TEST_F(TestHawqRegister, TestIncorrectYaml) {
SQLUtility util;
string filePath = util.getTestRootPath() + "/ManagementTool/";
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect1.yml xx"));
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect2.yml xx"));
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect3.yml xx"));
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect4.yml xx"));
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect5.yml xx"));
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect6.yml xx"));
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect8.yml xx"));
+
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "missing_pagesize.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "missing_rowgroupsize.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "missing_filesize.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "wrong_schema.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "missing_checksum.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "wrong_dfs_url.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "missing_bucketnum.yml xx"));
}
TEST_F(TestHawqRegister, TestDismatchFileNumber) {
SQLUtility util;
string filePath = util.getTestRootPath() + "/ManagementTool/";
- EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect7.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "files_incomplete.yml xx"));
+}
+
+TEST_F(TestHawqRegister, TestUsage2Behavior2) {
+ SQLUtility util;
+ util.execute("drop table if exists simple_register_table;");
+ util.execute("create table simple_register_table(i int) with (appendonly=true, orientation=row) distributed randomly;");
+ util.execute("insert into simple_register_table values(1), (2), (3);");
+
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o tmp.yml testhawqregister_testusage2behavior2.simple_register_table"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c tmp.yml testhawqregister_testusage2behavior2.new_simple_register_table"));
+ util.query("select * from new_simple_register_table;", 3);
+
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o new_tmp.yml testhawqregister_testusage2behavior2.new_simple_register_table"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c new_tmp.yml testhawqregister_testusage2behavior2.new_simple_register_table"));
+
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf tmp.yml"));
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf new_tmp.yml"));
+ util.execute("drop table simple_register_table;");
+ util.execute("drop table new_simple_register_table;");
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/wrong_dfs_url.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/wrong_dfs_url.yml b/src/test/feature/ManagementTool/wrong_dfs_url.yml
new file mode 100755
index 0000000..f42d0f7
--- /dev/null
+++ b/src/test/feature/ManagementTool/wrong_dfs_url.yml
@@ -0,0 +1,23 @@
+AO_FileLocations:
+ Blocksize: 32768
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ Files:
+ - path: /hawq_default/16385/16387/16518/1
+ size: 32
+AO_Schema:
+- name: x
+ type: float8
+- name: y
+ type: float8
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Aug 12 2016 10:49:35
+DFS_URL: hdfs://localhost:80
+Distribution_Policy: DISTRIBUTED RANDOMLY
+Encoding: UTF8
+FileFormat: AO
+TableName: public.t1
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/src/test/feature/ManagementTool/wrong_schema.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/wrong_schema.yml b/src/test/feature/ManagementTool/wrong_schema.yml
new file mode 100755
index 0000000..aec3a7f
--- /dev/null
+++ b/src/test/feature/ManagementTool/wrong_schema.yml
@@ -0,0 +1,22 @@
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Jul 25 2016 13:00:28
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED RANDOMLY
+Encoding: UTF8
+FileFormat: Parquet
+Parquet_FileLocations:
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ EnableDictionary: false
+ Files:
+ - path: /hawq_default/16385/16387/35983/1
+ size: 945
+ PageSize: 1048576
+ RowGroupSize: 8388608
+Parquet_Schema:
+- name: i
+TableName: public.hawqregister
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/1fce139d/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 91f087a..2b6fce5 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -134,8 +134,7 @@ def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_lo
conn.commit()
for row in rows:
if row[0] != 0:
- # TODO
- pass
+ return False
except DatabaseError, ex:
logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
@@ -152,6 +151,7 @@ def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_lo
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
conn.commit()
+ return True
except DatabaseError, ex:
print DatabaseError, ex
logger.error('Failed to execute query "%s"' % query)
@@ -356,14 +356,22 @@ if __name__ == '__main__':
dburl = dbconn.DbURL(hostname = options.host, port = options.port, username = options.user, dbname = options.database)
filepath, database, tablename = options.filepath, options.database, args[0]
+ second_normal_mode, second_exist_mode, force_mode, repair_mode = False, False, False, False
if options.yml_config: # Usage2
+ if options.force:
+ force_mode = True
+ elif options.repair:
+ repair_mode = True
+ else:
+ second_normal_mode = True
fileformat, files, sizes, schema, distribution_policy, file_locations, bucket_number = option_parser_yml(options.yml_config)
filepath = files[0][:files[0].rfind('/')] if files else ''
if distribution_policy.startswith('DISTRIBUTED BY'):
if len(files) % bucket_number != 0:
- logger.error('Files to be registered must match the bucket number of hash table.')
+ logger.error('Files to be registered must be multiple times to the bucket number of hash table.')
sys.exit(1)
- create_table(dburl, tablename, schema, fileformat, distribution_policy, file_locations, bucket_number)
+ if not create_table(dburl, tablename, schema, fileformat, distribution_policy, file_locations, bucket_number):
+ second_normal_mode, second_exist_mode = False, True
else:
fileformat = 'Parquet'
check_hash_type(dburl, tablename) # Usage1 only support randomly distributed table
@@ -375,6 +383,11 @@ if __name__ == '__main__':
seg_name = get_seg_name(dburl, tablename, database, fileformat)
firstsegno, tabledir = get_metadata_from_database(dburl, tablename, seg_name)
+ if second_exist_mode:
+ if tabledir.strip('/') == filepath.strip('/'):
+ logger.error('Files to be registered in this case should not be the same with table path.')
+ sys.exit(1)
+
check_files_and_table_in_same_hdfs_cluster(filepath, tabledir)
if not options.yml_config:
[2/3] incubator-hawq git commit: HAWQ-1025. Refactor hawq register to implement HAWQ-1025: using actual sizes, check files number multiple of bucket number if hash table.
Posted by rl...@apache.org.
HAWQ-1025. Refactor hawq register to implement HAWQ-1025: using actual sizes, check files number multiple of bucket number if hash table.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/af12c6b3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/af12c6b3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/af12c6b3
Branch: refs/heads/master
Commit: af12c6b3bfb4e676c0d960bd0282b58c1a8221b6
Parents: 2c4f25c
Author: xunzhang <xu...@gmail.com>
Authored: Thu Sep 1 18:12:41 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Fri Sep 2 10:53:27 2016 +0800
----------------------------------------------------------------------
src/test/feature/ManagementTool/incorrect1.yml | 1 +
src/test/feature/ManagementTool/incorrect2.yml | 1 +
src/test/feature/ManagementTool/incorrect3.yml | 1 +
src/test/feature/ManagementTool/incorrect4.yml | 1 +
src/test/feature/ManagementTool/incorrect5.yml | 1 +
src/test/feature/ManagementTool/incorrect6.yml | 1 +
src/test/feature/ManagementTool/incorrect7.yml | 29 +++++
src/test/feature/ManagementTool/incorrect8.yml | 28 +++++
.../ManagementTool/test_hawq_register.cpp | 12 +--
tools/bin/hawqregister | 107 +++++++++++--------
10 files changed, 128 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect1.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect1.yml b/src/test/feature/ManagementTool/incorrect1.yml
index 8258a90..cfb2983 100755
--- a/src/test/feature/ManagementTool/incorrect1.yml
+++ b/src/test/feature/ManagementTool/incorrect1.yml
@@ -19,3 +19,4 @@ Parquet_Schema:
type: int4
TableName: public.hawqregister
Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect2.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect2.yml b/src/test/feature/ManagementTool/incorrect2.yml
index 8258a90..cfb2983 100755
--- a/src/test/feature/ManagementTool/incorrect2.yml
+++ b/src/test/feature/ManagementTool/incorrect2.yml
@@ -19,3 +19,4 @@ Parquet_Schema:
type: int4
TableName: public.hawqregister
Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect3.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect3.yml b/src/test/feature/ManagementTool/incorrect3.yml
index f45b5e5..ff56f4e 100755
--- a/src/test/feature/ManagementTool/incorrect3.yml
+++ b/src/test/feature/ManagementTool/incorrect3.yml
@@ -19,3 +19,4 @@ Parquet_Schema:
type: int4
TableName: public.hawqregister
Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect4.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect4.yml b/src/test/feature/ManagementTool/incorrect4.yml
index 3b8b921..aec3a7f 100755
--- a/src/test/feature/ManagementTool/incorrect4.yml
+++ b/src/test/feature/ManagementTool/incorrect4.yml
@@ -19,3 +19,4 @@ Parquet_Schema:
- name: i
TableName: public.hawqregister
Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect5.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect5.yml b/src/test/feature/ManagementTool/incorrect5.yml
index 42a37ae..540744b 100755
--- a/src/test/feature/ManagementTool/incorrect5.yml
+++ b/src/test/feature/ManagementTool/incorrect5.yml
@@ -19,3 +19,4 @@ Encoding: UTF8
FileFormat: AO
TableName: public.t1
Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect6.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect6.yml b/src/test/feature/ManagementTool/incorrect6.yml
index 0d572dc..f42d0f7 100755
--- a/src/test/feature/ManagementTool/incorrect6.yml
+++ b/src/test/feature/ManagementTool/incorrect6.yml
@@ -20,3 +20,4 @@ Encoding: UTF8
FileFormat: AO
TableName: public.t1
Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect7.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect7.yml b/src/test/feature/ManagementTool/incorrect7.yml
new file mode 100644
index 0000000..d9a9185
--- /dev/null
+++ b/src/test/feature/ManagementTool/incorrect7.yml
@@ -0,0 +1,29 @@
+AO_FileLocations:
+ Blocksize: 32768
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ Files:
+ - path: /hawq_default/16385/16387/17015/1
+ size: 16
+ - path: /hawq_default/16385/16387/17015/2
+ size: 0
+ - path: /hawq_default/16385/16387/17015/3
+ size: 0
+ - path: /hawq_default/16385/16387/17015/4
+ size: 16
+ - path: /hawq_default/16385/16387/17015/5
+ size: 16
+AO_Schema:
+- name: i
+ type: int4
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Aug 31 2016 11:10:07
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED BY (i)
+Encoding: UTF8
+FileFormat: AO
+TableName: public.t10
+Version: 1.0.0
+Bucketnum: 6
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/incorrect8.yml
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/incorrect8.yml b/src/test/feature/ManagementTool/incorrect8.yml
new file mode 100644
index 0000000..32d3b0d
--- /dev/null
+++ b/src/test/feature/ManagementTool/incorrect8.yml
@@ -0,0 +1,28 @@
+AO_FileLocations:
+ Blocksize: 32768
+ Checksum: false
+ CompressionLevel: 0
+ CompressionType: null
+ Files:
+ - path: /hawq_default/16385/16387/17015/1
+ size: 16
+ - path: /hawq_default/16385/16387/17015/2
+ size: 0
+ - path: /hawq_default/16385/16387/17015/3
+ size: 0
+ - path: /hawq_default/16385/16387/17015/4
+ size: 16
+ - path: /hawq_default/16385/16387/17015/5
+ size: 16
+AO_Schema:
+- name: i
+ type: int4
+DBVersion: PostgreSQL 8.2.15 (Greenplum Database 4.2.0 build 1) (HAWQ 2.0.1.0 build
+ dev) on x86_64-apple-darwin15.5.0, compiled by GCC Apple LLVM version 7.3.0 (clang-703.0.31)
+ compiled on Aug 31 2016 11:10:07
+DFS_URL: hdfs://localhost:8020
+Distribution_Policy: DISTRIBUTED BY (i)
+Encoding: UTF8
+FileFormat: AO
+TableName: public.t10
+Version: 1.0.0
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp
index f7f67c0..97489b8 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -341,15 +341,11 @@ TEST_F(TestHawqRegister, TestIncorrectYaml) {
EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect4.yml xx"));
EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect5.yml xx"));
EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect6.yml xx"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect8.yml xx"));
}
-TEST_F(TestHawqRegister, TestCreateExistedTable) {
+TEST_F(TestHawqRegister, TestDismatchFileNumber) {
SQLUtility util;
- util.execute("drop table if exists t10;");
- util.execute("create table t10(i int) with (appendonly=true, orientation=row) distributed by (i);");
- util.execute("insert into t10 values(1), (2), (3);");
- EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t10.yml testhawqregister_testcreateexistedtable.t10"));
- auto tmp = Command::getCommandOutput("hawq register -d " + (string) HAWQ_DB + " -c t10.yml testhawqregister_testcreateexistedtable.t10");
- auto out = hawq::test::trim(hawq::test::trimNewLine(tmp));
- EXPECT_EQ(1, hawq::test::endsWith(out, "has already existed."));
+ string filePath = util.getTestRootPath() + "/ManagementTool/";
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect7.yml xx"));
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af12c6b3/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 6b2c918..91f087a 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -17,8 +17,9 @@
# specific language governing permissions and limitations
# under the License.
-# Usage1: hawq register [-h hostname] [-p port] [-U username] [-d database] [-f filepath] tablename
-# Usage2: hawq register [-h hostname] [-p port] [-U username] [-d database] [-c config] tablename
+# Usage1: hawq register [-h hostname] [-p port] [-U username] [-d database] [-f filepath] [-e eof] <tablename>
+# Usage2: hawq register [-h hostname] [-p port] [-U username] [-d database] [-c config] [--force] [--repair] <tablename>
+
import os, sys, optparse, getpass, re, urlparse
try:
from gppylib.commands.unix import getLocalHostname, getUserName
@@ -44,14 +45,16 @@ def option_parser():
usage='usage: %prog [options] table_name',
version='%prog version $Revision: #1 $')
parser.remove_option('-h')
- parser.add_option('-?', '--help', action='help')
- parser.add_option('-h', '--host', help='host of the target DB')
- parser.add_option('-p', '--port', help='port of the target DB', type='int', default=0)
- parser.add_option('-U', '--user', help='username of the target DB')
+ parser.add_option('-?', '--help', action = 'help')
+ parser.add_option('-h', '--host', help = 'host of the target DB')
+ parser.add_option('-p', '--port', help = 'port of the target DB', type = 'int', default = 0)
+ parser.add_option('-U', '--user', help = 'username of the target DB')
parser.add_option('-d', '--database', default = 'postgres', dest = 'database', help='database name')
- parser.add_option('-f', '--filepath', dest = 'filepath', help='file name in HDFS')
- parser.add_option('-e', '--eof', dest = 'filesizes', type='list', default = [], help = 'eof list of files to be registered')
- parser.add_option('-c', '--config', dest = 'yml_config', default = '', help='configuration file in YAML format')
+ parser.add_option('-f', '--filepath', dest = 'filepath', help = 'file name in HDFS')
+ parser.add_option('-e', '--eof', dest = 'filesize', type = 'int', default = 0, help = 'eof of the file to be registered')
+ parser.add_option('-c', '--config', dest = 'yml_config', default = '', help = 'configuration file in YAML format')
+ parser.add_option('--force', action = 'store_true', default = False)
+ parser.add_option('--repair', action = 'store_true', default = False)
return parser
@@ -114,22 +117,16 @@ def option_parser_yml(yml_file):
register_yaml_dict_check(params)
if params['FileFormat'] == 'Parquet':
if not len(params['Parquet_FileLocations']['Files']):
- return 'Parquet', '', params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations'], params['Bucketnum']
- offset = params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
- filepath = (params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'][:offset]
- if len(params['Parquet_FileLocations']['Files']) != 1
- else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'])
- return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations'], params['Bucketnum']
+ return 'Parquet', [], [], params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations'], params['Bucketnum']
+ files, sizes = [params['DFS_URL'] + d['path'] for d in params['Parquet_FileLocations']['Files']], [d['size'] for d in params['Parquet_FileLocations']['Files']]
+ return 'Parquet', files, sizes, params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations'], params['Bucketnum']
if not len(params['AO_FileLocations']['Files']):
- return 'AO', '', params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum']
- offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
- filepath = (params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset]
- if len(params['AO_FileLocations']['Files']) != 1
- else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'])
- return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum']
+ return 'AO', [], [], params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum']
+ files, sizes = [params['DFS_URL'] + d['path'] for d in params['AO_FileLocations']['Files']], [d['size'] for d in params['AO_FileLocations']['Files']]
+ return 'AO', files, sizes, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum']
-def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_locations):
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_locations, bucket_number):
try:
query = "select count(*) from pg_class where relname = '%s'" % tablename.split('.')[-1].lower()
conn = dbconn.connect(dburl, False)
@@ -137,24 +134,26 @@ def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_lo
conn.commit()
for row in rows:
if row[0] != 0:
- logger.error("Register failed: table %s has already existed." % tablename)
- sys.exit(1)
+ # TODO
+ pass
except DatabaseError, ex:
logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
+
try:
schema = ','.join([k['name'] + ' ' + k['type'] for k in schema_info])
fmt = 'ROW' if fmt == 'AO' else fmt
if fmt == 'ROW':
- query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, checksum=%s) %s;'
- % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], distrbution_policy))
+ query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, checksum=%s, bucketnum=%s) %s;'
+ % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], bucket_number, distrbution_policy))
else: # Parquet
- query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, pagesize=%s, rowgroupsize=%s) %s;'
- % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['PageSize'], file_locations['RowGroupSize'], distrbution_policy))
+ query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, pagesize=%s, rowgroupsize=%s, bucketnum=%s) %s;'
+ % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['PageSize'], file_locations['RowGroupSize'], bucket_number, distrbution_policy))
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
conn.commit()
except DatabaseError, ex:
+ print DatabaseError, ex
logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
@@ -222,9 +221,9 @@ def get_metadata_from_database(dburl, tablename, seg_name):
try:
# get the full path of correspoding file for target table
query = ("select location, gp_persistent_tablespace_node.tablespace_oid, database_oid, relfilenode from pg_class, gp_persistent_relation_node, "
- "gp_persistent_tablespace_node, gp_persistent_filespace_node where relname = '%s' and pg_class.relfilenode = "
- "gp_persistent_relation_node.relfilenode_oid and gp_persistent_relation_node.tablespace_oid = gp_persistent_tablespace_node.tablespace_oid "
- "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;") % tablename.split('.')[-1]
+ "gp_persistent_tablespace_node, gp_persistent_filespace_node where relname = '%s' and pg_class.relfilenode = "
+ "gp_persistent_relation_node.relfilenode_oid and gp_persistent_relation_node.tablespace_oid = gp_persistent_tablespace_node.tablespace_oid "
+ "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;") % tablename.split('.')[-1]
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
conn.commit()
@@ -328,56 +327,72 @@ def insert_metadata_into_database(dburl, databasename, tablename, seg_name, firs
'''Insert the metadata into database'''
try:
query = "SET allow_system_table_mods='dml';"
- segno = firstsegno
- for eof in eofs:
- query += "insert into pg_aoseg.%s values(%d, %d, %d, %d);" % (seg_name, segno, eof, -1, -1)
- segno += 1
+ query += 'insert into pg_aoseg.%s values(%d, %d, %d, %d)' % (seg_name, firstsegno, eofs[0], -1, -1)
+ for k, eof in enumerate(eofs[1:]):
+ query += ',(%d, %d, %d, %d)' % (firstsegno + k + 1, eof, -1, -1)
+ query += ';'
conn = dbconn.connect(dburl, True)
rows = dbconn.execSQL(conn, query)
conn.commit()
conn.close()
except DatabaseError, ex:
logger.error('Failed to connect to database, this script can only be run when the database is up')
- move_files_in_hdfs(options.database, options.tablename, files, firstsegno, tabledir, False)
+ move_files_in_hdfs(database, tablename, files, firstsegno, tabledir, False)
sys.exit(1)
if __name__ == '__main__':
+
parser = option_parser()
options, args = parser.parse_args()
- if len(args) != 1 or (options.yml_config and options.filepath):
+
+ if len(args) != 1 or ((options.yml_config or options.force or options.repair) and options.filepath) or (options.force and options.repair):
parser.print_help(sys.stderr)
sys.exit(1)
if local_ssh('hadoop', logger):
logger.error('command "hadoop" is not available.')
sys.exit(1)
- dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=options.database)
+ dburl = dbconn.DbURL(hostname = options.host, port = options.port, username = options.user, dbname = options.database)
filepath, database, tablename = options.filepath, options.database, args[0]
if options.yml_config: # Usage2
- fileformat, filepath, schema, distribution_policy, file_locations, _ = option_parser_yml(options.yml_config)
- create_table(dburl, tablename, schema, fileformat, distribution_policy, file_locations)
+ fileformat, files, sizes, schema, distribution_policy, file_locations, bucket_number = option_parser_yml(options.yml_config)
+ filepath = files[0][:files[0].rfind('/')] if files else ''
+ if distribution_policy.startswith('DISTRIBUTED BY'):
+ if len(files) % bucket_number != 0:
+ logger.error('Files to be registered must match the bucket number of hash table.')
+ sys.exit(1)
+ create_table(dburl, tablename, schema, fileformat, distribution_policy, file_locations, bucket_number)
else:
fileformat = 'Parquet'
check_hash_type(dburl, tablename) # Usage1 only support randomly distributed table
+
+ # check filepath
if not filepath:
sys.exit(0)
+
seg_name = get_seg_name(dburl, tablename, database, fileformat)
firstsegno, tabledir = get_metadata_from_database(dburl, tablename, seg_name)
- sizes = 0
+
check_files_and_table_in_same_hdfs_cluster(filepath, tabledir)
- files, sizes = get_files_in_hdfs(filepath)
+
+ if not options.yml_config:
+ files, sizes = get_files_in_hdfs(filepath)
print 'File(s) to be registered:', files
+
# set specified eofs
- if options.filesizes:
- if len(options.filesizes) != len(files):
- logger.error('Specified eof list is incomplete.')
+ if options.filesize:
+ if options.filesize != len(files):
+ logger.error('-e option is only supported with single file case.')
sys.exit(1)
- sizes = options.sizes
+ sizes = [options.filesize]
if fileformat == 'Parquet':
check_parquet_format(files)
move_files_in_hdfs(database, tablename, files, firstsegno, tabledir, True)
+
+ # update catalog table
insert_metadata_into_database(dburl, database, tablename, seg_name, firstsegno, tabledir, sizes)
+
logger.info('Hawq Register Succeed.')
[3/3] incubator-hawq git commit: HAWQ-1025. Add bucket number in the
yaml file of hawq extract, modify to use actual eof for usage1.
Posted by rl...@apache.org.
HAWQ-1025. Add bucket number in the yaml file of hawq extract, modify to use actual eof for usage1.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/2c4f25c8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/2c4f25c8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/2c4f25c8
Branch: refs/heads/master
Commit: 2c4f25c8e1259356f02513b696271da5dc269042
Parents: 25a4ab5
Author: xunzhang <xu...@gmail.com>
Authored: Tue Aug 30 16:03:42 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Fri Sep 2 10:53:27 2016 +0800
----------------------------------------------------------------------
tools/bin/hawqextract | 20 ++++++++++++++++----
tools/bin/hawqregister | 29 ++++++++++++++++++-----------
2 files changed, 34 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c4f25c8/tools/bin/hawqextract
----------------------------------------------------------------------
diff --git a/tools/bin/hawqextract b/tools/bin/hawqextract
index f3ffe5b..28f55d6 100644
--- a/tools/bin/hawqextract
+++ b/tools/bin/hawqextract
@@ -259,6 +259,17 @@ class GpMetadataAccessor:
cols_list = [cols[int(k)-1] for k in policy.strip('{}').split(',')]
return 'DISTRIBUTED BY (' + ','.join(cols_list) + ')'
+ def get_bucket_number(self, oid):
+ '''
+ Get table's bucket number from gp_distribution_policy view.
+ '''
+ qry = """
+ SELECT bucketnum
+ FROM gp_distribution_policy
+ WHERE localoid = '%s'
+ """ % oid
+ return self.exec_query(qry)[0]['bucketnum']
+
def connectdb(options):
'''
@@ -405,12 +416,12 @@ def extract_metadata(conn, tbname):
}
file_locations['Partitions'].append(par_info)
metadata['AO_FileLocations'] = file_locations
-
logger.info('-- extract AO_Schema')
metadata['AO_Schema'] = accessor.get_schema(relid)
-
logger.info('-- extract Distribution_Policy')
metadata['Distribution_Policy'] = accessor.get_distribution_policy_info(rel_pgclass['oid'], relid)
+ logger.info('-- extract bucket number')
+ metadata['Bucketnum'] = accessor.get_bucket_number(rel_pgclass['oid'])
def extract_Parquet_metadata():
relid = rel_pgclass['oid']
@@ -455,12 +466,13 @@ def extract_metadata(conn, tbname):
}
file_locations['Partitions'].append(par_info)
metadata['Parquet_FileLocations'] = file_locations
-
logger.info('-- extract Parquet_Schema')
metadata['Parquet_Schema'] = accessor.get_schema(relid)
-
logger.info('-- extract Distribution_Policy')
metadata['Distribution_Policy'] = accessor.get_distribution_policy_info(rel_pgclass['oid'], relid)
+ logger.info('-- extract bucket number')
+ metadata['Bucketnum'] = accessor.get_bucket_number(rel_pgclass['oid'])
+
# extract AO/Parquet specific metadata
cases = { 'AO': extract_AO_metadata,
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2c4f25c8/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 10bdf0b..6b2c918 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -50,13 +50,14 @@ def option_parser():
parser.add_option('-U', '--user', help='username of the target DB')
parser.add_option('-d', '--database', default = 'postgres', dest = 'database', help='database name')
parser.add_option('-f', '--filepath', dest = 'filepath', help='file name in HDFS')
+ parser.add_option('-e', '--eof', dest = 'filesizes', type='list', default = [], help = 'eof list of files to be registered')
parser.add_option('-c', '--config', dest = 'yml_config', default = '', help='configuration file in YAML format')
return parser
def register_yaml_dict_check(D):
# check exists
- check_list = ['DFS_URL', 'Distribution_Policy', 'FileFormat', 'TableName']
+ check_list = ['DFS_URL', 'Distribution_Policy', 'FileFormat', 'TableName', 'Bucketnum']
for attr in check_list:
if D.get(attr) == None:
logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % attr)
@@ -113,19 +114,19 @@ def option_parser_yml(yml_file):
register_yaml_dict_check(params)
if params['FileFormat'] == 'Parquet':
if not len(params['Parquet_FileLocations']['Files']):
- return 'Parquet', '', params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations']
+ return 'Parquet', '', params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations'], params['Bucketnum']
offset = params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
filepath = (params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'][:offset]
if len(params['Parquet_FileLocations']['Files']) != 1
else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'])
- return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations']
+ return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations'], params['Bucketnum']
if not len(params['AO_FileLocations']['Files']):
- return 'AO', '', params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations']
+ return 'AO', '', params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum']
offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
filepath = (params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset]
if len(params['AO_FileLocations']['Files']) != 1
else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'])
- return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations']
+ return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum']
def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_locations):
@@ -297,8 +298,8 @@ def move_files_in_hdfs(databasename, tablename, files, firstsegno, tabledir, nor
'''Move file(s) in src path into the folder correspoding to the target table'''
if normal:
segno = firstsegno
- for file in files:
- srcfile = file
+ for f in files:
+ srcfile = f
dstfile = tabledir + str(segno)
segno += 1
if srcfile != dstfile:
@@ -310,8 +311,8 @@ def move_files_in_hdfs(databasename, tablename, files, firstsegno, tabledir, nor
sys.exit(1)
else:
segno = firstsegno
- for file in files:
- dstfile = file
+ for f in files:
+ dstfile = f
srcfile = tabledir + str(segno)
segno += 1
if srcfile != dstfile:
@@ -355,7 +356,7 @@ if __name__ == '__main__':
filepath, database, tablename = options.filepath, options.database, args[0]
if options.yml_config: # Usage2
- fileformat, filepath, schema, distribution_policy, file_locations = option_parser_yml(options.yml_config)
+ fileformat, filepath, schema, distribution_policy, file_locations, _ = option_parser_yml(options.yml_config)
create_table(dburl, tablename, schema, fileformat, distribution_policy, file_locations)
else:
fileformat = 'Parquet'
@@ -368,9 +369,15 @@ if __name__ == '__main__':
check_files_and_table_in_same_hdfs_cluster(filepath, tabledir)
files, sizes = get_files_in_hdfs(filepath)
print 'File(s) to be registered:', files
+ # set specified eofs
+ if options.filesizes:
+ if len(options.filesizes) != len(files):
+ logger.error('Specified eof list is incomplete.')
+ sys.exit(1)
+ sizes = options.sizes
+
if fileformat == 'Parquet':
check_parquet_format(files)
- print files
move_files_in_hdfs(database, tablename, files, firstsegno, tabledir, True)
insert_metadata_into_database(dburl, database, tablename, seg_name, firstsegno, tabledir, sizes)
logger.info('Hawq Register Succeed.')