You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by rl...@apache.org on 2016/08/19 02:59:12 UTC
[1/3] incubator-hawq git commit: HAWQ-991. Add feature test cases for
the update hawqregister.
Repository: incubator-hawq
Updated Branches:
refs/heads/master 7661dec7c -> 8cc4a042e
HAWQ-991. Add feature test cases for the update hawqregister.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/af483766
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/af483766
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/af483766
Branch: refs/heads/master
Commit: af483766e8b710661f89e170318b1e856aafb26e
Parents: 2596be6
Author: xunzhang <xu...@gmail.com>
Authored: Wed Aug 17 22:05:30 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Fri Aug 19 10:57:09 2016 +0800
----------------------------------------------------------------------
.../ManagementTool/test_hawq_register.cpp | 126 +++++++++++++++++--
tools/bin/hawqregister | 13 +-
2 files changed, 128 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af483766/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp
index afc2cb4..e6fead8 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -16,7 +16,7 @@ class TestHawqRegister : public ::testing::Test {
~TestHawqRegister() {}
};
-TEST_F(TestHawqRegister, TestSingleHawqFile) {
+TEST_F(TestHawqRegister, TestUsage1SingleHawqFile) {
SQLUtility util;
string rootPath(util.getTestRootPath());
string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
@@ -35,7 +35,7 @@ TEST_F(TestHawqRegister, TestSingleHawqFile) {
util.execute("drop table hawqregister;");
}
-TEST_F(TestHawqRegister, TestSingleHiveFile) {
+TEST_F(TestHawqRegister, TestUsage1SingleHiveFile) {
SQLUtility util;
string rootPath(util.getTestRootPath());
string relativePath("/ManagementTool/test_hawq_register_hive.paq");
@@ -122,7 +122,7 @@ TEST_F(TestHawqRegister, TestFiles) {
util.execute("drop table hawqregister;");
}
-TEST_F(TestHawqRegister, TestHashDistributedTable) {
+TEST_F(TestHawqRegister, TestUsage1HashDistributedTable) {
SQLUtility util;
string rootPath(util.getTestRootPath());
string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
@@ -140,7 +140,7 @@ TEST_F(TestHawqRegister, TestHashDistributedTable) {
util.execute("drop table hawqregister;");
}
-TEST_F(TestHawqRegister, TestNotParquetFile) {
+TEST_F(TestHawqRegister, TestUsage1NotParquetFile) {
SQLUtility util;
string rootPath(util.getTestRootPath());
string relativePath("/ManagementTool/test_hawq_register_not_paq");
@@ -158,7 +158,7 @@ TEST_F(TestHawqRegister, TestNotParquetFile) {
util.execute("drop table hawqregister;");
}
-TEST_F(TestHawqRegister, TestNotParquetTable) {
+TEST_F(TestHawqRegister, TestUsage1NotParquetTable) {
SQLUtility util;
string rootPath(util.getTestRootPath());
string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
@@ -176,7 +176,7 @@ TEST_F(TestHawqRegister, TestNotParquetTable) {
util.execute("drop table hawqregister;");
}
-TEST_F(TestHawqRegister, TestFileNotExist) {
+TEST_F(TestHawqRegister, TestUsage1FileNotExist) {
SQLUtility util;
util.execute("create table hawqregister(i int);");
@@ -188,7 +188,7 @@ TEST_F(TestHawqRegister, TestFileNotExist) {
util.execute("drop table hawqregister;");
}
-TEST_F(TestHawqRegister, TestNotHDFSPath) {
+TEST_F(TestHawqRegister, TestUsage1NotHDFSPath) {
SQLUtility util;
string rootPath(util.getTestRootPath());
string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
@@ -205,3 +205,115 @@ TEST_F(TestHawqRegister, TestNotHDFSPath) {
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_hawq.paq"));
util.execute("drop table hawqregister;");
}
+
+TEST_F(TestHawqRegister, TestUsage1ParquetRandomly) {
+ SQLUtility util;
+ string rootPath(util.getTestRootPath());
+ string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
+ string filePath = rootPath + relativePath;
+ EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put -f " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq"));
+ util.execute("drop table if exists nt;");
+ util.execute("create table nt(i int) with (appendonly=true, orientation=parquet);");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq nt"));
+ util.query("select * from nt;", 3);
+ util.execute("insert into nt values(1);");
+ util.query("select * from nt;", 4);
+ util.execute("drop table nt;");
+}
+
+TEST_F(TestHawqRegister, TestUsage1ParquetRandomly2) {
+ SQLUtility util;
+ string rootPath(util.getTestRootPath());
+ string relativePath("/ManagementTool/test_hawq_register_hawq.paq");
+ string filePath = rootPath + relativePath;
+ EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put -f " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq"));
+ util.execute("drop table if exists nt;");
+ util.execute("create table nt(i int) with (appendonly=true, orientation=parquet);");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq nt"));
+ util.query("select * from nt;", 3);
+ util.execute("insert into nt values(1);");
+ util.query("select * from nt;", 4);
+ util.execute("drop table nt;");
+}
+
+TEST_F(TestHawqRegister, TestUsage2ParquetRandomly) {
+ SQLUtility util;
+ util.execute("drop table if exists t;");
+ util.execute("create table t(i int) with (appendonly=true, orientation=parquet) distributed randomly;");
+ util.execute("insert into t values(1), (2), (3);");
+ util.query("select * from t;", 3);
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t.yml testhawqregister_testusage2parquetrandomly.t"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c t.yml testhawqregister_testusage2parquetrandomly.nt"));
+ util.query("select * from nt;", 3);
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf t.yml"));
+ util.execute("drop table t;");
+ util.execute("drop table nt;");
+}
+
+TEST_F(TestHawqRegister, TestUsage2ParquetHash1) {
+ SQLUtility util;
+ util.execute("drop table if exists t4;");
+ util.execute("create table t4(i int) with (appendonly=true, orientation=parquet) distributed by (i);");
+ util.execute("insert into t4 values(1), (2), (3);");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t4.yml testhawqregister_testusage2parquethash1.t4"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c t4.yml testhawqregister_testusage2parquethash1.nt4"));
+ util.query("select * from nt4;", 3);
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf t4.yml"));
+ util.execute("drop table t4;");
+ util.execute("drop table nt4;");
+}
+
+
+TEST_F(TestHawqRegister, TestUsage2ParquetHash2) {
+ SQLUtility util;
+ util.execute("drop table if exists t5;");
+ util.execute("create table t5(i int, j varchar, k text) with (appendonly=true, orientation=parquet) distributed by (i, k);");
+ util.execute("insert into t5 values(1, 'x', 'ab'), (2, 'y', 'cd'), (3, 'z', 'ef');");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t5.yml testhawqregister_testusage2parquethash2.t5"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c t5.yml testhawqregister_testusage2parquethash2.nt5"));
+ util.query("select * from nt5;", 3);
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf t5.yml"));
+ util.execute("drop table t5;");
+ util.execute("drop table nt5;");
+}
+
+
+TEST_F(TestHawqRegister, TestUsage2AORandom) {
+ SQLUtility util;
+ util.execute("drop table if exists t6;");
+ util.execute("create table t6(i int) with (appendonly=true, orientation=row) distributed randomly;");
+ util.execute("insert into t6 values(1), (2), (3);");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t6.yml testhawqregister_testusage2aorandom.t6"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c t6.yml testhawqregister_testusage2aorandom.nt6"));
+ util.query("select * from nt6;", 3);
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf t6.yml"));
+ util.execute("drop table t6;");
+ util.execute("drop table nt6;");
+}
+
+TEST_F(TestHawqRegister, TestUsage2AOHash1) {
+ SQLUtility util;
+ util.execute("drop table if exists t7;");
+ util.execute("create table t7(i int) with (appendonly=true, orientation=row) distributed by (i);");
+ util.execute("insert into t7 values(1), (2), (3);");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t7.yml testhawqregister_testusage2aohash1.t7"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c t7.yml testhawqregister_testusage2aohash1.nt7"));
+ util.query("select * from nt7;", 3);
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf t7.yml"));
+ util.execute("drop table t7;");
+ util.execute("drop table nt7;");
+}
+
+
+TEST_F(TestHawqRegister, TestUsage2AOHash2) {
+ SQLUtility util;
+ util.execute("drop table if exists t8;");
+ util.execute("create table t8(i int, j varchar, k text) with (appendonly=true, orientation=row) distributed by (i, k);");
+ util.execute("insert into t8 values(1, 'x', 'ab'), (2, 'y', 'cd'), (3, 'z', 'ef');");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t8.yml testhawqregister_testusage2aohash2.t8"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c t8.yml testhawqregister_testusage2aohash2.nt8"));
+ util.query("select * from nt8;", 3);
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf t8.yml"));
+ util.execute("drop table t8;");
+ util.execute("drop table nt8;");
+}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/af483766/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 6700f54..2b492de 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -80,9 +80,10 @@ def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
sys.exit(1)
-def get_seg_name(dburl, tablename, database):
+def get_seg_name(dburl, tablename, database, fmt):
try:
relname = ''
+ tablename = tablename.split('.')[-1]
query = ("select pg_class2.relname from pg_class as pg_class1, pg_appendonly, pg_class as pg_class2 "
"where pg_class1.relname ='%s' and pg_class1.oid = pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
conn = dbconn.connect(dburl, True)
@@ -97,6 +98,11 @@ def get_seg_name(dburl, tablename, database):
except DatabaseError, ex:
logger.error('Failed to run query "%s" with dbname "%s"' % (query, database))
sys.exit(1)
+ if fmt == 'Parquet':
+ if relname.find("paq") == -1:
+ logger.error("table '%s' is not parquet format" % tablename)
+ sys.exit(1)
+
return relname
@@ -139,7 +145,7 @@ def get_metadata_from_database(dburl, tablename, seg_name):
query = ("select location, gp_persistent_tablespace_node.tablespace_oid, database_oid, relfilenode from pg_class, gp_persistent_relation_node, "
"gp_persistent_tablespace_node, gp_persistent_filespace_node where relname = '%s' and pg_class.relfilenode = "
"gp_persistent_relation_node.relfilenode_oid and gp_persistent_relation_node.tablespace_oid = gp_persistent_tablespace_node.tablespace_oid "
- "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;") % tablename
+ "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;") % tablename.split('.')[-1]
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
conn.commit()
@@ -178,7 +184,6 @@ def get_files_in_hdfs(filepath):
logger.error("Path '%s' does not exist in hdfs" % filepath)
sys.exit(1)
hdfscmd = "hadoop fs -ls -R %s" % filepath
- print filepath
result, out, err = local_ssh_output(hdfscmd)
outlines = out.splitlines()
# recursively search all the files under path 'filepath'
@@ -276,7 +281,7 @@ if __name__ == '__main__':
fileformat = 'Parquet'
check_hash_type(dburl, tablename) # Usage1 only support randomly distributed table
- seg_name = get_seg_name(dburl, tablename, database)
+ seg_name = get_seg_name(dburl, tablename, database, fileformat)
firstsegno, tabledir = get_metadata_from_database(dburl, tablename, seg_name)
check_files_and_table_in_same_hdfs_cluster(filepath, tabledir)
[2/3] incubator-hawq git commit: HAWQ-991. Rewrite hawqregister to
support registering from yaml file.
Posted by rl...@apache.org.
HAWQ-991. Rewrite hawqregister to support registering from yaml file.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/2596be6e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/2596be6e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/2596be6e
Branch: refs/heads/master
Commit: 2596be6e9c87da3d23a13f92e85307b785bee5d5
Parents: 7661dec
Author: xunzhang <xu...@gmail.com>
Authored: Tue Aug 9 19:39:13 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Fri Aug 19 10:57:09 2016 +0800
----------------------------------------------------------------------
.../ManagementTool/test_hawq_register.cpp | 20 +-
tools/bin/hawqextract | 0
tools/bin/hawqregister | 256 +++++++++----------
3 files changed, 130 insertions(+), 146 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2596be6e/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp
index a7982b3..afc2cb4 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -27,7 +27,7 @@ TEST_F(TestHawqRegister, TestSingleHawqFile) {
util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(0, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_hawq.paq"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq hawqregister"));
util.query("select * from hawqregister;", 3);
util.execute("insert into hawqregister values(1);");
@@ -46,7 +46,7 @@ TEST_F(TestHawqRegister, TestSingleHiveFile) {
util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(0, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_hive.paq"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hive.paq hawqregister"));
util.query("select * from hawqregister;", 1);
util.execute("insert into hawqregister values(1);");
@@ -67,7 +67,7 @@ TEST_F(TestHawqRegister, TestDataTypes) {
util.execute("create table hawqregister(a bool, b int2, c int2, d int4, e int8, f date, g float4, h float8, i varchar, j bytea, k char, l varchar) with (appendonly=true, orientation=parquet);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(0, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_data_types.paq"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_data_types.paq hawqregister"));
util.query("select * from hawqregister;", 1);
util.execute("drop table hawqregister;");
@@ -87,7 +87,7 @@ TEST_F(TestHawqRegister, TestAllNULL) {
util.execute("create table hawqregister(a bool, b int2, c int2, d int4, e int8, f date, g float4, h float8, i varchar, j bytea, k char, l varchar) with (appendonly=true, orientation=parquet);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(0, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_data_types.paq"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_data_types.paq hawqregister"));
util.query("select * from hawqregister;", 1);
util.execute("drop table hawqregister;");
@@ -113,7 +113,7 @@ TEST_F(TestHawqRegister, TestFiles) {
util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(0, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_test"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_test hawqregister"));
util.query("select * from hawqregister;", 12);
util.execute("insert into hawqregister values(1);");
@@ -133,7 +133,7 @@ TEST_F(TestHawqRegister, TestHashDistributedTable) {
util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet) distributed by (i);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(1, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_hawq.paq"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq hawqregister"));
util.query("select * from hawqregister;", 0);
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_hawq.paq"));
@@ -151,7 +151,7 @@ TEST_F(TestHawqRegister, TestNotParquetFile) {
util.execute("create table hawqregister(i int) with (appendonly=true, orientation=parquet);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(1, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_test_not_paq"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_test_not_paq hawqregister"));
util.query("select * from hawqregister;", 0);
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_test_not_paq"));
@@ -169,7 +169,7 @@ TEST_F(TestHawqRegister, TestNotParquetTable) {
util.execute("create table hawqregister(i int);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(1, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister hdfs://localhost:8020/hawq_register_hawq.paq"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq hawqregister"));
util.query("select * from hawqregister;", 0);
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_hawq.paq"));
@@ -182,7 +182,7 @@ TEST_F(TestHawqRegister, TestFileNotExist) {
util.execute("create table hawqregister(i int);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(1, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + " hawqregister /hdfs://localhost:8020hawq_register_file_not_exist"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f /hdfs://localhost:8020hawq_register_file_not_exist hawqregister"));
util.query("select * from hawqregister;", 0);
util.execute("drop table hawqregister;");
@@ -199,7 +199,7 @@ TEST_F(TestHawqRegister, TestNotHDFSPath) {
util.execute("create table hawqregister(i int);");
util.query("select * from hawqregister;", 0);
- EXPECT_EQ(1, Command::getCommandStatus("hawq register " + (string) HAWQ_DB + "hawqregister /hawq_register_hawq.paq"));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f /hawq_register_hawq.paq hawqregister"));
util.query("select * from hawqregister;", 0);
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -rm hdfs://localhost:8020/hawq_register_hawq.paq"));
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2596be6e/tools/bin/hawqextract
----------------------------------------------------------------------
diff --git a/tools/bin/hawqextract b/tools/bin/hawqextract
old mode 100755
new mode 100644
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/2596be6e/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 380a548..6700f54 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -17,9 +17,8 @@
# specific language governing permissions and limitations
# under the License.
-'''
-hawq register [options] database_name table_name file_or_dir_path_in_hdfs
-'''
+# Usage1: hawq register [-h hostname] [-p port] [-U username] [-d database] [-f filepath] tablename
+# Usage2: hawq register [-h hostname] [-p port] [-U username] [-d database] [-c config] tablename
import os, sys, optparse, getpass, re, urlparse
try:
from gppylib.commands.unix import getLocalHostname, getUserName
@@ -40,133 +39,137 @@ EXECNAME = os.path.split(__file__)[-1]
setup_tool_logging(EXECNAME,getLocalHostname(),getUserName())
-def create_opt_parser(version):
+def option_parser():
parser = OptParser(option_class=OptChecker,
- usage='usage: %prog [options] database_name table_name file_or_dir_path_in_hdfs',
- version=version)
+ usage='usage: %prog [options] table_name',
+ version='%prog version $Revision: #1 $')
parser.remove_option('-h')
parser.add_option('-?', '--help', action='help')
- parser.add_option('-h', '--host', help="host of the target DB")
- parser.add_option('-p', '--port', help="port of the target DB", type='int', default=0)
- parser.add_option('-U', '--user', help="username of the target DB")
- return parser
-
-
-def check_hadoop_command():
- hdfscmd = "hadoop"
- result = local_ssh(hdfscmd);
- if result != 0:
- logger.error("command 'hadoop' is not available, please set environment variable $PATH to fix this")
+ parser.add_option('-h', '--host', help='host of the target DB')
+ parser.add_option('-p', '--port', help='port of the target DB', type='int', default=0)
+ parser.add_option('-U', '--user', help='username of the target DB')
+ parser.add_option('-d', '--database', default = 'postgres', dest = 'database', help='database name')
+ parser.add_option('-f', '--filepath', dest = 'filepath', help='file name in HDFS')
+ parser.add_option('-c', '--config', dest = 'yml_config', default = '', help='configuration file in YAML format')
+ return parser.parse_args()
+
+
+def option_parser_yml(yml_file):
+ import yaml
+ with open(yml_file, 'r') as f:
+ params = yaml.load(f)
+ if params['FileFormat'] == 'Parquet':
+ offset = params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
+ filepath = params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'][:offset] if len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path']
+ return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy']
+ offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
+ filepath = params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset] if len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path']
+ return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy']
+
+
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+ try:
+ schema = ','.join([k['name'] + ' ' + k['type'] for k in schema_info])
+ fmt = 'ROW' if fmt == 'AO' else fmt
+ query = 'create table %s(%s) with (appendonly=true, orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+ conn = dbconn.connect(dburl, False)
+ rows = dbconn.execSQL(conn, query)
+ conn.commit()
+ except DatabaseError, ex:
+ logger.error('Failed to execute query ""%s"' % query)
sys.exit(1)
-def get_seg_name(options, databasename, tablename):
+def get_seg_name(dburl, tablename, database):
try:
- relfilenode = 0
- relname = ""
- query = ("select pg_class2.relname from pg_class as pg_class1, pg_appendonly, pg_class as pg_class2 where pg_class1.relname ='%s' "
- "and pg_class1.oid = pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
- dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename)
+ relname = ''
+ query = ("select pg_class2.relname from pg_class as pg_class1, pg_appendonly, pg_class as pg_class2 "
+ "where pg_class1.relname ='%s' and pg_class1.oid = pg_appendonly.relid and pg_appendonly.segrelid = pg_class2.oid;") % tablename
conn = dbconn.connect(dburl, True)
rows = dbconn.execSQL(conn, query)
- conn.commit()
- if rows.rowcount == 0:
- logger.error("table '%s' not found in db '%s'" % (tablename, databasename));
+ conn.commit()
+ if not rows.rowcount:
+ logger.error('table "%s" not found in db "%s"' % (tablename, database))
sys.exit(1)
for row in rows:
relname = row[0]
conn.close()
-
except DatabaseError, ex:
- logger.error("Failed to connect to database, this script can only be run when the database is up")
- logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host, options.port, options.user, databasename, query))
- sys.exit(1)
-
- # check whether the target table is parquet format
- if relname.find("paq") == -1:
- logger.error("table '%s' is not parquet format" % tablename)
+ logger.error('Failed to run query "%s" with dbname "%s"' % (query, database))
sys.exit(1)
-
return relname
-def check_hash_type(options, databasename, tablename):
+def check_hash_type(dburl, tablename):
+ '''Check whether target table is hash-typed, in that case simple insertion does not work'''
try:
query = "select attrnums from gp_distribution_policy, pg_class where pg_class.relname = '%s' and pg_class.oid = gp_distribution_policy.localoid;" % tablename
- dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename)
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
- conn.commit()
- if rows.rowcount == 0:
- logger.error("target not found in table gp_distribution_policy")
+ conn.commit()
+ if not rows.rowcount:
+ logger.error('Target not found in table gp_distribution_policy.')
sys.exit(1)
for row in rows:
- if row[0] != None:
- logger.error("Cannot register file(s) to a table which is hash-typed")
+ if row[0]:
+ logger.error('Cannot register file(s) to a table which is hash-typed.')
sys.exit(1)
-
conn.close()
-
except DatabaseError, ex:
- logger.error("Failed to connect to database, this script can only be run when the database is up")
- logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host, options.port, options.user, databasename, query))
+ logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
-def get_metadata_from_database(options, databasename, tablename, seg_name):
+def get_metadata_from_database(dburl, tablename, seg_name):
+ '''Get the metadata to be inserted from hdfs'''
try:
- query = "select segno from pg_aoseg.%s;" % seg_name
- dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename)
+ query = 'select segno from pg_aoseg.%s;' % seg_name
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
- conn.commit()
+ conn.commit()
conn.close()
-
except DatabaseError, ex:
- logger.error("Failed to connect to database, this script can only be run when the database is up")
- logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host, options.port, options.user, databasename, query))
+ logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
firstsegno = rows.rowcount + 1
- # get the full path of correspoding file for target table
try:
+ # get the full path of corresponding file for target table
query = ("select location, gp_persistent_tablespace_node.tablespace_oid, database_oid, relfilenode from pg_class, gp_persistent_relation_node, "
- "gp_persistent_tablespace_node, gp_persistent_filespace_node where relname = '%s' and pg_class.relfilenode = "
- "gp_persistent_relation_node.relfilenode_oid and gp_persistent_relation_node.tablespace_oid = gp_persistent_tablespace_node.tablespace_oid "
- "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;") % tablename
- dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename)
+ "gp_persistent_tablespace_node, gp_persistent_filespace_node where relname = '%s' and pg_class.relfilenode = "
+ "gp_persistent_relation_node.relfilenode_oid and gp_persistent_relation_node.tablespace_oid = gp_persistent_tablespace_node.tablespace_oid "
+ "and gp_persistent_filespace_node.filespace_oid = gp_persistent_filespace_node.filespace_oid;") % tablename
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
- conn.commit()
+ conn.commit()
conn.close()
-
except DatabaseError, ex:
- logger.error("Failed to connect to database, this script can only be run when the database is up")
- logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host, options.port, options.user, databasename, query))
+ logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
-
for row in rows:
tabledir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + "/" + str(row[3]) + "/"
-
+ #tabledir = '/'.join([row[0], str(row[1]), str(row[2]), str(row[3]), ''])
return firstsegno, tabledir
def check_files_and_table_in_same_hdfs_cluster(filepath, tabledir):
+ '''Check whether all the files referred by 'filepath' and the location corresponding to the table are in the same hdfs cluster'''
# check whether the files to be registered is in hdfs
filesystem = filepath.split('://')
if filesystem[0] != 'hdfs':
- logger.error("Only support to register file(s) in hdfs")
+ logger.error('Only support to register file(s) in hdfs')
sys.exit(1)
fileroot = filepath.split('/')
tableroot = tabledir.split('/')
# check the root url of them. eg: for 'hdfs://localhost:8020/temp/tempfile', we check 'hdfs://localhost:8020'
- if fileroot[0] != tableroot[0] or fileroot[1] != tableroot[1] or fileroot[2] != tableroot[2]:
+ if fileroot[0:3] != tableroot[0:3]:
logger.error("Files to be registered and the table are not in the same hdfs cluster.\nFile(s) to be registered: '%s'\nTable path in HDFS: '%s'" % (filepath, tabledir))
sys.exit(1)
def get_files_in_hdfs(filepath):
+ '''Get all the files referred by 'filepath', which could be a file or a directory containing all the files'''
files = []
sizes = []
hdfscmd = "hadoop fs -test -e %s" % filepath
@@ -174,52 +177,52 @@ def get_files_in_hdfs(filepath):
if result != 0:
logger.error("Path '%s' does not exist in hdfs" % filepath)
sys.exit(1)
-
hdfscmd = "hadoop fs -ls -R %s" % filepath
+ print filepath
result, out, err = local_ssh_output(hdfscmd)
outlines = out.splitlines()
-
# recursively search all the files under path 'filepath'
- i = 0
for line in outlines:
lineargs = line.split()
if len(lineargs) == 8 and lineargs[0].find ("d") == -1:
files.append(lineargs[7])
sizes.append(int(lineargs[4]))
-
if len(files) == 0:
logger.error("Dir '%s' is empty" % filepath)
sys.exit(1)
-
return files, sizes
-def check_parquet_format(options, files):
- # check whether the files are parquet format by checking the first and last four bytes
- for file in files:
- hdfscmd = "hadoop fs -cat %s | head -c 4 | grep PAR1" % file
+def check_parquet_format(files):
+ '''Check whether the files to be registered are in parquet format'''
+ for f in files:
+ hdfscmd = 'hadoop fs -du -h %s | head -c 1' % f
+ rc, out, err = local_ssh_output(hdfscmd)
+ if out == '0':
+ continue
+ hdfscmd = 'hadoop fs -cat %s | head -c 4 | grep PAR1' % f
result1 = local_ssh(hdfscmd)
- hdfscmd = "hadoop fs -cat %s | tail -c 4 | grep PAR1" % file
+ hdfscmd = 'hadoop fs -cat %s | tail -c 4 | grep PAR1' % f
result2 = local_ssh(hdfscmd)
if result1 or result2:
- logger.error("File %s is not parquet format" % file)
+ logger.error('File %s is not parquet format' % f)
sys.exit(1)
-def move_files_in_hdfs(options, databasename, tablename, files, firstsegno, tabledir, normal):
- # move file(s) in src path into the folder correspoding to the target table
- if (normal == True):
+def move_files_in_hdfs(databasename, tablename, files, firstsegno, tabledir, normal):
+ '''Move file(s) in src path into the folder corresponding to the target table'''
+ if normal:
segno = firstsegno
for file in files:
srcfile = file
dstfile = tabledir + str(segno)
segno += 1
if srcfile != dstfile:
- hdfscmd = "hadoop fs -mv %s %s" % (srcfile, dstfile)
- sys.stdout.write("hdfscmd: '%s'\n" % hdfscmd)
+ hdfscmd = 'hadoop fs -mv %s %s' % (srcfile, dstfile)
+ sys.stdout.write('hdfscmd: "%s"\n' % hdfscmd)
result = local_ssh(hdfscmd)
if result != 0:
- logger.error("Fail to move '%s' to '%s'" % (srcfile, dstfile))
+ logger.error('Fail to move %s to %s' % (srcfile, dstfile))
sys.exit(1)
else:
segno = firstsegno
@@ -228,79 +231,60 @@ def move_files_in_hdfs(options, databasename, tablename, files, firstsegno, tabl
srcfile = tabledir + str(segno)
segno += 1
if srcfile != dstfile:
- hdfscmd = "hadoop fs -mv %s %s" % (srcfile, dstfile)
- sys.stdout.write("hdfscmd: '%s'\n" % hdfscmd)
+ hdfscmd = 'hadoop fs -mv %s %s' % (srcfile, dstfile)
+ sys.stdout.write('hdfscmd: "%s"\n' % hdfscmd)
result = local_ssh(hdfscmd)
if result != 0:
- logger.error("Fail to move '%s' to '%s'" % (srcfile, dstfile))
+ logger.error('Fail to move "%s" to "%s"' % (srcfile, dstfile))
sys.exit(1)
-def insert_metadata_into_database(options, databasename, tablename, seg_name, firstsegno, tabledir, eofs):
+def insert_metadata_into_database(dburl, databasename, tablename, seg_name, firstsegno, tabledir, eofs):
+ '''Insert the metadata into database'''
try:
query = "SET allow_system_table_mods='dml';"
segno = firstsegno
for eof in eofs:
query += "insert into pg_aoseg.%s values(%d, %d, %d, %d);" % (seg_name, segno, eof, -1, -1)
segno += 1
-
- dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=databasename)
conn = dbconn.connect(dburl, True)
rows = dbconn.execSQL(conn, query)
- conn.commit()
+ conn.commit()
conn.close()
-
except DatabaseError, ex:
- logger.error("Failed to connect to database, this script can only be run when the database is up")
- logger.error("host = %s, port = %d, user = %s, dbname = %s, query = %s" % (options.host, options.port, options.user, databasename, query))
- move_files_in_hdfs(options, databasename, tablename, files, firstsegno, tabledir, False)
-
+ logger.error('Failed to connect to database, this script can only be run when the database is up')
+ move_files_in_hdfs(options.database, options.tablename, files, firstsegno, tabledir, False)
sys.exit(1)
-def main(args=None):
- parser = create_opt_parser('%prog version $Revision: #1 $')
- options, args = parser.parse_args(args)
- if len(args) != 3:
- sys.stderr.write('Incorrect number of arguments\n\n')
- parser.print_help(sys.stderr)
- return 1
-
- databasename = args[0]
- tablename = args[1]
- filepath = args[2]
-
- # 1. check whether the path of shell command 'hadoop' is set.
- check_hadoop_command()
-
- # 2. get the seg_name from database
- seg_name = get_seg_name(options, databasename, tablename)
+if __name__ == '__main__':
+ options, args = option_parser()
+ if len(args) != 1 or (options.yml_config and options.filepath):
+ logger.error('Incorrect usage!\n Correct usage: "hawq register [-h hostname] [-p port] [-U username] [-d database] [-f filepath] tablename"\n or "hawq register [-h hostname] [-p port] [-U username] [-d database] [-c config] tablename"\n')
+ sys.exit(1)
+ if local_ssh('hadoop'):
+ logger.error('command "hadoop" is not available.')
+ sys.exit(1)
- # 3. check whether target table is hash-typed, in that case simple insertion does not work
- result = check_hash_type(options, databasename, tablename)
+ dburl = dbconn.DbURL(hostname=options.host, port=options.port, username=options.user, dbname=options.database)
+ filepath, database, tablename = options.filepath, options.database, args[0]
- # 4. get the metadata to be inserted from hdfs
- firstsegno, tabledir = get_metadata_from_database(options, databasename, tablename, seg_name)
+ if options.yml_config: # Usage2
+ fileformat, filepath, schema, distribution_policy = option_parser_yml(options.yml_config)
+ create_table(dburl, tablename, schema, fileformat, distribution_policy)
+ else:
+ fileformat = 'Parquet'
+ check_hash_type(dburl, tablename) # Usage1 only support randomly distributed table
- # 5. check whether all the files refered by 'filepath' and the location corresponding to the table are in the same hdfs cluster
+ seg_name = get_seg_name(dburl, tablename, database)
+ firstsegno, tabledir = get_metadata_from_database(dburl, tablename, seg_name)
check_files_and_table_in_same_hdfs_cluster(filepath, tabledir)
- # 6. get all the files refered by 'filepath', which could be a file or a directory containing all the files
files, sizes = get_files_in_hdfs(filepath)
- print "File(s) to be registered:"
+ print 'File(s) to be registered:', files
+ if fileformat == 'Parquet':
+ check_parquet_format(files)
print files
-
- # 7. check whether the file to be registered is parquet format
- check_parquet_format(options, files)
-
- # 8. move the file in hdfs to proper location
- move_files_in_hdfs(options, databasename, tablename, files, firstsegno, tabledir, True)
-
- # 9. insert the metadata into database
- insert_metadata_into_database(options, databasename, tablename, seg_name, firstsegno, tabledir, sizes)
-
- # 10. report the final status of hawq register
- logger.info("Hawq register succeed.")
-
-if __name__ == '__main__':
- sys.exit(main())
+ move_files_in_hdfs(database, tablename, files, firstsegno, tabledir, True)
+ insert_metadata_into_database(dburl, database, tablename, seg_name, firstsegno, tabledir, sizes)
+ logger.info('Hawq Register Succeed.')
[3/3] incubator-hawq git commit: HAWQ-991. update help info for the
update hawq register in tools/doc/hawqregister_help
Posted by rl...@apache.org.
HAWQ-991. update help info for the update hawq register in tools/doc/hawqregister_help
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/8cc4a042
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/8cc4a042
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/8cc4a042
Branch: refs/heads/master
Commit: 8cc4a042e9d9703d4f3d45689ab8d1f984cfcaa4
Parents: af48376
Author: xunzhang <xu...@gmail.com>
Authored: Wed Aug 17 22:34:21 2016 +0800
Committer: rlei <rl...@pivotal.io>
Committed: Fri Aug 19 10:57:40 2016 +0800
----------------------------------------------------------------------
.../ManagementTool/test_hawq_register.cpp | 2 +-
tools/bin/hawqregister | 50 ++++++++++++++++----
tools/doc/hawqregister_help | 40 +++++++++++-----
3 files changed, 69 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp
index e6fead8..00934a9 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -228,7 +228,7 @@ TEST_F(TestHawqRegister, TestUsage1ParquetRandomly2) {
string filePath = rootPath + relativePath;
EXPECT_EQ(0, Command::getCommandStatus("hadoop fs -put -f " + filePath + " hdfs://localhost:8020/hawq_register_hawq.paq"));
util.execute("drop table if exists nt;");
- util.execute("create table nt(i int) with (appendonly=true, orientation=parquet);");
+ util.execute("create table nt(i int) with (appendonly=true, orientation=parquet) distributed randomly;");
EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -f hdfs://localhost:8020/hawq_register_hawq.paq nt"));
util.query("select * from nt;", 3);
util.execute("insert into nt values(1);");
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 2b492de..7a20906 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -51,19 +51,49 @@ def option_parser():
parser.add_option('-d', '--database', default = 'postgres', dest = 'database', help='database name')
parser.add_option('-f', '--filepath', dest = 'filepath', help='file name in HDFS')
parser.add_option('-c', '--config', dest = 'yml_config', default = '', help='configuration file in YAML format')
- return parser.parse_args()
+ return parser
def option_parser_yml(yml_file):
import yaml
with open(yml_file, 'r') as f:
params = yaml.load(f)
+ # check if valid configuration yaml file
+ attrs = ['FileFormat', 'DFS_URL', 'Distribution_Policy']
+ for attr in attrs:
+ if attr not in params.keys():
+ logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
+ sys.exit(1)
if params['FileFormat'] == 'Parquet':
+ attrs = ['Parquet_FileLocations', 'Parquet_Schema']
+ for attr in attrs:
+ if attr not in params.keys():
+ logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
+ sys.exit(1)
+ if not params['Parquet_FileLocations'].get('Files'):
+ logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
+ sys.exit(1)
+ if not len(params['Parquet_FileLocations']['Files']):
+ return 'Parquet', '', params['Parquet_Schema'], params['Distribution_Policy']
offset = params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
- filepath = params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'][:offset] if len(params['Parquet_FileLocations']['Files']) != 1 else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path']
+ filepath = (params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'][:offset]
+ if len(params['Parquet_FileLocations']['Files']) != 1
+ else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'])
return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy']
+ attrs = ['AO_FileLocations', 'AO_Schema']
+ for attr in attrs:
+ if attr not in params.keys():
+ logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
+ sys.exit(1)
+ if not (params['AO_FileLocations']['Files']):
+ return 'AO', '', params['AO_Schema'], params['Distribution_Policy']
+ if not params['AO_FileLocations'].get('Files'):
+ logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
+ sys.exit(1)
offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
- filepath = params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset] if len(params['AO_FileLocations']['Files']) != 1 else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path']
+ filepath = (params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset]
+ if len(params['AO_FileLocations']['Files']) != 1
+ else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'])
return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy']
@@ -107,18 +137,18 @@ def get_seg_name(dburl, tablename, database, fmt):
def check_hash_type(dburl, tablename):
- '''Check whether target table is hash-typed, in that case simple insertion does not work'''
+ '''Check whether target table is hash distributed, in that case simple insertion does not work'''
try:
query = "select attrnums from gp_distribution_policy, pg_class where pg_class.relname = '%s' and pg_class.oid = gp_distribution_policy.localoid;" % tablename
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
conn.commit()
if not rows.rowcount:
- logger.error('Target not found in table gp_distribution_policy.')
+ logger.error('Table %s not found in table gp_distribution_policy.' % tablename)
sys.exit(1)
for row in rows:
if row[0]:
- logger.error('Cannot register file(s) to a table which is hash-typed.')
+ logger.error('Cannot register file(s) to a table which is hash distributed.')
sys.exit(1)
conn.close()
except DatabaseError, ex:
@@ -154,8 +184,7 @@ def get_metadata_from_database(dburl, tablename, seg_name):
logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
for row in rows:
- tabledir = row[0].strip() + "/" + str(row[1]) + "/" + str(row[2]) + "/" + str(row[3]) + "/"
- #tabledir = '/'.join([row[0], str(row[1]), str(row[2]), str(row[3]), ''])
+ tabledir = '/'.join([row[0].strip(), str(row[1]), str(row[2]), str(row[3]), ''])
return firstsegno, tabledir
@@ -263,9 +292,10 @@ def insert_metadata_into_database(dburl, databasename, tablename, seg_name, firs
if __name__ == '__main__':
- options, args = option_parser()
+ parser = option_parser()
+ options, args = parser.parse_args()
if len(args) != 1 or (options.yml_config and options.filepath):
- logger.error('Incorrect usage!\n Correct usage: "hawq register [-h hostname] [-p port] [-U username] [-d database] [-f filepath] tablename"\n or "hawq register [-h hostname] [-p port] [-U username] [-d database] [-c config] tablename"\n')
+ parser.print_help(sys.stderr)
sys.exit(1)
if local_ssh('hadoop'):
logger.error('command "hadoop" is not available.')
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8cc4a042/tools/doc/hawqregister_help
----------------------------------------------------------------------
diff --git a/tools/doc/hawqregister_help b/tools/doc/hawqregister_help
index ade1e3a..a664127 100644
--- a/tools/doc/hawqregister_help
+++ b/tools/doc/hawqregister_help
@@ -1,12 +1,14 @@
COMMAND NAME: hawq register
-Register parquet files generated by other system into the corrsponding table in HAWQ
+Usage1: Register parquet files generated by other systems into the corresponding table in HAWQ
+Usage2: Register a parquet/ao table from the latest-sync metadata in yaml format
*****************************************************
SYNOPSIS
*****************************************************
-hawq register [-h hostname] [-p port] [-U username] <databasename> <tablename> <hdfspath>
+Usage1: hawq register [-h hostname] [-p port] [-U username] [-d databasename] [-f filepath] <tablename>
+Usage2: hawq register [-h hostname] [-p port] [-U username] [-d databasename] [-c config] <tablename>
hawq register help
hawq register -?
@@ -17,6 +19,7 @@ hawq register --version
DESCRIPTION
*****************************************************
+Use Case1:
"hawq register" is a utility to register file(s) on HDFS into
the table in HAWQ. It moves the file in the path(if path
refers to a file) or files under the path(if path refers to a
@@ -33,23 +36,24 @@ is created by using "distributed by" statement when creating that table.
The file(s) to be registered and the table in HAWQ must be in the
same HDFS cluster.
+Use Case2:
+Users should be able to use hawq register to register table files into a new HAWQ cluster.
+From the users' perspective, this serves as a kind of protection against corruption.
+Users use the last-known-good metadata to update the portion of the catalog managing HDFS blocks.
+The table files or directory should be backed up (such as by using distcp) into the same path in the new HDFS setting.
+
+To use "hawq register", HAWQ must have been started.
+Currently "hawq register" supports both AO and Parquet formats in this case.
+The partition table is not supported in this version, and we will support it soon.
+
*****************************************************
Arguments
*****************************************************
-<databasename>
-
-Name of the database to be operated on.
-
<tablename>
Name of the table to be registered into.
-<hdfspath>
-
-The path of the file or the directory containing the files
-that will be registered.
-
*****************************************************
OPTIONS
*****************************************************
@@ -85,7 +89,7 @@ CONNECTION OPTIONS
system user name.
*****************************************************
-EXAMPLES
+EXAMPLE FOR USAGE1
*****************************************************
Run "hawq register" to register a parquet file in HDFS with path
@@ -104,6 +108,18 @@ update the meta data of the table 'parquet_table' in HAWQ which is in the
table 'pg_aoseg.pg_paqseg_77160'.
*****************************************************
+EXAMPLE FOR USAGE2
+*****************************************************
+$ psql -c "drop table if exists table;"
+$ psql -c "create table table(i int) with (appendonly=true, orientation=parquet) distributed by (i);"
+$ psql -c "insert into table values(1), (2), (3);"
+$ hawq extract -d postgres -o t.yml table
+$ hawq register -d postgres -c t.yml newtable
+In this example, suppose that "table" is a table in the old HAWQ cluster; the user dumps the "t.yml" yaml file to
+save the metadata of "table". To register "newtable" in a new HAWQ cluster, the user runs "hawq register"
+to register the new table with the given yaml file "t.yml".
+
+*****************************************************
DATA TYPES
*****************************************************
The data types used in HAWQ and parquet format are not the same, so there is a