You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2016/08/25 02:21:18 UTC
[1/2] incubator-hawq git commit: HAWQ-1012. Check whether the input yaml file for hawq register is valid.
Repository: incubator-hawq
Updated Branches:
refs/heads/master c2280debb -> 7e0c63adc
HAWQ-1012. Check whether the input yaml file for hawq register is valid.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/31c3cde5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/31c3cde5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/31c3cde5
Branch: refs/heads/master
Commit: 31c3cde5565a26023bd314c64cccfcd669032680
Parents: c2280de
Author: xunzhang <xu...@gmail.com>
Authored: Wed Aug 24 11:20:44 2016 +0800
Committer: Ruilong Huo <rh...@pivotal.io>
Committed: Thu Aug 25 10:24:34 2016 +0800
----------------------------------------------------------------------
tools/bin/hawqregister | 110 ++++++++++++++++++++++++++++++--------------
1 file changed, 75 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/31c3cde5/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 7a20906..c2692d8 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -54,59 +54,96 @@ def option_parser():
return parser
+def register_yaml_dict_check(D):
+ # check exists
+ check_list = ['DFS_URL', 'Distribution_Policy', 'FileFormat', 'TableName']
+ for attr in check_list:
+ if D.get(attr) == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % attr)
+ sys.exit(1)
+ if D['FileFormat'] in ['Parquet', 'AO']:
+ prefix = D['FileFormat']
+ local_check_list = ['%s_FileLocations' % prefix, '%s_Schema' % prefix]
+ for attr in local_check_list:
+ if D.get(attr) == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % attr)
+ sys.exit(1)
+ if D['%s_FileLocations' % prefix].get('Files') == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % '%s_FileLocations.Files' % prefix)
+ sys.exit(1)
+ for d in D['%s_FileLocations' % prefix]['Files']:
+ if d.get('path') == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % '%s_FileLocations.Files.path' % prefix)
+ sys.exit(1)
+ if d.get('size') == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % '%s_FileLocations.Files.size' % prefix)
+ sys.exit(1)
+ else:
+ logger.error('hawq register only support Parquet and AO formats. Format %s is not supported.' % D['FileFormat'])
+ sys.exit(1)
+ prefix = D['FileFormat']
+ if D.get('%s_Schema' % prefix) == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % '%s_Schema' % prefix)
+ sys.exit(1)
+ for d in D['%s_Schema' % prefix]:
+ if d.get('name') == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % '%s_Schema.name' % prefix)
+ sys.exit(1)
+ if d.get('type') == None:
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % '%s_Schema.type' % prefix)
+ sys.exit(1)
+ if D['FileFormat'] == 'Parquet':
+ sub_check_list = ['CompressionLevel', 'CompressionType', 'PageSize', 'RowGroupSize']
+ for attr in sub_check_list:
+ if not D['Parquet_FileLocations'].has_key(attr):
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % 'Parquet_FileLocations.%s' % attr)
+ sys.exit(1)
+ else:
+ sub_check_list = ['Checksum', 'CompressionLevel', 'CompressionType']
+ for attr in sub_check_list:
+ if not D['AO_FileLocations'].has_key(attr):
+ logger.error('Wrong configuration yaml file format: "%s" attribute does not exist.\n See example in "hawq register --help".' % 'AO_FileLocations.%s' % attr)
+ sys.exit(1)
+
+
+
def option_parser_yml(yml_file):
import yaml
with open(yml_file, 'r') as f:
params = yaml.load(f)
- # check if valid configuration yaml file
- attrs = ['FileFormat', 'DFS_URL', 'Distribution_Policy']
- for attr in attrs:
- if attr not in params.keys():
- logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
- sys.exit(1)
+ register_yaml_dict_check(params)
if params['FileFormat'] == 'Parquet':
- attrs = ['Parquet_FileLocations', 'Parquet_Schema']
- for attr in attrs:
- if attr not in params.keys():
- logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
- sys.exit(1)
- if not params['Parquet_FileLocations'].get('Files'):
- logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
- sys.exit(1)
if not len(params['Parquet_FileLocations']['Files']):
- return 'Parquet', '', params['Parquet_Schema'], params['Distribution_Policy']
+ return 'Parquet', '', params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations']
offset = params['Parquet_FileLocations']['Files'][0]['path'].rfind('/')
filepath = (params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'][:offset]
if len(params['Parquet_FileLocations']['Files']) != 1
else params['DFS_URL'] + params['Parquet_FileLocations']['Files'][0]['path'])
- return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy']
- attrs = ['AO_FileLocations', 'AO_Schema']
- for attr in attrs:
- if attr not in params.keys():
- logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
- sys.exit(1)
- if not (params['AO_FileLocations']['Files']):
- return 'AO', '', params['AO_Schema'], params['Distribution_Policy']
- if not params['AO_FileLocations'].get('Files'):
- logger.error('Wrong configuration yaml file format, see example in "hawq register --help"')
- sys.exit(1)
+ return 'Parquet', filepath, params['Parquet_Schema'], params['Distribution_Policy'], params['Parquet_FileLocations']
+ if not len(params['AO_FileLocations']['Files']):
+ return 'AO', '', params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations']
offset = params['AO_FileLocations']['Files'][0]['path'].rfind('/')
filepath = (params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'][:offset]
if len(params['AO_FileLocations']['Files']) != 1
else params['DFS_URL'] + params['AO_FileLocations']['Files'][0]['path'])
- return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy']
+ return 'AO', filepath, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations']
-def create_table(dburl, tablename, schema_info, fmt, distrbution_policy):
+def create_table(dburl, tablename, schema_info, fmt, distrbution_policy, file_locations):
try:
schema = ','.join([k['name'] + ' ' + k['type'] for k in schema_info])
fmt = 'ROW' if fmt == 'AO' else fmt
- query = 'create table %s(%s) with (appendonly=true, orientation=%s) %s;' % (tablename, schema, fmt, distrbution_policy)
+ if fmt == 'ROW':
+ query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, checksum=%s) %s;'
+ % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['Checksum'], distrbution_policy))
+ else: # Parquet
+ query = ('create table %s(%s) with (appendonly=true, orientation=%s, compresstype=%s, compresslevel=%s, pagesize=%s, rowgroupsize=%s) %s;'
+ % (tablename, schema, fmt, file_locations['CompressionType'], file_locations['CompressionLevel'], file_locations['PageSize'], file_locations['RowGroupSize'], distrbution_policy))
conn = dbconn.connect(dburl, False)
rows = dbconn.execSQL(conn, query)
conn.commit()
except DatabaseError, ex:
- logger.error('Failed to execute query ""%s"' % query)
+ logger.error('Failed to execute query "%s"' % query)
sys.exit(1)
@@ -190,6 +227,8 @@ def get_metadata_from_database(dburl, tablename, seg_name):
def check_files_and_table_in_same_hdfs_cluster(filepath, tabledir):
'''Check whether all the files refered by 'filepath' and the location corresponding to the table are in the same hdfs cluster'''
+ if not filepath:
+ return
# check whether the files to be registered is in hdfs
filesystem = filepath.split('://')
if filesystem[0] != 'hdfs':
@@ -305,21 +344,22 @@ if __name__ == '__main__':
filepath, database, tablename = options.filepath, options.database, args[0]
if options.yml_config: # Usage2
- fileformat, filepath, schema, distribution_policy = option_parser_yml(options.yml_config)
- create_table(dburl, tablename, schema, fileformat, distribution_policy)
+ fileformat, filepath, schema, distribution_policy, file_locations = option_parser_yml(options.yml_config)
+ create_table(dburl, tablename, schema, fileformat, distribution_policy, file_locations)
else:
fileformat = 'Parquet'
check_hash_type(dburl, tablename) # Usage1 only support randomly distributed table
-
+ if not filepath:
+ sys.exit(0)
seg_name = get_seg_name(dburl, tablename, database, fileformat)
firstsegno, tabledir = get_metadata_from_database(dburl, tablename, seg_name)
+ sizes = 0
check_files_and_table_in_same_hdfs_cluster(filepath, tabledir)
-
files, sizes = get_files_in_hdfs(filepath)
print 'File(s) to be registered:', files
if fileformat == 'Parquet':
check_parquet_format(files)
- print files
+ print files
move_files_in_hdfs(database, tablename, files, firstsegno, tabledir, True)
insert_metadata_into_database(dburl, database, tablename, seg_name, firstsegno, tabledir, sizes)
logger.info('Hawq Register Succeed.')
[2/2] incubator-hawq git commit: HAWQ-1012. Update feature tests for hawq register.
Posted by hu...@apache.org.
HAWQ-1012. Update feature tests for hawq register.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/7e0c63ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/7e0c63ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/7e0c63ad
Branch: refs/heads/master
Commit: 7e0c63adc7b7d117517a64905e20bb23229fb368
Parents: 31c3cde
Author: xunzhang <xu...@gmail.com>
Authored: Wed Aug 24 17:14:37 2016 +0800
Committer: Ruilong Huo <rh...@pivotal.io>
Committed: Thu Aug 25 10:25:09 2016 +0800
----------------------------------------------------------------------
.../ManagementTool/test_hawq_register.cpp | 24 ++++++++++++++++++++
tools/bin/hawqregister | 2 +-
2 files changed, 25 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/7e0c63ad/src/test/feature/ManagementTool/test_hawq_register.cpp
----------------------------------------------------------------------
diff --git a/src/test/feature/ManagementTool/test_hawq_register.cpp b/src/test/feature/ManagementTool/test_hawq_register.cpp
index 00934a9..434f817 100644
--- a/src/test/feature/ManagementTool/test_hawq_register.cpp
+++ b/src/test/feature/ManagementTool/test_hawq_register.cpp
@@ -2,6 +2,7 @@
#include "lib/command.h"
#include "lib/sql_util.h"
+#include "lib/string_util.h"
#include "gtest/gtest.h"
@@ -317,3 +318,26 @@ TEST_F(TestHawqRegister, TestUsage2AOHash2) {
util.execute("drop table t8;");
util.execute("drop table nt8;");
}
+
+TEST_F(TestHawqRegister, TestEmptyTable) {
+ SQLUtility util;
+ util.execute("drop table if exists t9;");
+ util.execute("create table t9(i int) with (appendonly=true, orientation=row) distributed randomly;");
+ EXPECT_EQ(0, Command::getCommandStatus("hawq extract -d " + (string) HAWQ_DB + " -o t9.yml testhawqregister_testemptytable.t9"));
+ EXPECT_EQ(0, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c t9.yml testhawqregister_testemptytable.nt9"));
+ util.query("select * from nt9;", 0);
+ EXPECT_EQ(0, Command::getCommandStatus("rm -rf t9.yml"));
+ util.execute("drop table t9;");
+ util.execute("drop table nt9;");
+}
+
+TEST_F(TestHawqRegister, TestIncorrectYaml) {
+ SQLUtility util;
+ string filePath = util.getTestRootPath() + "/ManagementTool/";
+ EXPECT_EQ(0, hawq::test::endsWith(Command::getCommandOutput("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect1.yml xx"), "attribute does not exist."));
+ EXPECT_EQ(0, hawq::test::endsWith(Command::getCommandOutput("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect2.yml xx"), "attribute does not exist."));
+ EXPECT_EQ(0, hawq::test::endsWith(Command::getCommandOutput("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect3.yml xx"), "attribute does not exist."));
+ EXPECT_EQ(0, hawq::test::endsWith(Command::getCommandOutput("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect4.yml xx"), "attribute does not exist."));
+ EXPECT_EQ(0, hawq::test::endsWith(Command::getCommandOutput("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect5.yml xx"), "attribute does not exist."));
+ EXPECT_EQ(1, Command::getCommandStatus("hawq register -d " + (string) HAWQ_DB + " -c " + filePath + "incorrect6.yml xx"));
+}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/7e0c63ad/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index c2692d8..26284a8 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -359,7 +359,7 @@ if __name__ == '__main__':
print 'File(s) to be registered:', files
if fileformat == 'Parquet':
check_parquet_format(files)
- print files
+ print files
move_files_in_hdfs(database, tablename, files, firstsegno, tabledir, True)
insert_metadata_into_database(dburl, database, tablename, seg_name, firstsegno, tabledir, sizes)
logger.info('Hawq Register Succeed.')