You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2016/08/16 01:46:20 UTC
[1/2] incubator-hawq git commit: HAWQ-1005. Add schema info with
Parquet format in hawqextract.
Repository: incubator-hawq
Updated Branches:
refs/heads/master 189a2c3a0 -> a5a2e6d13
HAWQ-1005. Add schema info with Parquet format in hawqextract.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/ff1419c1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/ff1419c1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/ff1419c1
Branch: refs/heads/master
Commit: ff1419c192ffdf7105158262470c85af507f851b
Parents: 189a2c3
Author: xunzhang <xu...@gmail.com>
Authored: Mon Aug 15 17:09:39 2016 +0800
Committer: xunzhang <xu...@gmail.com>
Committed: Mon Aug 15 17:09:39 2016 +0800
----------------------------------------------------------------------
tools/bin/hawqextract | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ff1419c1/tools/bin/hawqextract
----------------------------------------------------------------------
diff --git a/tools/bin/hawqextract b/tools/bin/hawqextract
index 8dc471c..699c713 100755
--- a/tools/bin/hawqextract
+++ b/tools/bin/hawqextract
@@ -434,7 +434,8 @@ def extract_metadata(conn, tbname):
p_pgclass['relfilenode'])
}
file_locations['Partitions'].append(par_info)
-
+ logger.info('-- extract Parquet_Schema')
+ metadata['Parquet_Schema'] = accessor.get_schema(relid)
metadata['Parquet_FileLocations'] = file_locations
# extract AO/Parquet specific metadata
[2/2] incubator-hawq git commit: HAWQ-1005. Add distribution policy
info with hawqextract.
Posted by hu...@apache.org.
HAWQ-1005. Add distribution policy info with hawqextract.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/a5a2e6d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/a5a2e6d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/a5a2e6d1
Branch: refs/heads/master
Commit: a5a2e6d13805915eb78f700f090859efd1b1b5b0
Parents: ff1419c
Author: xunzhang <xu...@gmail.com>
Authored: Mon Aug 15 17:59:34 2016 +0800
Committer: xunzhang <xu...@gmail.com>
Committed: Mon Aug 15 17:59:34 2016 +0800
----------------------------------------------------------------------
tools/bin/hawqextract | 58 ++++++++++++++++++++++++++++++++--------------
1 file changed, 41 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a5a2e6d1/tools/bin/hawqextract
----------------------------------------------------------------------
diff --git a/tools/bin/hawqextract b/tools/bin/hawqextract
index 699c713..f3ffe5b 100755
--- a/tools/bin/hawqextract
+++ b/tools/bin/hawqextract
@@ -7,9 +7,9 @@
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
-#
+#
# http://www.apache.org/licenses/LICENSE-2.0
-#
+#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -28,7 +28,7 @@ Options:
-o output file: the output metadata file, if not set, will output to terminal.
-W: force password authentication
-v: verbose
- -?: help
+ -?: help
hawq extract output YAML file format:
@@ -38,7 +38,7 @@ FileFormat: string (AO/Parquet)
TableName: string (schemaname.tablename)
DFS_URL: string (hdfs://127.0.0.1:9000)
Encoding: UTF8
-AO_Schema:
+AO_Schema:
- name: string
type: string
@@ -51,7 +51,7 @@ AO_FileLocations:
Files:
- path: string (/gpseg0/16385/35469/35470.1)
size: long
-
+
Partitions:
- Blocksize: int
Checksum: boolean
@@ -113,7 +113,7 @@ class GpExtractError(Exception): pass
class GpMetadataAccessor:
def __init__(self, conn):
self.conn = conn
-
+
rows = self.exec_query("""
SELECT oid, datname, dat2tablespace,
pg_encoding_to_char(encoding) encoding
@@ -157,7 +157,7 @@ class GpMetadataAccessor:
... {'fileno':'3', 'filesize':'160'}]
'''
qry = """
- SELECT segno as fileno, eof as filesize
+ SELECT segno as fileno, eof as filesize
FROM pg_aoseg.pg_aoseg_%d
ORDER by fileno;
""" % oid
@@ -175,7 +175,7 @@ class GpMetadataAccessor:
... {'fileno':'3', 'filesize':'160'}]
'''
qry = """
- SELECT segno as fileno, eof as filesize
+ SELECT segno as fileno, eof as filesize
FROM pg_aoseg.pg_paqseg_%d
ORDER by fileno;
""" % oid
@@ -194,7 +194,7 @@ class GpMetadataAccessor:
if not rows:
raise GpExtractError('Table %s.%s not exists!' % (nspname, relname))
return rows[0]
-
+
def get_schema(self, relid):
'''
Fetch schema of the table specified by oid `relid`.
@@ -242,6 +242,23 @@ class GpMetadataAccessor:
""" % (nspname, relname)
return self.exec_query(qry)
+ def get_distribution_policy_info(self, oid, relid):
+ '''
+ Get table's distribution policy from gp_distribution_policy view.
+ '''
+ qry = """
+ SELECT *
+ FROM gp_distribution_policy
+ WHERE localoid = '%s'
+ """ % oid
+ policy = self.exec_query(qry)[0]['attrnums']
+ if not policy:
+ return 'DISTRIBUTED RANDOMLY'
+ else:
+ cols = [d['name'] for d in self.get_schema(relid)]
+ cols_list = [cols[int(k)-1] for k in policy.strip('{}').split(',')]
+ return 'DISTRIBUTED BY (' + ','.join(cols_list) + ')'
+
def connectdb(options):
'''
@@ -301,7 +318,7 @@ def extract_metadata(conn, tbname):
'''
Given AO table's oid and relfilenode, return path and size of all its
data files on HDFS as [{'path': path1, 'size': size1}, {...}].
-
+
Path doesn't include DFS URL.
Example:
@@ -331,7 +348,7 @@ def extract_metadata(conn, tbname):
)
files.append({'path': path, 'size': int(f['filesize'])})
return files
-
+
def get_parquet_table_files(oid, relfilenode):
'''
The same with get_ao_table_files, except that it's for Parquet table.
@@ -349,7 +366,7 @@ def extract_metadata(conn, tbname):
)
files.append({'path': path, 'size': int(f['filesize'])})
return files
-
+
def extract_AO_metadata():
relid = rel_pgclass['oid']
rel_appendonly = accessor.get_appendonly_attrs(relid)
@@ -369,7 +386,7 @@ def extract_metadata(conn, tbname):
# fill Partitions
file_locations['Partitions'] = []
for p in partitions:
- p_pgclass = accessor.get_pgclass(p['partitionschemaname'],
+ p_pgclass = accessor.get_pgclass(p['partitionschemaname'],
p['partitiontablename'])
if get_table_format(p_pgclass['reloptions']) != file_format:
@@ -387,10 +404,13 @@ def extract_metadata(conn, tbname):
'Files': get_ao_table_files(p_pgclass['oid'], p_pgclass['relfilenode'])
}
file_locations['Partitions'].append(par_info)
+ metadata['AO_FileLocations'] = file_locations
logger.info('-- extract AO_Schema')
metadata['AO_Schema'] = accessor.get_schema(relid)
- metadata['AO_FileLocations'] = file_locations
+
+ logger.info('-- extract Distribution_Policy')
+ metadata['Distribution_Policy'] = accessor.get_distribution_policy_info(rel_pgclass['oid'], relid)
def extract_Parquet_metadata():
relid = rel_pgclass['oid']
@@ -413,7 +433,7 @@ def extract_metadata(conn, tbname):
# fill Partitions
file_locations['Partitions'] = []
for p in partitions:
- p_pgclass = accessor.get_pgclass(p['partitionschemaname'],
+ p_pgclass = accessor.get_pgclass(p['partitionschemaname'],
p['partitiontablename'])
if get_table_format(p_pgclass['reloptions']) != file_format:
@@ -434,9 +454,13 @@ def extract_metadata(conn, tbname):
p_pgclass['relfilenode'])
}
file_locations['Partitions'].append(par_info)
+ metadata['Parquet_FileLocations'] = file_locations
+
logger.info('-- extract Parquet_Schema')
metadata['Parquet_Schema'] = accessor.get_schema(relid)
- metadata['Parquet_FileLocations'] = file_locations
+
+ logger.info('-- extract Distribution_Policy')
+ metadata['Distribution_Policy'] = accessor.get_distribution_policy_info(rel_pgclass['oid'], relid)
# extract AO/Parquet specific metadata
cases = { 'AO': extract_AO_metadata,
@@ -506,7 +530,7 @@ def create_opt_parser(version):
def main(args=None):
parser = create_opt_parser('%prog version $Revision: #1 $')
-
+
options, args = parser.parse_args(args)
if len(args) != 1:
sys.stderr.write('Incorrect number of arguments: missing <tablename>.\n\n')