You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by hu...@apache.org on 2016/08/16 01:46:20 UTC

[1/2] incubator-hawq git commit: HAWQ-1005. Add schema info with Parquet format in hawqextract.

Repository: incubator-hawq
Updated Branches:
  refs/heads/master 189a2c3a0 -> a5a2e6d13


HAWQ-1005. Add schema info with Parquet format in hawqextract.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/ff1419c1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/ff1419c1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/ff1419c1

Branch: refs/heads/master
Commit: ff1419c192ffdf7105158262470c85af507f851b
Parents: 189a2c3
Author: xunzhang <xu...@gmail.com>
Authored: Mon Aug 15 17:09:39 2016 +0800
Committer: xunzhang <xu...@gmail.com>
Committed: Mon Aug 15 17:09:39 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqextract | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/ff1419c1/tools/bin/hawqextract
----------------------------------------------------------------------
diff --git a/tools/bin/hawqextract b/tools/bin/hawqextract
index 8dc471c..699c713 100755
--- a/tools/bin/hawqextract
+++ b/tools/bin/hawqextract
@@ -434,7 +434,8 @@ def extract_metadata(conn, tbname):
                                                          p_pgclass['relfilenode'])
                 }
                 file_locations['Partitions'].append(par_info)
-
+        logger.info('-- extract Parquet_Schema')
+        metadata['Parquet_Schema'] = accessor.get_schema(relid)
         metadata['Parquet_FileLocations'] = file_locations
 
     # extract AO/Parquet specific metadata


[2/2] incubator-hawq git commit: HAWQ-1005. Add distribution policy info with hawqextract.

Posted by hu...@apache.org.
HAWQ-1005. Add distribution policy info with hawqextract.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/a5a2e6d1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/a5a2e6d1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/a5a2e6d1

Branch: refs/heads/master
Commit: a5a2e6d13805915eb78f700f090859efd1b1b5b0
Parents: ff1419c
Author: xunzhang <xu...@gmail.com>
Authored: Mon Aug 15 17:59:34 2016 +0800
Committer: xunzhang <xu...@gmail.com>
Committed: Mon Aug 15 17:59:34 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqextract | 58 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/a5a2e6d1/tools/bin/hawqextract
----------------------------------------------------------------------
diff --git a/tools/bin/hawqextract b/tools/bin/hawqextract
index 699c713..f3ffe5b 100755
--- a/tools/bin/hawqextract
+++ b/tools/bin/hawqextract
@@ -7,9 +7,9 @@
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
-# 
+#
 #   http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -28,7 +28,7 @@ Options:
     -o output file: the output metadata file, if not set, will output to terminal.
     -W: force password authentication
     -v: verbose
-    -?: help 
+    -?: help
 
 hawq extract output YAML file format:
 
@@ -38,7 +38,7 @@ FileFormat: string (AO/Parquet)
 TableName: string (schemaname.tablename)
 DFS_URL: string (hdfs://127.0.0.1:9000)
 Encoding: UTF8
-AO_Schema: 
+AO_Schema:
 - name: string
   type: string
 
@@ -51,7 +51,7 @@ AO_FileLocations:
   Files:
   - path: string (/gpseg0/16385/35469/35470.1)
     size: long
-  
+
   Partitions:
   - Blocksize: int
     Checksum: boolean
@@ -113,7 +113,7 @@ class GpExtractError(Exception): pass
 class GpMetadataAccessor:
     def __init__(self, conn):
         self.conn = conn
-        
+
         rows = self.exec_query("""
         SELECT oid, datname, dat2tablespace,
                pg_encoding_to_char(encoding) encoding
@@ -157,7 +157,7 @@ class GpMetadataAccessor:
         ...  {'fileno':'3', 'filesize':'160'}]
         '''
         qry = """
-        SELECT segno as fileno, eof as filesize 
+        SELECT segno as fileno, eof as filesize
         FROM pg_aoseg.pg_aoseg_%d
         ORDER by fileno;
         """ % oid
@@ -175,7 +175,7 @@ class GpMetadataAccessor:
         ...  {'fileno':'3', 'filesize':'160'}]
         '''
         qry = """
-        SELECT segno as fileno, eof as filesize 
+        SELECT segno as fileno, eof as filesize
         FROM pg_aoseg.pg_paqseg_%d
         ORDER by fileno;
         """ % oid
@@ -194,7 +194,7 @@ class GpMetadataAccessor:
         if not rows:
             raise GpExtractError('Table %s.%s not exists!' % (nspname, relname))
         return rows[0]
-    
+
     def get_schema(self, relid):
         '''
         Fetch schema of the table specified by oid `relid`.
@@ -242,6 +242,23 @@ class GpMetadataAccessor:
         """ % (nspname, relname)
         return self.exec_query(qry)
 
+    def get_distribution_policy_info(self, oid, relid):
+        '''
+        Get table's distribution policy from gp_distribution_policy view.
+        '''
+        qry = """
+        SELECT *
+        FROM gp_distribution_policy
+        WHERE localoid = '%s'
+        """ % oid
+        policy = self.exec_query(qry)[0]['attrnums']
+        if not policy:
+            return 'DISTRIBUTED RANDOMLY'
+        else:
+            cols = [d['name'] for d in self.get_schema(relid)]
+            cols_list = [cols[int(k)-1] for k in policy.strip('{}').split(',')]
+            return 'DISTRIBUTED BY (' + ','.join(cols_list) + ')'
+
 
 def connectdb(options):
     '''
@@ -301,7 +318,7 @@ def extract_metadata(conn, tbname):
         '''
         Given AO table's oid and relfilenode, return path and size of all its
         data files on HDFS as [{'path': path1, 'size': size1}, {...}].
-        
+
         Path doesn't include DFS URL.
 
         Example:
@@ -331,7 +348,7 @@ def extract_metadata(conn, tbname):
             )
             files.append({'path': path, 'size': int(f['filesize'])})
         return files
-    
+
     def get_parquet_table_files(oid, relfilenode):
         '''
         The same with get_ao_table_files, except that it's for Parquet table.
@@ -349,7 +366,7 @@ def extract_metadata(conn, tbname):
             )
             files.append({'path': path, 'size': int(f['filesize'])})
         return files
-    
+
     def extract_AO_metadata():
         relid = rel_pgclass['oid']
         rel_appendonly = accessor.get_appendonly_attrs(relid)
@@ -369,7 +386,7 @@ def extract_metadata(conn, tbname):
             # fill Partitions
             file_locations['Partitions'] = []
             for p in partitions:
-                p_pgclass = accessor.get_pgclass(p['partitionschemaname'], 
+                p_pgclass = accessor.get_pgclass(p['partitionschemaname'],
                                                  p['partitiontablename'])
 
                 if get_table_format(p_pgclass['reloptions']) != file_format:
@@ -387,10 +404,13 @@ def extract_metadata(conn, tbname):
                         'Files': get_ao_table_files(p_pgclass['oid'], p_pgclass['relfilenode'])
                 }
                 file_locations['Partitions'].append(par_info)
+        metadata['AO_FileLocations'] = file_locations
 
         logger.info('-- extract AO_Schema')
         metadata['AO_Schema'] = accessor.get_schema(relid)
-        metadata['AO_FileLocations'] = file_locations
+
+        logger.info('-- extract Distribution_Policy')
+        metadata['Distribution_Policy'] = accessor.get_distribution_policy_info(rel_pgclass['oid'], relid)
 
     def extract_Parquet_metadata():
         relid = rel_pgclass['oid']
@@ -413,7 +433,7 @@ def extract_metadata(conn, tbname):
             # fill Partitions
             file_locations['Partitions'] = []
             for p in partitions:
-                p_pgclass = accessor.get_pgclass(p['partitionschemaname'], 
+                p_pgclass = accessor.get_pgclass(p['partitionschemaname'],
                                                  p['partitiontablename'])
 
                 if get_table_format(p_pgclass['reloptions']) != file_format:
@@ -434,9 +454,13 @@ def extract_metadata(conn, tbname):
                                                          p_pgclass['relfilenode'])
                 }
                 file_locations['Partitions'].append(par_info)
+        metadata['Parquet_FileLocations'] = file_locations
+
         logger.info('-- extract Parquet_Schema')
         metadata['Parquet_Schema'] = accessor.get_schema(relid)
-        metadata['Parquet_FileLocations'] = file_locations
+
+        logger.info('-- extract Distribution_Policy')
+        metadata['Distribution_Policy'] = accessor.get_distribution_policy_info(rel_pgclass['oid'], relid)
 
     # extract AO/Parquet specific metadata
     cases = { 'AO': extract_AO_metadata,
@@ -506,7 +530,7 @@ def create_opt_parser(version):
 
 def main(args=None):
     parser = create_opt_parser('%prog version $Revision: #1 $')
-    
+
     options, args = parser.parse_args(args)
     if len(args) != 1:
         sys.stderr.write('Incorrect number of arguments: missing <tablename>.\n\n')