You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by li...@apache.org on 2016/09/20 07:57:05 UTC

[1/2] incubator-hawq git commit: HAWQ-1061. Fix update catalog inconsistency issue.

Repository: incubator-hawq
Updated Branches:
  refs/heads/master a683b5c31 -> 50c1aa9ea


HAWQ-1061. Fix update catalog inconsistency issue.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/50c1aa9e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/50c1aa9e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/50c1aa9e

Branch: refs/heads/master
Commit: 50c1aa9eab39868f11f8946d9564aa832f5eb96c
Parents: 8954090
Author: xunzhang <xu...@gmail.com>
Authored: Tue Sep 20 12:37:56 2016 +0800
Committer: Lili Ma <ic...@gmail.com>
Committed: Tue Sep 20 15:56:49 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqregister | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/50c1aa9e/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 2b9b343..bdd6947 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -572,8 +572,12 @@ class HawqRegister(object):
             query = "set allow_system_table_mods='dml';"
             query += "begin transaction;"
             segno_lst = [f.split('/')[-1] for f in self.files_update]
-            for i, eof in enumerate(eofs):
-                query += "update pg_aoseg.%s set eof = '%s' where segno = '%s';" % (self.seg_name, eof, segno_lst[i])
+            if self.file_format == 'Parquet':
+                for i, eof in enumerate(eofs):
+                    query += "update pg_aoseg.%s set eof = '%s', tupcount = '%s', eofuncompressed = '%s' where segno = '%s';" % (self.seg_name, eof, -1, -1, segno_lst[i])
+            else:
+                for i, eof in enumerate(eofs):
+                    query += "update pg_aoseg.%s set eof = '%s', tupcount = '%s', varblockcount = '%s', eofuncompressed = '%s' where segno = '%s';" % (self.seg_name, eof, -1, -1, -1, segno_lst[i])
             query += "end transaction;"
         else: # update_and_insert
             eofs = self.sizes
@@ -590,8 +594,12 @@ class HawqRegister(object):
             query += ';'
 
             segno_lst = [f.split('/')[-1] for f in self.files_update]
-            for i, eof in enumerate(self.sizes_update):
-                query += "update pg_aoseg.%s set eof = '%s' where segno = '%s';" % (self.seg_name, eof, segno_lst[i])
+            if self.file_format == 'Parquet':
+                for i, eof in enumerate(self.sizes_update):
+                    query += "update pg_aoseg.%s set eof = '%s', tupcount = '%s', eofuncompressed = '%s' where segno = '%s';" % (self.seg_name, eof, -1, -1, segno_lst[i])
+            else:
+                for i, eof in enumerate(self.sizes_update):
+                    query += "update pg_aoseg.%s set eof = '%s', tupcount = '%s', varblockcount = '%s', eofuncompressed = '%s' where segno = '%s';" % (self.seg_name, eof, -1, -1, -1, segno_lst[i])
             query += "end transaction;"
         return self.utility_accessor.update_catalog(query)
 


[2/2] incubator-hawq git commit: HAWQ-1061. Fix data loss when file locations include directories, check policy and bucketnum in all modes.

Posted by li...@apache.org.
HAWQ-1061. Fix data loss when file locations include directories, check policy and bucketnum in all modes.


Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/8954090c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/8954090c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/8954090c

Branch: refs/heads/master
Commit: 8954090c29a770889c3e3269e14bd4bdaa6926aa
Parents: a683b5c
Author: xunzhang <xu...@gmail.com>
Authored: Mon Sep 19 17:42:22 2016 +0800
Committer: Lili Ma <ic...@gmail.com>
Committed: Tue Sep 20 15:56:49 2016 +0800

----------------------------------------------------------------------
 tools/bin/hawqregister | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/8954090c/tools/bin/hawqregister
----------------------------------------------------------------------
diff --git a/tools/bin/hawqregister b/tools/bin/hawqregister
index 153ea9d..2b9b343 100755
--- a/tools/bin/hawqregister
+++ b/tools/bin/hawqregister
@@ -375,14 +375,24 @@ class HawqRegister(object):
                 set_yml_dataa('AO', files, sizes, params['AO_Schema'], params['Distribution_Policy'], params['AO_FileLocations'], params['Bucketnum'], partitionby, partitions_constraint,\
                               partitions_name, partitions_compression_level, partitions_compression_type, partitions_checksum, partitions_filepaths, partitions_filesizes, encoding)
                 
+        def check_file_not_folder():
+            for fn in self.files:
+                hdfscmd = 'hdfs dfs -test -f %s' % fn
+                if local_ssh(hdfscmd, logger):
+                    logger.info('%s is not a file in hdfs, please check the yaml configuration file.' % fn)
+                    sys.exit(1)
+
         if self.yml:
             option_parser_yml(options.yml_config)
             self.filepath = self.files[0][:self.files[0].rfind('/')] if self.files else ''
-            check_distribution_policy()
+            check_file_not_folder()
             check_database_encoding()
             if self.mode != 'force' and self.mode != 'repair':
                 if not create_table():
                     self.mode = 'second_exist'
+            check_bucket_number()
+            check_distribution_policy()
+            check_policy_consistency()
         else:
             self.file_format = 'Parquet'
             check_hash_type() # Usage1 only support randomly distributed table
@@ -400,8 +410,6 @@ class HawqRegister(object):
             if self.tabledir.strip('/') != self.filepath.strip('/'):
                 logger.error("In repair mode, file path from yaml file should be the same with table's path.")
                 sys.exit(1)
-            check_policy_consistency()
-            check_bucket_number()
             existed_files, existed_sizes = self._get_files_in_hdfs(self.filepath)
             existed_info = {}
             for k, fn in enumerate(existed_files):