You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ar...@apache.org on 2019/08/23 01:39:54 UTC

[impala] 01/02: IMPALA-8875: fix test_drop_column_maintains_stats with Hive 3

This is an automated email from the ASF dual-hosted git repository.

arodoni pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit f6117c9580731b290d3566fef420f9c4a02273d9
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Aug 22 20:35:22 2019 +0200

    IMPALA-8875: fix test_drop_column_maintains_stats with Hive 3
    
    Hive 3 adds 'COLUMN_STATS_ACCURATE': '{}' to column statistics
    during ALTER TABLE, which broke this test because it
    expected the stats to be completely equal. Removing this
    property before comparison solves the issue.
    
    Change-Id: Ic2937e7634eca01e10492733102c834237ab6d6a
    Reviewed-on: http://gerrit.cloudera.org:8080/14123
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/metadata/test_hms_integration.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py
index 8df23cb..59de250 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -217,13 +217,17 @@ class TestHmsIntegration(ImpalaTestSuite):
         result[line_elements[0]] = line_elements[1]
     return result
 
-  def hive_column_stats(self, table, column):
+  def hive_column_stats(self, table, column, remove_stats_accurate=False):
     """Returns a dictionary of stats for a column according to Hive."""
     output = self.run_stmt_in_hive('describe formatted %s %s' % (table, column))
+    result = {}
     if HIVE_MAJOR_VERSION == 2:
-      return self.parse_hive2_describe_formatted_output(output)
+      result = self.parse_hive2_describe_formatted_output(output)
     else:
-      return self.parse_hive3_describe_formatted_output(output)
+      result = self.parse_hive3_describe_formatted_output(output)
+    if remove_stats_accurate:
+      result.pop('COLUMN_STATS_ACCURATE', None)
+    return result
 
   def impala_columns(self, table_name):
     """
@@ -387,8 +391,10 @@ class TestHmsIntegration(ImpalaTestSuite):
         hive_y_stats = self.hive_column_stats(table_name, 'y')
         impala_stats = self.impala_all_column_stats(table_name)
         self.client.execute('alter table %s drop column z' % table_name)
-        assert hive_x_stats == self.hive_column_stats(table_name, 'x')
-        assert hive_y_stats == self.hive_column_stats(table_name, 'y')
+        assert hive_x_stats == self.hive_column_stats(table_name, 'x',
+                                                      remove_stats_accurate=True)
+        assert hive_y_stats == self.hive_column_stats(table_name, 'y',
+                                                      remove_stats_accurate=True)
         assert impala_stats['x'] == self.impala_all_column_stats(table_name)[
             'x']
         assert impala_stats['y'] == self.impala_all_column_stats(table_name)[
@@ -396,7 +402,8 @@ class TestHmsIntegration(ImpalaTestSuite):
         self.run_stmt_in_hive(
             'alter table %s replace columns (x int)' %
             table_name)
-        assert hive_x_stats == self.hive_column_stats(table_name, 'x')
+        assert hive_x_stats == self.hive_column_stats(table_name, 'x',
+                                                      remove_stats_accurate=True)
         assert impala_stats['x'] == self.impala_all_column_stats(table_name)[
             'x']