You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/05/23 15:40:29 UTC

[03/17] incubator-impala git commit: IMPALA-3491: Merge test_hbase_metadata.py into compute_stats.py. Use unique db fixture.

IMPALA-3491: Merge test_hbase_metadata.py into compute_stats.py. Use unique db fixture.

- Moves the test into compute_stats.py
- Changes some test classes in compute_stats.py to inherit from
  ImpalaTestSuite and not from TestComputeStats because that
  will cause all tests in TestComputeStats to be run in the
  subclasses again (redundantly).
- Cleans up and adds more coverage for testing incremental stats on
  HBase, which was probably broken in commit 6b32ff06.
- Fixes a side effect that the original test had when testing
  incremental stats on HBase: it computed stats on a functional
  table which was not supposed to have stats.

Testing: Ran compute_stats.py on exhaustive locally in a loop 10 times.
Did a private hdfs/core run.

Change-Id: Iee8b84e30948c3c98166e08cae2666574777730c
Reviewed-on: http://gerrit.cloudera.org:8080/3074
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ea45de84
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ea45de84
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ea45de84

Branch: refs/heads/master
Commit: ea45de84f49412778b2f3f009bf2f5819307bfd6
Parents: 1ccfc45
Author: Alex Behm <al...@cloudera.com>
Authored: Thu May 12 10:28:13 2016 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Mon May 23 08:40:19 2016 -0700

----------------------------------------------------------------------
 .../hbase-compute-stats-incremental.test        | 43 +++++++++++++++++-
 .../queries/QueryTest/hbase-compute-stats.test  | 33 +++++---------
 tests/metadata/test_compute_stats.py            | 40 ++++++++++++++---
 tests/metadata/test_hbase_metadata.py           | 47 --------------------
 4 files changed, 88 insertions(+), 75 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test
index 4d536bc..ea76090 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats-incremental.test
@@ -1,7 +1,48 @@
 ====
 ---- QUERY
-compute incremental stats alltypes;
+create table alltypessmall_hbase like functional_hbase.alltypessmall
+---- RESULTS
+====
+---- QUERY
+compute incremental stats alltypessmall_hbase
 ---- RESULTS
 'Updated 1 partition(s) and 13 column(s).'
 ---- TYPES
 STRING
+====
+---- QUERY
+show table stats alltypessmall_hbase
+---- LABELS
+REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
+---- RESULTS: VERIFY_IS_EQUAL
+regex:.+,'',regex:.+,regex:.+B
+regex:.+,'1',regex:.+,regex:.+B
+regex:.+,'3',regex:.+,regex:.+B
+regex:.+,'5',regex:.+,regex:.+B
+regex:.+,'7',regex:.+,regex:.+B
+regex:.+,'9',regex:.+,regex:.+B
+'Total','',regex:.+,regex:.+B
+---- TYPES
+STRING, STRING, BIGINT, STRING
+====
+---- QUERY
+show column stats alltypessmall_hbase
+---- LABELS
+COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
+---- RESULTS
+'id','INT',99,-1,4,4
+'bigint_col','BIGINT',10,-1,8,8
+'bool_col','BOOLEAN',2,-1,1,1
+'date_string_col','STRING',12,-1,8,8
+'double_col','DOUBLE',10,-1,8,8
+'float_col','FLOAT',10,-1,4,4
+'int_col','INT',10,-1,4,4
+'month','INT',4,-1,4,4
+'smallint_col','SMALLINT',10,-1,2,2
+'string_col','STRING',10,-1,1,1
+'timestamp_col','TIMESTAMP',101,-1,16,16
+'tinyint_col','TINYINT',10,-1,1,1
+'year','INT',1,-1,4,4
+---- TYPES
+STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test
index eae6cec..8ec19b5 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/hbase-compute-stats.test
@@ -1,18 +1,17 @@
 ====
 ---- QUERY
 # test computing stats on an HBase table
-create table compute_stats_db_hbase.alltypessmall_hbase
-like alltypessmall;
+create table alltypessmall_hbase like functional_hbase.alltypessmall
 ====
 ---- QUERY
-compute stats compute_stats_db_hbase.alltypessmall_hbase
+compute stats alltypessmall_hbase
 ---- RESULTS
 'Updated 1 partition(s) and 13 column(s).'
 ---- TYPES
 STRING
 ====
 ---- QUERY
-show table stats compute_stats_db_hbase.alltypessmall_hbase
+show table stats alltypessmall_hbase
 ---- LABELS
 REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
 ---- RESULTS: VERIFY_IS_EQUAL
@@ -27,7 +26,7 @@ regex:.+,'9',regex:.+,regex:.+B
 STRING, STRING, BIGINT, STRING
 ====
 ---- QUERY
-show column stats compute_stats_db_hbase.alltypessmall_hbase
+show column stats alltypessmall_hbase
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
@@ -49,18 +48,17 @@ STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
 ====
 ---- QUERY
 # test computing stats on an binary HBase table
-create table compute_stats_db_hbase.alltypessmall_hbase_bin
-like alltypessmallbinary;
+create table alltypessmall_hbase_bin like functional_hbase.alltypessmallbinary
 ====
 ---- QUERY
-compute stats compute_stats_db_hbase.alltypessmall_hbase_bin
+compute stats alltypessmall_hbase_bin
 ---- RESULTS
 'Updated 1 partition(s) and 13 column(s).'
 ---- TYPES
 STRING
 ====
 ---- QUERY: VERIFY_IS_EQUAL
-show table stats compute_stats_db_hbase.alltypessmall_hbase_bin
+show table stats alltypessmall_hbase_bin
 ---- LABELS
 REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
 ---- RESULTS
@@ -69,7 +67,7 @@ regex:.+,'',regex:.+,regex:.+
 STRING, STRING, BIGINT, STRING
 ====
 ---- QUERY
-show column stats compute_stats_db_hbase.alltypessmall_hbase_bin
+show column stats alltypessmall_hbase_bin
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
@@ -92,18 +90,17 @@ STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
 ---- QUERY
 # IMP-1227: Test computing stats on an HBase table that has a
 # complex-typed column that Impala does not yet support.
-create table compute_stats_db_hbase.allcomplextypes
-like allcomplextypes
+create table allcomplextypes_hbase like functional_hbase.allcomplextypes
 ====
 ---- QUERY
-compute stats compute_stats_db_hbase.allcomplextypes
+compute stats allcomplextypes_hbase
 ---- RESULTS
 'Updated 1 partition(s) and 3 column(s).'
 ---- TYPES
 STRING
 ====
 ---- QUERY: VERIFY_IS_EQUAL
-show table stats compute_stats_db_hbase.allcomplextypes
+show table stats allcomplextypes_hbase
 ---- LABELS
 REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE
 ---- RESULTS
@@ -112,7 +109,7 @@ regex:.+,'',regex:.+,regex:.+
 STRING, STRING, BIGINT, STRING
 ====
 ---- QUERY
-show column stats compute_stats_db_hbase.allcomplextypes
+show column stats allcomplextypes_hbase
 ---- LABELS
 COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- RESULTS
@@ -134,9 +131,3 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE
 ---- TYPES
 STRING, STRING, BIGINT, BIGINT, INT, DOUBLE
 ====
----- QUERY
-compute incremental stats alltypes;
----- RESULTS
-'Updated 1 partition(s) and 13 column(s).'
----- TYPES
-STRING

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/tests/metadata/test_compute_stats.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_compute_stats.py b/tests/metadata/test_compute_stats.py
index 16c96ab..925fce2 100644
--- a/tests/metadata/test_compute_stats.py
+++ b/tests/metadata/test_compute_stats.py
@@ -48,7 +48,7 @@ class TestComputeStats(ImpalaTestSuite):
   def test_compute_stats_many_partitions(self, vector):
     # To cut down on test execution time, only run the compute stats test against many
     # partitions if performing an exhaustive test run.
-    if self.exploration_strategy() != 'exhaustive': return
+    if self.exploration_strategy() != 'exhaustive': pytest.skip()
     self.run_test_case('QueryTest/compute-stats-many-partitions', vector)
 
   @pytest.mark.execute_serially
@@ -111,16 +111,40 @@ class TestComputeStats(ImpalaTestSuite):
     assert(len(show_result.data) == 2)
     assert("1\tpval\t8" in show_result.data[0])
 
+# Tests compute stats on HBase tables. This test is separate from TestComputeStats,
+# because we want to use the existing mechanism to disable running tests on hbase/none
+# based on the filesystem type (S3, Isilon, etc.).
+class TestHbaseComputeStats(ImpalaTestSuite):
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestHbaseComputeStats, cls).add_test_dimensions()
+    cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
+    cls.TestMatrix.add_constraint(
+        lambda v: v.get_value('table_format').file_format == 'hbase')
+
+  def test_hbase_compute_stats(self, vector, unique_database):
+    self.run_test_case('QueryTest/hbase-compute-stats', vector, unique_database)
+
+  def test_hbase_compute_stats_incremental(self, vector, unique_database):
+    self.run_test_case('QueryTest/hbase-compute-stats-incremental', vector,
+      unique_database)
+
 
 @SkipIf.not_default_fs # Isilon: Missing coverage: compute stats
-class TestCorruptTableStats(TestComputeStats):
+class TestCorruptTableStats(ImpalaTestSuite):
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
   @classmethod
   def add_test_dimensions(cls):
-    super(TestComputeStats, cls).add_test_dimensions()
+    super(TestCorruptTableStats, cls).add_test_dimensions()
     cls.TestMatrix.add_dimension(create_exec_option_dimension(
       disable_codegen_options=[False], exec_single_node_option=[100]))
-    # Do not run these tests using all dimensions because the expected results
-    # are different for different file formats.
     cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))
 
   def test_corrupt_stats(self, vector, unique_database):
@@ -130,7 +154,11 @@ class TestCorruptTableStats(TestComputeStats):
     self.run_test_case('QueryTest/corrupt-stats', vector, unique_database)
 
 
-class TestIncompatibleColStats(TestComputeStats):
+class TestIncompatibleColStats(ImpalaTestSuite):
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
   @classmethod
   def add_test_dimensions(cls):
     super(TestIncompatibleColStats, cls).add_test_dimensions()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ea45de84/tests/metadata/test_hbase_metadata.py
----------------------------------------------------------------------
diff --git a/tests/metadata/test_hbase_metadata.py b/tests/metadata/test_hbase_metadata.py
deleted file mode 100644
index 81b87cb..0000000
--- a/tests/metadata/test_hbase_metadata.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2012 Cloudera, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from tests.common.test_vector import *
-from tests.common.impala_test_suite import *
-from tests.common.test_dimensions import create_uncompressed_text_dimension
-
-# Tests the COMPUTE STATS command for gathering table and column stats.
-# TODO: Merge this test file with test_col_stats.py
-@pytest.mark.execute_serially
-class TestHbaseMetadata(ImpalaTestSuite):
-  TEST_DB_NAME = "compute_stats_db_hbase"
-
-  @classmethod
-  def get_workload(self):
-    return 'functional-query'
-
-  @classmethod
-  def add_test_dimensions(cls):
-    super(TestHbaseMetadata, cls).add_test_dimensions()
-    cls.TestMatrix.add_dimension(create_single_exec_option_dimension())
-    cls.TestMatrix.add_constraint(\
-        lambda v: v.get_value('table_format').file_format == 'hbase')
-
-  def setup_method(self, method):
-    # cleanup and create a fresh test database
-    self.cleanup_db(self.TEST_DB_NAME)
-    self.execute_query("create database %s" % (self.TEST_DB_NAME))
-
-  def teardown_method(self, method):
-    self.cleanup_db(self.TEST_DB_NAME)
-
-  def test_hbase_compute_stats(self, vector):
-    self.run_test_case('QueryTest/hbase-compute-stats', vector)
-
-  def test_hbase_compute_stats_incremental(self, vector):
-    self.run_test_case('QueryTest/hbase-compute-stats-incremental', vector)