Posted to commits@impala.apache.org by ab...@apache.org on 2016/09/08 04:39:33 UTC

[3/7] incubator-impala git commit: IMPALA-3491: Use unique database fixture in test_insert_parquet.py

IMPALA-3491: Use unique database fixture in test_insert_parquet.py

Testing: Ran the test locally in a loop.
Did a private debug/core/hdfs build.

Change-Id: I790b2ed5236640c7263826d1d2a74b64d43ac6f7
Reviewed-on: http://gerrit.cloudera.org:8080/4317
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins
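
For readers unfamiliar with the fixture this change adopts: unique_database
gives each test its own empty, randomly named database that is created before
the test runs and dropped afterwards, removing the need for execute_serially
markers and manual drop/create cleanup. A minimal sketch of the pattern,
assuming the decorator and fixture behave as in tests/common/parametrize.py
(the test body itself is hypothetical):

from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.parametrize import UniqueDatabase

class TestSketch(ImpalaTestSuite):

  # As I understand it, sync_ddl=True additionally exercises creating the
  # unique database with SYNC_DDL enabled, so the DDL is visible on all
  # impalads before the test proceeds.
  @UniqueDatabase.parametrize(sync_ddl=True)
  def test_sketch(self, vector, unique_database):
    # 'unique_database' is a fresh database name; tables created in it
    # cannot collide with other tests running concurrently.
    self.execute_query("create table {0}.t (i int)".format(unique_database))
    # Passing the database to run_test_case substitutes $DATABASE in the
    # .test workload file (hypothetical workload name here).
    self.run_test_case('some_workload', vector, unique_database)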


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f0ffbca2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f0ffbca2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f0ffbca2

Branch: refs/heads/master
Commit: f0ffbca2c36ce3ad8ea5b3629d91e6b579d7dfcf
Parents: 157c800
Author: Alex Behm <al...@cloudera.com>
Authored: Fri Sep 2 10:44:20 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Thu Sep 8 03:25:29 2016 +0000

----------------------------------------------------------------------
 .../workloads/tpch/queries/insert_parquet.test  |  6 +-
 tests/query_test/test_insert_parquet.py         | 60 ++++++++------------
 2 files changed, 26 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f0ffbca2/testdata/workloads/tpch/queries/insert_parquet.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpch/queries/insert_parquet.test b/testdata/workloads/tpch/queries/insert_parquet.test
index 35b7cde..4707b7b 100644
--- a/testdata/workloads/tpch/queries/insert_parquet.test
+++ b/testdata/workloads/tpch/queries/insert_parquet.test
@@ -1,8 +1,8 @@
 ====
 ---- QUERY
 # Tests using a larger table.
-create table if not exists orders_insert_test like orders location
-'$FILESYSTEM_PREFIX/test-warehouse/orders_insert_table';
+create table if not exists orders_insert_test like tpch_parquet.orders
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/orders_insert_table';
 insert overwrite table orders_insert_test select * from tpch.orders
 ---- RESULTS
 : 1500000
@@ -56,7 +56,7 @@ bigint
 ---- QUERY
 # Test to verify that huge (larger than 64k) values can be written, see IMPALA-1705
 create table if not exists test_insert_huge_vals (s string) stored as parquet
-location '$FILESYSTEM_PREFIX/test-warehouse/test_insert_huge_vals';
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/test_insert_huge_vals';
 insert overwrite table test_insert_huge_vals
   select cast(l_orderkey as string) from tpch.lineitem
   union select group_concat(concat(s_name, s_address, s_phone)) from tpch.supplier
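
The .test changes above depend on the framework's variable substitution: when
a database is passed to run_test_case, occurrences of $DATABASE in the
workload file are replaced with that database's name (and $FILESYSTEM_PREFIX
with the filesystem root), so each run writes under its own warehouse
directory. A hedged illustration of the effect; substitute_vars is a
hypothetical stand-in for the framework's real substitution logic:

# Hypothetical helper showing the effect of $DATABASE/$FILESYSTEM_PREFIX
# substitution; the real logic lives in Impala's test framework.
def substitute_vars(sql, database, filesystem_prefix=''):
  return (sql.replace('$DATABASE', database)
             .replace('$FILESYSTEM_PREFIX', filesystem_prefix))

stmt = ("create table if not exists orders_insert_test like tpch_parquet.orders "
        "location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/orders_insert_table'")
print(substitute_vars(stmt, 'test_insert_parquet_4f2a'))
# ... location '/test-warehouse/test_insert_parquet_4f2a.db/orders_insert_table'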

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f0ffbca2/tests/query_test/test_insert_parquet.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py
index 70d3545..d5b8c17 100644
--- a/tests/query_test/test_insert_parquet.py
+++ b/tests/query_test/test_insert_parquet.py
@@ -26,6 +26,7 @@ from tempfile import mkdtemp as make_tmp_dir
 
 from tests.common.environ import impalad_basedir
 from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.parametrize import UniqueDatabase
 from tests.common.skip import SkipIfIsilon, SkipIfLocal
 from tests.common.test_dimensions import create_exec_option_dimension
 from tests.common.test_vector import TestDimension
@@ -63,18 +64,14 @@ class TestInsertParquetQueries(ImpalaTestSuite):
     cls.TestMatrix.add_constraint(lambda v:\
         v.get_value('table_format').compression_codec == 'none')
 
-  @classmethod
-  def setup_class(cls):
-    super(TestInsertParquetQueries, cls).setup_class()
-
-  @pytest.mark.execute_serially
   @SkipIfLocal.multiple_impalad
-  def test_insert_parquet(self, vector):
+  @UniqueDatabase.parametrize(sync_ddl=True)
+  def test_insert_parquet(self, vector, unique_database):
     vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = \
         vector.get_value('file_size')
     vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
         vector.get_value('compression_codec')
-    self.run_test_case('insert_parquet', vector, multiple_impalad=True)
+    self.run_test_case('insert_parquet', vector, unique_database, multiple_impalad=True)
 
 class TestInsertParquetInvalidCodec(ImpalaTestSuite):
   @classmethod
@@ -94,10 +91,6 @@ class TestInsertParquetInvalidCodec(ImpalaTestSuite):
     cls.TestMatrix.add_constraint(lambda v:\
         v.get_value('table_format').compression_codec == 'none')
 
-  @classmethod
-  def setup_class(cls):
-    super(TestInsertParquetInvalidCodec, cls).setup_class()
-
   @SkipIfLocal.multiple_impalad
   def test_insert_parquet_invalid_codec(self, vector):
     vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
@@ -124,40 +117,34 @@ class TestInsertParquetVerifySize(ImpalaTestSuite):
         v.get_value('table_format').compression_codec == 'none')
     cls.TestMatrix.add_dimension(TestDimension("compression_codec", *PARQUET_CODECS));
 
-  @classmethod
-  def setup_class(cls):
-    super(TestInsertParquetVerifySize, cls).setup_class()
-
-  @pytest.mark.execute_serially
   @SkipIfIsilon.hdfs_block_size
   @SkipIfLocal.hdfs_client
-  def test_insert_parquet_verify_size(self, vector):
-    # Test to verify that the result file size is close to what we expect.i
-    TBL = "parquet_insert_size"
-    DROP = "drop table if exists {0}".format(TBL)
-    CREATE = ("create table parquet_insert_size like tpch_parquet.orders"
-              " stored as parquet location '{0}/{1}'".format(WAREHOUSE, TBL))
-    QUERY = "insert overwrite {0} select * from tpch.orders".format(TBL)
-    DIR = get_fs_path("test-warehouse/{0}/".format(TBL))
-    BLOCK_SIZE = 40 * 1024 * 1024
-
-    self.execute_query(DROP)
-    self.execute_query(CREATE)
-
-    vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = BLOCK_SIZE
+  def test_insert_parquet_verify_size(self, vector, unique_database):
+    # Test to verify that the result file size is close to what we expect.
+    tbl_name = "parquet_insert_size"
+    fq_tbl_name = unique_database + "." + tbl_name
+    location = get_fs_path("test-warehouse/{0}.db/{1}/"
+                           .format(unique_database, tbl_name))
+    create = ("create table {0} like tpch_parquet.orders stored as parquet"
+              .format(fq_tbl_name, location))
+    query = "insert overwrite {0} select * from tpch.orders".format(fq_tbl_name)
+    block_size = 40 * 1024 * 1024
+
+    self.execute_query(create)
+    vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = block_size
     vector.get_value('exec_option')['COMPRESSION_CODEC'] =\
         vector.get_value('compression_codec')
     vector.get_value('exec_option')['num_nodes'] = 1
-    self.execute_query(QUERY, vector.get_value('exec_option'))
+    self.execute_query(query, vector.get_value('exec_option'))
 
     # Get the files in hdfs and verify. There can be at most 1 file that is smaller
-    # that the BLOCK_SIZE. The rest should be within 80% of it and not over.
+    # than the block_size. The rest should be within 80% of it and not over.
     found_small_file = False
-    sizes = self.filesystem_client.get_all_file_sizes(DIR)
+    sizes = self.filesystem_client.get_all_file_sizes(location)
     for size in sizes:
-      assert size < BLOCK_SIZE, "File size greater than expected.\
-          Expected: {0}, Got: {1}".format(BLOCK_SIZE, size)
-      if size < BLOCK_SIZE * 0.80:
+      assert size < block_size, "File size greater than expected.\
+          Expected: {0}, Got: {1}".format(block_size, size)
+      if size < block_size * 0.80:
         assert found_small_file == False
         found_small_file = True
 
@@ -179,7 +166,6 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite):
     """
     table_name = "test_hdfs_parquet_table_writer"
     qualified_table_name = "%s.%s" % (unique_database, table_name)
-    self.execute_query("drop table if exists %s" % qualified_table_name)
     self.execute_query("create table %s stored as parquet as select l_linenumber from "
         "tpch_parquet.lineitem limit 180000" % qualified_table_name)