You are viewing a plain text version of this content. The canonical link for it is available in the original archive.
Posted to commits@impala.apache.org by ab...@apache.org on 2016/09/08 04:39:33 UTC
[3/7] incubator-impala git commit: IMPALA-3491: Use unique database
fixture in test_insert_parquet.py
IMPALA-3491: Use unique database fixture in test_insert_parquet.py
Testing: Ran the test locally in a loop.
Did a private debug/core/hdfs build.
Change-Id: I790b2ed5236640c7263826d1d2a74b64d43ac6f7
Reviewed-on: http://gerrit.cloudera.org:8080/4317
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/f0ffbca2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/f0ffbca2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/f0ffbca2
Branch: refs/heads/master
Commit: f0ffbca2c36ce3ad8ea5b3629d91e6b579d7dfcf
Parents: 157c800
Author: Alex Behm <al...@cloudera.com>
Authored: Fri Sep 2 10:44:20 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Thu Sep 8 03:25:29 2016 +0000
----------------------------------------------------------------------
.../workloads/tpch/queries/insert_parquet.test | 6 +-
tests/query_test/test_insert_parquet.py | 60 ++++++++------------
2 files changed, 26 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f0ffbca2/testdata/workloads/tpch/queries/insert_parquet.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpch/queries/insert_parquet.test b/testdata/workloads/tpch/queries/insert_parquet.test
index 35b7cde..4707b7b 100644
--- a/testdata/workloads/tpch/queries/insert_parquet.test
+++ b/testdata/workloads/tpch/queries/insert_parquet.test
@@ -1,8 +1,8 @@
====
---- QUERY
# Tests using a larger table.
-create table if not exists orders_insert_test like orders location
-'$FILESYSTEM_PREFIX/test-warehouse/orders_insert_table';
+create table if not exists orders_insert_test like tpch_parquet.orders
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/orders_insert_table';
insert overwrite table orders_insert_test select * from tpch.orders
---- RESULTS
: 1500000
@@ -56,7 +56,7 @@ bigint
---- QUERY
# Test to verify that huge (larger than 64k) values can be written, see IMPALA-1705
create table if not exists test_insert_huge_vals (s string) stored as parquet
-location '$FILESYSTEM_PREFIX/test-warehouse/test_insert_huge_vals';
+location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/test_insert_huge_vals';
insert overwrite table test_insert_huge_vals
select cast(l_orderkey as string) from tpch.lineitem
union select group_concat(concat(s_name, s_address, s_phone)) from tpch.supplier
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/f0ffbca2/tests/query_test/test_insert_parquet.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_insert_parquet.py b/tests/query_test/test_insert_parquet.py
index 70d3545..d5b8c17 100644
--- a/tests/query_test/test_insert_parquet.py
+++ b/tests/query_test/test_insert_parquet.py
@@ -26,6 +26,7 @@ from tempfile import mkdtemp as make_tmp_dir
from tests.common.environ import impalad_basedir
from tests.common.impala_test_suite import ImpalaTestSuite
+from tests.common.parametrize import UniqueDatabase
from tests.common.skip import SkipIfIsilon, SkipIfLocal
from tests.common.test_dimensions import create_exec_option_dimension
from tests.common.test_vector import TestDimension
@@ -63,18 +64,14 @@ class TestInsertParquetQueries(ImpalaTestSuite):
cls.TestMatrix.add_constraint(lambda v:\
v.get_value('table_format').compression_codec == 'none')
- @classmethod
- def setup_class(cls):
- super(TestInsertParquetQueries, cls).setup_class()
-
- @pytest.mark.execute_serially
@SkipIfLocal.multiple_impalad
- def test_insert_parquet(self, vector):
+ @UniqueDatabase.parametrize(sync_ddl=True)
+ def test_insert_parquet(self, vector, unique_database):
vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = \
vector.get_value('file_size')
vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
vector.get_value('compression_codec')
- self.run_test_case('insert_parquet', vector, multiple_impalad=True)
+ self.run_test_case('insert_parquet', vector, unique_database, multiple_impalad=True)
class TestInsertParquetInvalidCodec(ImpalaTestSuite):
@classmethod
@@ -94,10 +91,6 @@ class TestInsertParquetInvalidCodec(ImpalaTestSuite):
cls.TestMatrix.add_constraint(lambda v:\
v.get_value('table_format').compression_codec == 'none')
- @classmethod
- def setup_class(cls):
- super(TestInsertParquetInvalidCodec, cls).setup_class()
-
@SkipIfLocal.multiple_impalad
def test_insert_parquet_invalid_codec(self, vector):
vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
@@ -124,40 +117,34 @@ class TestInsertParquetVerifySize(ImpalaTestSuite):
v.get_value('table_format').compression_codec == 'none')
cls.TestMatrix.add_dimension(TestDimension("compression_codec", *PARQUET_CODECS));
- @classmethod
- def setup_class(cls):
- super(TestInsertParquetVerifySize, cls).setup_class()
-
- @pytest.mark.execute_serially
@SkipIfIsilon.hdfs_block_size
@SkipIfLocal.hdfs_client
- def test_insert_parquet_verify_size(self, vector):
- # Test to verify that the result file size is close to what we expect.i
- TBL = "parquet_insert_size"
- DROP = "drop table if exists {0}".format(TBL)
- CREATE = ("create table parquet_insert_size like tpch_parquet.orders"
- " stored as parquet location '{0}/{1}'".format(WAREHOUSE, TBL))
- QUERY = "insert overwrite {0} select * from tpch.orders".format(TBL)
- DIR = get_fs_path("test-warehouse/{0}/".format(TBL))
- BLOCK_SIZE = 40 * 1024 * 1024
-
- self.execute_query(DROP)
- self.execute_query(CREATE)
-
- vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = BLOCK_SIZE
+ def test_insert_parquet_verify_size(self, vector, unique_database):
+ # Test to verify that the result file size is close to what we expect.
+ tbl_name = "parquet_insert_size"
+ fq_tbl_name = unique_database + "." + tbl_name
+ location = get_fs_path("test-warehouse/{0}.db/{1}/"
+ .format(unique_database, tbl_name))
+ create = ("create table {0} like tpch_parquet.orders stored as parquet"
+ .format(fq_tbl_name, location))
+ query = "insert overwrite {0} select * from tpch.orders".format(fq_tbl_name)
+ block_size = 40 * 1024 * 1024
+
+ self.execute_query(create)
+ vector.get_value('exec_option')['PARQUET_FILE_SIZE'] = block_size
vector.get_value('exec_option')['COMPRESSION_CODEC'] =\
vector.get_value('compression_codec')
vector.get_value('exec_option')['num_nodes'] = 1
- self.execute_query(QUERY, vector.get_value('exec_option'))
+ self.execute_query(query, vector.get_value('exec_option'))
# Get the files in hdfs and verify. There can be at most 1 file that is smaller
- # that the BLOCK_SIZE. The rest should be within 80% of it and not over.
+ # than the block_size. The rest should be within 80% of it and not over.
found_small_file = False
- sizes = self.filesystem_client.get_all_file_sizes(DIR)
+ sizes = self.filesystem_client.get_all_file_sizes(location)
for size in sizes:
- assert size < BLOCK_SIZE, "File size greater than expected.\
- Expected: {0}, Got: {1}".format(BLOCK_SIZE, size)
- if size < BLOCK_SIZE * 0.80:
+ assert size < block_size, "File size greater than expected.\
+ Expected: {0}, Got: {1}".format(block_size, size)
+ if size < block_size * 0.80:
assert found_small_file == False
found_small_file = True
@@ -179,7 +166,6 @@ class TestHdfsParquetTableWriter(ImpalaTestSuite):
"""
table_name = "test_hdfs_parquet_table_writer"
qualified_table_name = "%s.%s" % (unique_database, table_name)
- self.execute_query("drop table if exists %s" % qualified_table_name)
self.execute_query("create table %s stored as parquet as select l_linenumber from "
"tpch_parquet.lineitem limit 180000" % qualified_table_name)