You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/08/23 22:24:22 UTC
[impala] branch branch-4.1.1 updated (22745b562 -> f6ee249ac)
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a change to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git
from 22745b562 IMPALA-11489: Fix int overflow in >2GB ORC files
new c90d088c6 IMPALA-11447: Fix crash when fetching arrays/structs with result caching
new 635753492 IMPALA-11295: Deflake TestParquet.test_multiple_blocks_mt_dop
new a1a13c60d IMPALA-11391: Fixed race condition in test_drop_managed_kudu_table
new f6ee249ac IMPALA-11317/IMPALA-11316/IMPALA-11315: impala-shell Python 3 fixes
The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
be/src/service/query-result-set.cc | 8 ++++-
shell/ImpalaHttpClient.py | 2 +-
shell/impala_shell.py | 5 ++-
tests/custom_cluster/test_kudu.py | 9 +++--
tests/hs2/test_fetch_first.py | 66 ++++++++++++++++++++++-------------
tests/query_test/test_scanners.py | 18 +++++++---
tests/shell/test_shell_commandline.py | 11 ++++--
7 files changed, 83 insertions(+), 36 deletions(-)
[impala] 03/04: IMPALA-11391: Fixed race condition in test_drop_managed_kudu_table
Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git
commit a1a13c60d3365f5e87d798e1c18b4875ebde2405
Author: Gergely Fürnstáhl <gf...@cloudera.com>
AuthorDate: Fri Jun 24 15:25:34 2022 +0200
IMPALA-11391: Fixed race condition in test_drop_managed_kudu_table
test_drop_managed_kudu_table uses exception to verify the deleted table
is really missing. Depending on timing, this exception could have been
raised in several control paths with different content. Now the test
waits for event processing, meaning Analyzer will consistently catch
the missing table and raise the same exception.
Change-Id: I857098c87fcd44d945dd33108bcfdfaa2ca939df
Reviewed-on: http://gerrit.cloudera.org:8080/18667
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/18893
Tested-by: Quanlong Huang <hu...@gmail.com>
Reviewed-by: Quanlong Huang <hu...@gmail.com>
---
tests/custom_cluster/test_kudu.py | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/tests/custom_cluster/test_kudu.py b/tests/custom_cluster/test_kudu.py
index 8fbc6c938..c706c984f 100644
--- a/tests/custom_cluster/test_kudu.py
+++ b/tests/custom_cluster/test_kudu.py
@@ -24,8 +24,9 @@ from time import sleep
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.kudu_test_suite import KuduTestSuite
-from tests.common.skip import SkipIfKudu, SkipIfHive3, SkipIfBuildType
+from tests.common.skip import SkipIfKudu, SkipIfBuildType
from tests.common.test_dimensions import add_exec_option_dimension
+from tests.util.event_processor_utils import EventProcessorUtils
KUDU_MASTER_HOSTS = pytest.config.option.kudu_master_hosts
LOG = logging.getLogger(__name__)
@@ -291,12 +292,16 @@ class TestKuduHMSIntegration(CustomKuduTest):
assert kudu_client.table_exists(kudu_tbl_name)
kudu_client.delete_table(kudu_tbl_name)
assert not kudu_client.table_exists(kudu_tbl_name)
+
+ # Wait for events to prevent race condition
+ EventProcessorUtils.wait_for_event_processing(self)
+
try:
cursor.execute("DROP TABLE %s" % kudu_tbl_name)
assert False
except Exception as e:
LOG.info(str(e))
- assert "Table %s no longer exists in the Hive MetaStore." % kudu_tbl_name in str(e)
+ "Table does not exist: %s" % kudu_tbl_name in str(e)
@pytest.mark.execute_serially
def test_drop_external_kudu_table(self, cursor, kudu_client, unique_database):
[impala] 01/04: IMPALA-11447: Fix crash when fetching arrays/structs with result caching
Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git
commit c90d088c6f9cd37b3db463b038ad32cc130c02e9
Author: Csaba Ringhofer <cs...@cloudera.com>
AuthorDate: Thu Jul 21 14:53:42 2022 +0200
IMPALA-11447: Fix crash when fetching arrays/structs with result caching
Some parts of HS2ColumnarResultSet were not prepared for returning
non-scalar types. This code only runs if impala.resultset.cache.size
is set, which is not the case in most tests. The issue was caught
with Hue, which uses result caching.
Testing:
- Added a regression test in test_fetch_first.py, which contained
other tests that used result caching.
- It turned out that some tests in the file did not run at all,
as @needs_session() needs the parentheses at the end. For this
reason some test fixes were added to run them correctly, though
these changes are totally unrelated to the current issue.
Backport issue:
- Test fails due to STRUCT in SelectList not supported on Parquet.
Changed to use the corresponding ORC table.
Change-Id: Ia4dd8f76187dc3555207e2d30d46d811e0a7a126
Reviewed-on: http://gerrit.cloudera.org:8080/18768
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/18889
Tested-by: Quanlong Huang <hu...@gmail.com>
---
be/src/service/query-result-set.cc | 8 ++++-
tests/hs2/test_fetch_first.py | 66 ++++++++++++++++++++++++--------------
2 files changed, 49 insertions(+), 25 deletions(-)
diff --git a/be/src/service/query-result-set.cc b/be/src/service/query-result-set.cc
index 7ae6dc613..9d0300954 100644
--- a/be/src/service/query-result-set.cc
+++ b/be/src/service/query-result-set.cc
@@ -351,7 +351,13 @@ int HS2ColumnarResultSet::AddRows(
for (int j = 0; j < metadata_.columns.size(); ++j) {
ThriftTColumn* from = &o->result_set_->columns[j];
ThriftTColumn* to = &result_set_->columns[j];
- switch (metadata_.columns[j].columnType.types[0].scalar_type.type) {
+ const TColumnType& colType = metadata_.columns[j].columnType;
+ TPrimitiveType::type primitiveType = colType.types[0].scalar_type.type;
+ if (colType.types[0].type != TTypeNodeType::SCALAR) {
+ DCHECK(from->__isset.stringVal);
+ primitiveType = TPrimitiveType::STRING;
+ }
+ switch (primitiveType) {
case TPrimitiveType::NULL_TYPE:
case TPrimitiveType::BOOLEAN:
StitchNulls(
diff --git a/tests/hs2/test_fetch_first.py b/tests/hs2/test_fetch_first.py
index b925c66cb..3bd1b3835 100644
--- a/tests/hs2/test_fetch_first.py
+++ b/tests/hs2/test_fetch_first.py
@@ -128,8 +128,18 @@ class TestFetchFirst(HS2TestSuite):
def test_query_stmts_v1_with_result_spooling(self):
self.run_query_stmts_test({'spool_query_results': 'true'})
+ def run_query_expect_success(self, query, options):
+ """Executes a query and returns its handle."""
+ execute_statement_req = TCLIService.TExecuteStatementReq()
+ execute_statement_req.sessionHandle = self.session_handle
+ execute_statement_req.confOverlay = options
+ execute_statement_req.statement = query
+ execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
+ HS2TestSuite.check_response(execute_statement_resp)
+ return execute_statement_resp.operationHandle
+
@pytest.mark.execute_serially
- @needs_session
+ @needs_session()
def test_rows_materialized_counters(self):
"""Test that NumRowsFetched is updated even when a fetch request is served by the
results cache, and that RowsMaterialized is only updated when rows are first created
@@ -140,29 +150,25 @@ class TestFetchFirst(HS2TestSuite):
num_rows_fetched_from_cache = "NumRowsFetchedFromCache: {0} ({0})"
# Execute the query with the results cache enabled.
- execute_statement_req = TCLIService.TExecuteStatementReq()
- execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = str(num_rows)
- execute_statement_req.statement = statement
- execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
- HS2TestSuite.check_response(execute_statement_resp)
+ options = {self.IMPALA_RESULT_CACHING_OPT: str(num_rows)}
+ handle = self.run_query_expect_success(statement, options)
# Fetch all rows from the query and verify they have been cached.
- self.fetch_until(execute_statement_resp.operationHandle,
- TCLIService.TFetchOrientation.FETCH_NEXT, num_rows)
+ self.fetch_until(handle, TCLIService.TFetchOrientation.FETCH_NEXT, num_rows)
self.__verify_num_cached_rows(num_rows)
# Get the runtime profile and validate that NumRowsFetched and RowsMaterialized both
# equal the number of rows fetched by the query.
- profile = self.__get_runtime_profile(execute_statement_resp.operationHandle)
+ profile = self.__get_runtime_profile(handle)
assert num_rows_fetched.format(num_rows) in profile
# Fetch all rows again and confirm that RowsMaterialized is unchanged, but
# NumRowsFetched is double the number of rows returned by the query.
- self.fetch_until(execute_statement_resp.operationHandle,
- TCLIService.TFetchOrientation.FETCH_FIRST, num_rows)
- profile = self.__get_runtime_profile(execute_statement_resp.operationHandle)
+ self.fetch_until(handle, TCLIService.TFetchOrientation.FETCH_FIRST, num_rows)
+ profile = self.__get_runtime_profile(handle)
assert num_rows_fetched.format(num_rows) in profile
assert num_rows_fetched_from_cache.format(num_rows) in profile
+ self.close(handle)
def __get_runtime_profile(self, op_handle):
"""Helper method to get the runtime profile from a given operation handle."""
@@ -319,7 +325,7 @@ class TestFetchFirst(HS2TestSuite):
self.close(execute_statement_resp.operationHandle)
@pytest.mark.execute_serially
- @needs_session
+ @needs_session()
def test_constant_query_stmts(self):
"""Tests query stmts that return a constant result set. These queries are handled
somewhat specially by Impala, therefore, we test them separately. We expect
@@ -363,7 +369,7 @@ class TestFetchFirst(HS2TestSuite):
self.close(execute_statement_resp.operationHandle)
@pytest.mark.execute_serially
- @needs_session
+ @needs_session()
def test_non_query_stmts(self):
"""Tests Impala's limited support for the FETCH_FIRST fetch orientation for
non-query stmts that return a result set, such as SHOW, COMPUTE STATS, etc.
@@ -434,9 +440,10 @@ class TestFetchFirst(HS2TestSuite):
# FETCH_NEXT asking for 100 rows. There are only 20 remaining rows.
self.fetch_until(execute_statement_resp.operationHandle,
TCLIService.TFetchOrientation.FETCH_NEXT, 100, 20)
+ self.close(execute_statement_resp.operationHandle)
@pytest.mark.execute_serially
- @needs_session
+ @needs_session()
def test_parallel_insert(self):
"""Tests parallel inserts with result set caching on.
Parallel inserts have a coordinator instance but no coordinator
@@ -447,12 +454,23 @@ class TestFetchFirst(HS2TestSuite):
self.client.set_configuration({'sync_ddl': 1})
self.client.execute("create database %s" % self.TEST_DB)
self.client.execute("create table %s.orderclone like tpch.orders" % self.TEST_DB)
- execute_statement_req = TCLIService.TExecuteStatementReq()
- execute_statement_req.sessionHandle = self.session_handle
- execute_statement_req.confOverlay = dict()
- execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "10"
- execute_statement_req.statement = ("insert overwrite %s.orderclone "
- "select * from tpch.orders "
- "where o_orderkey < 0" % self.TEST_DB)
- execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
- HS2TestSuite.check_response(execute_statement_resp)
+ options = {self.IMPALA_RESULT_CACHING_OPT: "10"}
+ handle = self.run_query_expect_success("insert overwrite %s.orderclone "
+ "select * from tpch.orders "
+ "where o_orderkey < 0" % self.TEST_DB, options)
+ self.close(handle)
+
+ @pytest.mark.execute_serially
+ @needs_session()
+ def test_complex_types_result_caching(self):
+ """Regression test for IMPALA-11447. Returning complex types in select list
+ was crashing in hs2 if result caching was enabled.
+ """
+ options = {self.IMPALA_RESULT_CACHING_OPT: "1024", "disable_codegen": "true"}
+ handle = self.run_query_expect_success(
+ "select int_array from functional_orc_def.complextypestbl", options)
+ self.fetch_until(handle, TCLIService.TFetchOrientation.FETCH_NEXT, 10, 8)
+ handle = self.run_query_expect_success(
+ "select alltypes from functional_orc_def.complextypes_structs", options)
+ self.fetch_until(handle, TCLIService.TFetchOrientation.FETCH_NEXT, 10, 6)
+ self.close(handle)
[impala] 04/04: IMPALA-11317/IMPALA-11316/IMPALA-11315: impala-shell Python 3 fixes
Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git
commit f6ee249ac96fb09293cd3f9f1c0adeafb923cd5d
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sat May 21 19:04:45 2022 -0700
IMPALA-11317/IMPALA-11316/IMPALA-11315: impala-shell Python 3 fixes
This fixes a few impala-shell Python 3 issues:
1. In ImpalaShell's do_history(), the decode() call needs to be
avoided in Python 3, because in Python 3 the cmd is already
a string and doesn't need further decoding. (IMPALA-11315)
2. TestImpalaShell.test_http_socket_timeout() gets a different
error message in Python 3. It throws the "BlockingIOError"
rather than "socket.error". (IMPALA-11316)
3. ImpalaHttpClient.py's code to retrieve the body when
handling an HTTP error needs to have a decode() call
for the body. Otherwise, the body remains bytes and
causes TestImpalaShellInteractive.test_http_interactions_extra()
to fail. (IMPALA-11317)
Testing:
- Ran shell tests in the standard way
- Ran shell tests with the impala-shell executable coming from
a Python 3 virtualenv using the PyPi package
Change-Id: Ie58380a17d7e011f4ce96b27d34717509a0b80a6
Reviewed-on: http://gerrit.cloudera.org:8080/18556
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-by: Wenzhe Zhou <wz...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/18885
Reviewed-by: Quanlong Huang <hu...@gmail.com>
---
shell/ImpalaHttpClient.py | 2 +-
shell/impala_shell.py | 5 ++++-
tests/shell/test_shell_commandline.py | 11 ++++++++---
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/shell/ImpalaHttpClient.py b/shell/ImpalaHttpClient.py
index 947cecfc1..bbf1e4d6f 100644
--- a/shell/ImpalaHttpClient.py
+++ b/shell/ImpalaHttpClient.py
@@ -271,5 +271,5 @@ class ImpalaHttpClient(TTransportBase):
if self.code >= 300:
# Report any http response code that is not 1XX (informational response) or
# 2XX (successful).
- body = self.readBody()
+ body = self.readBody().decode('utf-8')
raise HttpError(self.code, self.message, body, self.headers)
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index acaa02b88..8a061c736 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1508,7 +1508,10 @@ class ImpalaShell(cmd.Cmd, object):
if self.readline and self.readline.get_current_history_length() > 0:
for index in xrange(1, self.readline.get_current_history_length() + 1):
cmd = self.readline.get_history_item(index)
- print('[%d]: %s' % (index, cmd.decode('utf-8', 'replace')), file=sys.stderr)
+ if sys.version_info.major == 2:
+ print('[%d]: %s' % (index, cmd.decode('utf-8', 'replace')), file=sys.stderr)
+ else:
+ print('[%d]: %s' % (index, cmd), file=sys.stderr)
else:
print(READLINE_UNAVAILABLE_ERROR, file=sys.stderr)
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index ef6536e26..cd53404c2 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -1220,9 +1220,14 @@ class TestImpalaShell(ImpalaTestSuite):
args = ['--quiet', '-B', '--query', 'select 0;']
result = run_impala_shell_cmd(vector, args + ['--http_socket_timeout_s=0'],
expect_success=False)
- expected_err = ("Caught exception [Errno 115] Operation now in progress, "
- "type=<class 'socket.error'> in OpenSession. Num remaining tries: 3")
- assert result.stderr.splitlines()[0] == expected_err
+ expected_err_py2 = (
+ "Caught exception [Errno 115] Operation now in progress, "
+ "type=<class 'socket.error'> in OpenSession. Num remaining tries: 3")
+ expected_err_py3 = (
+ "Caught exception [Errno 115] Operation now in progress, "
+ "type=<class 'BlockingIOError'> in OpenSession. Num remaining tries: 3")
+ actual_err = result.stderr.splitlines()[0]
+ assert actual_err == expected_err_py2 or actual_err == expected_err_py3
# Test http_socket_timeout_s=-1, expect errors
result = run_impala_shell_cmd(vector, args + ['--http_socket_timeout_s=-1'],
[impala] 02/04: IMPALA-11295: Deflake TestParquet.test_multiple_blocks_mt_dop
Posted by st...@apache.org.
This is an automated email from the ASF dual-hosted git repository.
stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 6357534926a24c1ec8e95c37e7b23f5ac29571e6
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Tue May 17 20:20:55 2022 +0800
IMPALA-11295: Deflake TestParquet.test_multiple_blocks_mt_dop
TestParquet.test_multiple_blocks_mt_dop runs a query on 6 scan ranges
using mt_dop=2. It then verifies the sum of ranges read on a backend is
2 (6/3). The test assumes that counters of the 2 instances on the same
host are printed consecutively. However, this is not always true. They
could be interleaving.
This patch makes the test more robust by grouping the counters based on
the host.
Test
- I can't reproduce the issue locally. But I'm able to run the new test
100 times without any error.
Change-Id: I16c576c41a212f83dda82a83931ab336a78a41e4
Reviewed-on: http://gerrit.cloudera.org:8080/18533
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Reviewed-on: http://gerrit.cloudera.org:8080/18892
Reviewed-by: Csaba Ringhofer <cs...@cloudera.com>
Tested-by: Quanlong Huang <hu...@gmail.com>
---
tests/query_test/test_scanners.py | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 9a8553349..53a15f0ae 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -688,6 +688,11 @@ class TestParquet(ImpalaTestSuite):
assert len(num_rows_read_list) == 7
assert len(ranges_complete_list) == 7
+ # Extract the host for each fragment instance. The first is the coordinator
+ # fragment instance.
+ host_list = re.findall(r'host=(\S+:[0-9]*)', result.runtime_profile)
+ assert len(host_list) == 7
+
total_rows_read = 0
# Skip the Averaged Fragment; it comes first in the runtime profile.
for num_row_read in num_rows_read_list[1:]:
@@ -695,10 +700,15 @@ class TestParquet(ImpalaTestSuite):
assert total_rows_read == TOTAL_ROWS
# Again skip the Averaged Fragment; it comes first in the runtime profile.
- # With mt_dop 2, every backend will have 2 instances which are printed consecutively
- # in the profile.
- for i in range(1, len(ranges_complete_list), 2):
- assert int(ranges_complete_list[i]) + int(ranges_complete_list[i + 1]) == 2
+ # With mt_dop 2, every backend will have 2 instances.
+ ranges_per_host = {}
+ for i in range(1, 7):
+ host = host_list[i]
+ if host not in ranges_per_host:
+ ranges_per_host[host] = 0
+ ranges_per_host[host] += int(ranges_complete_list[i])
+ for host in ranges_per_host:
+ assert ranges_per_host[host] == 2
finally:
self.client.clear_configuration()