You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2017/12/01 16:01:50 UTC
[1/4] impala git commit: IMPALA-6244: Fix test failures with Hadoop 3.0
Repository: impala
Updated Branches:
refs/heads/master 3ed122185 -> ea8d2ba7f
IMPALA-6244: Fix test failures with Hadoop 3.0
The metadata query test fails when run against Hadoop 3.0 due to
some defaults changing for sequence files.
Testing: Compared the output of
  hadoop fs -text /test-warehouse/alltypesmixedformat/year=2009/month=2/000023_0
and verified it is the same after a data load on Hadoop 2.6 and
Hadoop 3.0; ran the metadata query test and verified it now
passes in both cases.
Change-Id: I1ccffdb0f712da1feb55f839e8d87a30f15e4fb6
Reviewed-on: http://gerrit.cloudera.org:8080/8656
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/24c2ba0c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/24c2ba0c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/24c2ba0c
Branch: refs/heads/master
Commit: 24c2ba0cc5560fb54c1695e7c14e1a62c55654b5
Parents: 3ed1221
Author: Zach Amsden <za...@cloudera.com>
Authored: Mon Nov 27 23:43:54 2017 +0000
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Nov 30 07:33:16 2017 +0000
----------------------------------------------------------------------
.../functional-query/queries/QueryTest/show-stats.test | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/24c2ba0c/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
index f6a3d99..11f8264 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
@@ -82,11 +82,11 @@ BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
show table stats alltypesmixedformat
---- LABELS
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
----- RESULTS
-'2009','1',-1,1,'19.59KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=1'
-'2009','2',-1,1,'21.35KB','NOT CACHED','NOT CACHED','SEQUENCE_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=2'
-'2009','3',-1,1,'17.42KB','NOT CACHED','NOT CACHED','RC_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=3'
-'Total','',-1,3,'58.36KB','0B','','','',''
+---- RESULTS: VERIFY_IS_EQUAL
+'2009','1',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=1'
+'2009','2',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','SEQUENCE_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=2'
+'2009','3',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','RC_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=3'
+'Total','',-1,3,regex:.+KB,'0B','','','',''
---- TYPES
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
====
[2/4] impala git commit: IMPALA-4506: Do not display some intro message if --quiet is set
Posted by mi...@apache.org.
IMPALA-4506: Do not display some intro message if --quiet is set
Change-Id: I19c6d00dfbbe805ee9c525b72eb5703840e2f582
Reviewed-on: http://gerrit.cloudera.org:8080/8613
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/9560d883
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/9560d883
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/9560d883
Branch: refs/heads/master
Commit: 9560d883e2e8584353242146c85cf17fc5bb55b9
Parents: 24c2ba0
Author: Jinchul <ji...@gmail.com>
Authored: Tue Nov 21 14:44:12 2017 +0900
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Nov 30 18:30:47 2017 +0000
----------------------------------------------------------------------
shell/impala_shell.py | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/9560d883/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index ffe01e1..36f8420 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1417,6 +1417,21 @@ def execute_queries_non_interactive_mode(options, query_options):
shell.execute_query_list(queries)):
sys.exit(1)
+def get_intro(options):
+ """Get introduction message for start-up. The last character should not be a return."""
+ if not options.verbose:
+ return ""
+
+ intro = WELCOME_STRING
+
+ if not options.ssl and options.creds_ok_in_clear and options.use_ldap:
+ intro += ("\n\nLDAP authentication is enabled, but the connection to Impala is "
+ "not secured by TLS.\nALL PASSWORDS WILL BE SENT IN THE CLEAR TO IMPALA.")
+
+ if options.refresh_after_connect:
+ intro += '\n'.join(REFRESH_AFTER_CONNECT_DEPRECATION_WARNING)
+ return intro
+
if __name__ == "__main__":
"""
There are two types of options: shell options and query_options. Both can be set in the
@@ -1481,8 +1496,9 @@ if __name__ == "__main__":
sys.exit(1)
if options.use_kerberos:
- print_to_stderr("Starting Impala Shell using Kerberos authentication")
- print_to_stderr("Using service name '%s'" % options.kerberos_service_name)
+ if options.verbose:
+ print_to_stderr("Starting Impala Shell using Kerberos authentication")
+ print_to_stderr("Using service name '%s'" % options.kerberos_service_name)
# Check if the user has a ticket in the credentials cache
try:
if call(['klist', '-s']) != 0:
@@ -1493,9 +1509,11 @@ if __name__ == "__main__":
print_to_stderr('klist not found on the system, install kerberos clients')
sys.exit(1)
elif options.use_ldap:
- print_to_stderr("Starting Impala Shell using LDAP-based authentication")
+ if options.verbose:
+ print_to_stderr("Starting Impala Shell using LDAP-based authentication")
else:
- print_to_stderr("Starting Impala Shell without Kerberos authentication")
+ if options.verbose:
+ print_to_stderr("Starting Impala Shell without Kerberos authentication")
options.ldap_password = None
if options.use_ldap and options.ldap_password_cmd:
@@ -1514,10 +1532,12 @@ if __name__ == "__main__":
if options.ssl:
if options.ca_cert is None:
- print_to_stderr("SSL is enabled. Impala server certificates will NOT be verified"\
- " (set --ca_cert to change)")
+ if options.verbose:
+ print_to_stderr("SSL is enabled. Impala server certificates will NOT be verified"\
+ " (set --ca_cert to change)")
else:
- print_to_stderr("SSL is enabled")
+ if options.verbose:
+ print_to_stderr("SSL is enabled")
if options.output_file:
try:
@@ -1542,13 +1562,7 @@ if __name__ == "__main__":
execute_queries_non_interactive_mode(options, query_options)
sys.exit(0)
- intro = WELCOME_STRING
- if not options.ssl and options.creds_ok_in_clear and options.use_ldap:
- intro += ("\n\nLDAP authentication is enabled, but the connection to Impala is "
- "not secured by TLS.\nALL PASSWORDS WILL BE SENT IN THE CLEAR TO IMPALA.\n")
-
- if options.refresh_after_connect:
- intro += REFRESH_AFTER_CONNECT_DEPRECATION_WARNING
+ intro = get_intro(options)
shell = ImpalaShell(options, query_options)
while shell.is_alive:
[4/4] impala git commit: IMPALA-6255: Add device names to DiskIoMgr thread names
Posted by mi...@apache.org.
IMPALA-6255: Add device names to DiskIoMgr thread names
This change adds device names to the DiskIoMgr thread names. It will
make them easier to identify during debugging.
Change-Id: I30faeda6db8846e4aad64ce29ca811366d84910b
Reviewed-on: http://gerrit.cloudera.org:8080/8669
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ea8d2ba7
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ea8d2ba7
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ea8d2ba7
Branch: refs/heads/master
Commit: ea8d2ba7f6b54c45f06d5effa6bc7238c50452a9
Parents: 0a0be17
Author: Lars Volker <lv...@cloudera.com>
Authored: Tue Nov 28 14:01:00 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 1 05:51:59 2017 +0000
----------------------------------------------------------------------
be/src/runtime/io/disk-io-mgr.cc | 12 +++++++++++-
tests/webserver/test_web_pages.py | 14 ++++++++++++++
2 files changed, 25 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/ea8d2ba7/be/src/runtime/io/disk-io-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index f9aed92..4f4074c 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -38,6 +38,8 @@ using namespace impala;
using namespace impala::io;
using namespace strings;
+using std::to_string;
+
// Control the number of disks on the machine. If 0, this comes from the system
// settings.
DEFINE_int32(num_disks, 0, "Number of disks on data node.");
@@ -303,20 +305,28 @@ Status DiskIoMgr::Init(MemTracker* process_mem_tracker) {
for (int i = 0; i < disk_queues_.size(); ++i) {
disk_queues_[i] = new DiskQueue(i);
int num_threads_per_disk;
+ string device_name;
if (i == RemoteDfsDiskId()) {
num_threads_per_disk = FLAGS_num_remote_hdfs_io_threads;
+ device_name = "HDFS remote";
} else if (i == RemoteS3DiskId()) {
num_threads_per_disk = FLAGS_num_s3_io_threads;
+ device_name = "S3 remote";
} else if (i == RemoteAdlsDiskId()) {
num_threads_per_disk = FLAGS_num_adls_io_threads;
+ device_name = "ADLS remote";
} else if (DiskInfo::is_rotational(i)) {
num_threads_per_disk = num_io_threads_per_rotational_disk_;
+ // During tests, i may not point to an existing disk.
+ device_name = i < DiskInfo::num_disks() ? DiskInfo::device_name(i) : to_string(i);
} else {
num_threads_per_disk = num_io_threads_per_solid_state_disk_;
+ // During tests, i may not point to an existing disk.
+ device_name = i < DiskInfo::num_disks() ? DiskInfo::device_name(i) : to_string(i);
}
for (int j = 0; j < num_threads_per_disk; ++j) {
stringstream ss;
- ss << "work-loop(Disk: " << i << ", Thread: " << j << ")";
+ ss << "work-loop(Disk: " << device_name << ", Thread: " << j << ")";
std::unique_ptr<Thread> t;
RETURN_IF_ERROR(Thread::Create("disk-io-mgr", ss.str(), &DiskIoMgr::WorkLoop,
this, disk_queues_[i], &t));
http://git-wip-us.apache.org/repos/asf/impala/blob/ea8d2ba7/tests/webserver/test_web_pages.py
----------------------------------------------------------------------
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index 2586399..101f786 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -30,6 +30,7 @@ class TestWebPage(ImpalaTestSuite):
CATALOG_URL = "http://localhost:{0}/catalog"
CATALOG_OBJECT_URL = "http://localhost:{0}/catalog_object"
QUERY_BACKENDS_URL = "http://localhost:{0}/query_backends"
+ THREAD_GROUP_URL = "http://localhost:{0}/thread-group"
# log4j changes do not apply to the statestore since it doesn't
# have an embedded JVM. So we make two sets of ports to test the
# log level endpoints, one without the statestore port and the
@@ -185,3 +186,16 @@ class TestWebPage(ImpalaTestSuite):
assert 'backend_states' not in response_json
finally:
self.client.cancel(query_handle)
+
+ def test_io_mgr_threads(self):
+ """Test that IoMgr threads have readable names. This test assumed that all systems we
+ support have a disk called 'sda'."""
+ response = self.get_and_check_status(
+ self.THREAD_GROUP_URL + "?group=disk-io-mgr&json", ports_to_test=[25000])
+ response_json = json.loads(response)
+ thread_names = [t["name"] for t in response_json['threads']]
+ expected_name_patterns = ["ADLS remote", "S3 remote", "HDFS remote", "sda"]
+ for pattern in expected_name_patterns:
+ assert any(pattern in t for t in thread_names), \
+ "Could not find thread matching '%s'" % pattern
+
[3/4] impala git commit: IMPALA-5940: Avoid stack tracing and log spew with Status::Expected()
Posted by mi...@apache.org.
IMPALA-5940: Avoid stack tracing and log spew with Status::Expected()
This change converts some callers of Status() to Status::Expected()
in the DataStreamMgr to avoid log spew and unnecessary overhead of
stack tracing.
Change-Id: Ie1f7d16e60f7859d662e87642d0f82e1d74183ad
Reviewed-on: http://gerrit.cloudera.org:8080/8689
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0a0be171
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0a0be171
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0a0be171
Branch: refs/heads/master
Commit: 0a0be17102981e7874c6396c42f56e0be8286069
Parents: 9560d88
Author: Michael Ho <kw...@cloudera.com>
Authored: Wed Nov 29 14:28:04 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Nov 30 23:34:35 2017 +0000
----------------------------------------------------------------------
be/src/runtime/data-stream-mgr.cc | 21 ++++++++++++++-------
be/src/runtime/krpc-data-stream-mgr.cc | 9 +++++----
2 files changed, 19 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/0a0be171/be/src/runtime/data-stream-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/data-stream-mgr.cc b/be/src/runtime/data-stream-mgr.cc
index 9af8384..503cd29 100644
--- a/be/src/runtime/data-stream-mgr.cc
+++ b/be/src/runtime/data-stream-mgr.cc
@@ -185,8 +185,10 @@ Status DataStreamMgr::AddData(const TUniqueId& fragment_instance_id,
// and there's no unexpected error here. If already_unregistered is false,
// FindRecvrOrWait() timed out, which is unexpected and suggests a query setup error;
// we return DATASTREAM_SENDER_TIMEOUT to trigger tear-down of the query.
- return already_unregistered ? Status::OK() :
- Status(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+ if (already_unregistered) return Status::OK();
+ ErrorMsg msg(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+ VLOG_QUERY << "DataStreamMgr::AddData(): " << msg.msg();
+ return Status::Expected(msg);
}
DCHECK(!already_unregistered);
recvr->AddBatch(thrift_batch, sender_id);
@@ -202,11 +204,16 @@ Status DataStreamMgr::CloseSender(const TUniqueId& fragment_instance_id,
shared_ptr<DataStreamRecvr> recvr = FindRecvrOrWait(fragment_instance_id, dest_node_id,
&already_unregistered);
if (recvr == nullptr) {
- // Was not able to notify the receiver that this was the end of stream. Notify the
- // sender that this failed so that they can take appropriate action (i.e. failing
- // the query).
- status = already_unregistered ? Status::OK() :
- Status(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+ if (already_unregistered) {
+ status = Status::OK();
+ } else {
+ // Was not able to notify the receiver that this was the end of stream. Notify the
+ // sender that this failed so that they can take appropriate action (i.e. failing
+ // the query).
+ ErrorMsg msg(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+ VLOG_QUERY << "DataStreamMgr::CloseSender(): " << msg.msg();
+ status = Status::Expected(msg);
+ }
} else {
recvr->RemoveSender(sender_id);
}
http://git-wip-us.apache.org/repos/asf/impala/blob/0a0be171/be/src/runtime/krpc-data-stream-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/krpc-data-stream-mgr.cc b/be/src/runtime/krpc-data-stream-mgr.cc
index b70bca6..7c36191 100644
--- a/be/src/runtime/krpc-data-stream-mgr.cc
+++ b/be/src/runtime/krpc-data-stream-mgr.cc
@@ -198,8 +198,8 @@ void KrpcDataStreamMgr::AddData(const TransmitDataRequestPB* request,
// FindRecvr() may return nullptr even though the receiver was once present. We
// detect this case by checking already_unregistered - if true then the receiver was
// already closed deliberately, and there's no unexpected error here.
- Status(TErrorCode::DATASTREAM_RECVR_CLOSED, PrintId(finst_id), dest_node_id)
- .ToProto(response->mutable_status());
+ ErrorMsg msg(TErrorCode::DATASTREAM_RECVR_CLOSED, PrintId(finst_id), dest_node_id);
+ Status::Expected(msg).ToProto(response->mutable_status());
rpc_context->RespondSuccess();
return;
}
@@ -335,8 +335,9 @@ void KrpcDataStreamMgr::RespondToTimedOutSender(const std::unique_ptr<ContextTyp
finst_id.__set_lo(request->dest_fragment_instance_id().lo());
finst_id.__set_hi(request->dest_fragment_instance_id().hi());
- Status(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(finst_id)).ToProto(
- ctx->response->mutable_status());
+ ErrorMsg msg(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(finst_id));
+ VLOG_QUERY << msg.msg();
+ Status::Expected(msg).ToProto(ctx->response->mutable_status());
ctx->rpc_context->RespondSuccess();
num_senders_waiting_->Increment(-1);
num_senders_timedout_->Increment(1);