You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by mi...@apache.org on 2017/12/01 16:01:50 UTC

[1/4] impala git commit: IMPALA-6244: Fix test failures with Hadoop 3.0

Repository: impala
Updated Branches:
  refs/heads/master 3ed122185 -> ea8d2ba7f


IMPALA-6244: Fix test failures with Hadoop 3.0

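The metadata query test fails when run against Hadoop 3.0 because some
defaults for sequence files changed, which alters the resulting file sizes.
The expected sizes in show-stats.test are now matched with a regex instead
of hard-coded values.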

Testing: Compared the output of
  hadoop fs -text /test-warehouse/alltypesmixedformat/year=2009/month=2/000023_0
after a data load on Hadoop 2.6 and on Hadoop 3.0 and verified that it is
identical; ran the metadata query test and verified that it now passes in
both cases.

Change-Id: I1ccffdb0f712da1feb55f839e8d87a30f15e4fb6
Reviewed-on: http://gerrit.cloudera.org:8080/8656
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/24c2ba0c
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/24c2ba0c
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/24c2ba0c

Branch: refs/heads/master
Commit: 24c2ba0cc5560fb54c1695e7c14e1a62c55654b5
Parents: 3ed1221
Author: Zach Amsden <za...@cloudera.com>
Authored: Mon Nov 27 23:43:54 2017 +0000
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Nov 30 07:33:16 2017 +0000

----------------------------------------------------------------------
 .../functional-query/queries/QueryTest/show-stats.test    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/24c2ba0c/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
index f6a3d99..11f8264 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/show-stats.test
@@ -82,11 +82,11 @@ BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
 show table stats alltypesmixedformat
 ---- LABELS
 YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION
----- RESULTS
-'2009','1',-1,1,'19.59KB','NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=1'
-'2009','2',-1,1,'21.35KB','NOT CACHED','NOT CACHED','SEQUENCE_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=2'
-'2009','3',-1,1,'17.42KB','NOT CACHED','NOT CACHED','RC_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=3'
-'Total','',-1,3,'58.36KB','0B','','','',''
+---- RESULTS: VERIFY_IS_EQUAL
+'2009','1',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','TEXT','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=1'
+'2009','2',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','SEQUENCE_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=2'
+'2009','3',-1,1,regex:.+KB,'NOT CACHED','NOT CACHED','RC_FILE','false','$NAMENODE/test-warehouse/alltypesmixedformat/year=2009/month=3'
+'Total','',-1,3,regex:.+KB,'0B','','','',''
 ---- TYPES
 STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING
 ====


[2/4] impala git commit: IMPALA-4506: Do not display some intro message if --quiet is set

Posted by mi...@apache.org.
IMPALA-4506: Do not display some intro message if --quiet is set

Change-Id: I19c6d00dfbbe805ee9c525b72eb5703840e2f582
Reviewed-on: http://gerrit.cloudera.org:8080/8613
Reviewed-by: Jim Apple <jb...@apache.org>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/9560d883
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/9560d883
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/9560d883

Branch: refs/heads/master
Commit: 9560d883e2e8584353242146c85cf17fc5bb55b9
Parents: 24c2ba0
Author: Jinchul <ji...@gmail.com>
Authored: Tue Nov 21 14:44:12 2017 +0900
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Nov 30 18:30:47 2017 +0000

----------------------------------------------------------------------
 shell/impala_shell.py | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/9560d883/shell/impala_shell.py
----------------------------------------------------------------------
diff --git a/shell/impala_shell.py b/shell/impala_shell.py
index ffe01e1..36f8420 100755
--- a/shell/impala_shell.py
+++ b/shell/impala_shell.py
@@ -1417,6 +1417,21 @@ def execute_queries_non_interactive_mode(options, query_options):
           shell.execute_query_list(queries)):
     sys.exit(1)
 
+def get_intro(options):
+  """Get introduction message for start-up. The last character should not be a return."""
+  if not options.verbose:
+    return ""
+
+  intro = WELCOME_STRING
+
+  if not options.ssl and options.creds_ok_in_clear and options.use_ldap:
+    intro += ("\n\nLDAP authentication is enabled, but the connection to Impala is "
+              "not secured by TLS.\nALL PASSWORDS WILL BE SENT IN THE CLEAR TO IMPALA.")
+
+  if options.refresh_after_connect:
+    intro += '\n'.join(REFRESH_AFTER_CONNECT_DEPRECATION_WARNING)
+  return intro
+
 if __name__ == "__main__":
   """
   There are two types of options: shell options and query_options. Both can be set in the
@@ -1481,8 +1496,9 @@ if __name__ == "__main__":
     sys.exit(1)
 
   if options.use_kerberos:
-    print_to_stderr("Starting Impala Shell using Kerberos authentication")
-    print_to_stderr("Using service name '%s'" % options.kerberos_service_name)
+    if options.verbose:
+      print_to_stderr("Starting Impala Shell using Kerberos authentication")
+      print_to_stderr("Using service name '%s'" % options.kerberos_service_name)
     # Check if the user has a ticket in the credentials cache
     try:
       if call(['klist', '-s']) != 0:
@@ -1493,9 +1509,11 @@ if __name__ == "__main__":
       print_to_stderr('klist not found on the system, install kerberos clients')
       sys.exit(1)
   elif options.use_ldap:
-    print_to_stderr("Starting Impala Shell using LDAP-based authentication")
+    if options.verbose:
+      print_to_stderr("Starting Impala Shell using LDAP-based authentication")
   else:
-    print_to_stderr("Starting Impala Shell without Kerberos authentication")
+    if options.verbose:
+      print_to_stderr("Starting Impala Shell without Kerberos authentication")
 
   options.ldap_password = None
   if options.use_ldap and options.ldap_password_cmd:
@@ -1514,10 +1532,12 @@ if __name__ == "__main__":
 
   if options.ssl:
     if options.ca_cert is None:
-      print_to_stderr("SSL is enabled. Impala server certificates will NOT be verified"\
-                      " (set --ca_cert to change)")
+      if options.verbose:
+        print_to_stderr("SSL is enabled. Impala server certificates will NOT be verified"\
+                        " (set --ca_cert to change)")
     else:
-      print_to_stderr("SSL is enabled")
+      if options.verbose:
+        print_to_stderr("SSL is enabled")
 
   if options.output_file:
     try:
@@ -1542,13 +1562,7 @@ if __name__ == "__main__":
     execute_queries_non_interactive_mode(options, query_options)
     sys.exit(0)
 
-  intro = WELCOME_STRING
-  if not options.ssl and options.creds_ok_in_clear and options.use_ldap:
-    intro += ("\n\nLDAP authentication is enabled, but the connection to Impala is "
-              "not secured by TLS.\nALL PASSWORDS WILL BE SENT IN THE CLEAR TO IMPALA.\n")
-
-  if options.refresh_after_connect:
-    intro += REFRESH_AFTER_CONNECT_DEPRECATION_WARNING
+  intro = get_intro(options)
 
   shell = ImpalaShell(options, query_options)
   while shell.is_alive:


[4/4] impala git commit: IMPALA-6255: Add device names to DiskIoMgr thread names

Posted by mi...@apache.org.
IMPALA-6255: Add device names to DiskIoMgr thread names

This change adds device names to the DiskIoMgr thread names. It will
make them easier to identify during debugging.

Change-Id: I30faeda6db8846e4aad64ce29ca811366d84910b
Reviewed-on: http://gerrit.cloudera.org:8080/8669
Reviewed-by: Lars Volker <lv...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/ea8d2ba7
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/ea8d2ba7
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/ea8d2ba7

Branch: refs/heads/master
Commit: ea8d2ba7f6b54c45f06d5effa6bc7238c50452a9
Parents: 0a0be17
Author: Lars Volker <lv...@cloudera.com>
Authored: Tue Nov 28 14:01:00 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Fri Dec 1 05:51:59 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/io/disk-io-mgr.cc  | 12 +++++++++++-
 tests/webserver/test_web_pages.py | 14 ++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/ea8d2ba7/be/src/runtime/io/disk-io-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index f9aed92..4f4074c 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -38,6 +38,8 @@ using namespace impala;
 using namespace impala::io;
 using namespace strings;
 
+using std::to_string;
+
 // Control the number of disks on the machine.  If 0, this comes from the system
 // settings.
 DEFINE_int32(num_disks, 0, "Number of disks on data node.");
@@ -303,20 +305,28 @@ Status DiskIoMgr::Init(MemTracker* process_mem_tracker) {
   for (int i = 0; i < disk_queues_.size(); ++i) {
     disk_queues_[i] = new DiskQueue(i);
     int num_threads_per_disk;
+    string device_name;
     if (i == RemoteDfsDiskId()) {
       num_threads_per_disk = FLAGS_num_remote_hdfs_io_threads;
+      device_name = "HDFS remote";
     } else if (i == RemoteS3DiskId()) {
       num_threads_per_disk = FLAGS_num_s3_io_threads;
+      device_name = "S3 remote";
     } else if (i == RemoteAdlsDiskId()) {
       num_threads_per_disk = FLAGS_num_adls_io_threads;
+      device_name = "ADLS remote";
     } else if (DiskInfo::is_rotational(i)) {
       num_threads_per_disk = num_io_threads_per_rotational_disk_;
+      // During tests, i may not point to an existing disk.
+      device_name = i < DiskInfo::num_disks() ? DiskInfo::device_name(i) : to_string(i);
     } else {
       num_threads_per_disk = num_io_threads_per_solid_state_disk_;
+      // During tests, i may not point to an existing disk.
+      device_name = i < DiskInfo::num_disks() ? DiskInfo::device_name(i) : to_string(i);
     }
     for (int j = 0; j < num_threads_per_disk; ++j) {
       stringstream ss;
-      ss << "work-loop(Disk: " << i << ", Thread: " << j << ")";
+      ss << "work-loop(Disk: " << device_name << ", Thread: " << j << ")";
       std::unique_ptr<Thread> t;
       RETURN_IF_ERROR(Thread::Create("disk-io-mgr", ss.str(), &DiskIoMgr::WorkLoop,
           this, disk_queues_[i], &t));

http://git-wip-us.apache.org/repos/asf/impala/blob/ea8d2ba7/tests/webserver/test_web_pages.py
----------------------------------------------------------------------
diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index 2586399..101f786 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -30,6 +30,7 @@ class TestWebPage(ImpalaTestSuite):
   CATALOG_URL = "http://localhost:{0}/catalog"
   CATALOG_OBJECT_URL = "http://localhost:{0}/catalog_object"
   QUERY_BACKENDS_URL = "http://localhost:{0}/query_backends"
+  THREAD_GROUP_URL = "http://localhost:{0}/thread-group"
   # log4j changes do not apply to the statestore since it doesn't
   # have an embedded JVM. So we make two sets of ports to test the
   # log level endpoints, one without the statestore port and the
@@ -185,3 +186,16 @@ class TestWebPage(ImpalaTestSuite):
           assert 'backend_states' not in response_json
       finally:
         self.client.cancel(query_handle)
+
+  def test_io_mgr_threads(self):
+    """Test that IoMgr threads have readable names. This test assumed that all systems we
+    support have a disk called 'sda'."""
+    response = self.get_and_check_status(
+        self.THREAD_GROUP_URL + "?group=disk-io-mgr&json", ports_to_test=[25000])
+    response_json = json.loads(response)
+    thread_names = [t["name"] for t in response_json['threads']]
+    expected_name_patterns = ["ADLS remote", "S3 remote", "HDFS remote", "sda"]
+    for pattern in expected_name_patterns:
+      assert any(pattern in t for t in thread_names), \
+           "Could not find thread matching '%s'" % pattern
+


[3/4] impala git commit: IMPALA-5940: Avoid stack tracing and log spew with Status::Expected()

Posted by mi...@apache.org.
IMPALA-5940: Avoid stack tracing and log spew with Status::Expected()

This change converts some callers of Status() to Status::Expected()
in the DataStreamMgr to avoid log spew and the unnecessary overhead of
stack tracing.

Change-Id: Ie1f7d16e60f7859d662e87642d0f82e1d74183ad
Reviewed-on: http://gerrit.cloudera.org:8080/8689
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/0a0be171
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/0a0be171
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/0a0be171

Branch: refs/heads/master
Commit: 0a0be17102981e7874c6396c42f56e0be8286069
Parents: 9560d88
Author: Michael Ho <kw...@cloudera.com>
Authored: Wed Nov 29 14:28:04 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Thu Nov 30 23:34:35 2017 +0000

----------------------------------------------------------------------
 be/src/runtime/data-stream-mgr.cc      | 21 ++++++++++++++-------
 be/src/runtime/krpc-data-stream-mgr.cc |  9 +++++----
 2 files changed, 19 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/0a0be171/be/src/runtime/data-stream-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/data-stream-mgr.cc b/be/src/runtime/data-stream-mgr.cc
index 9af8384..503cd29 100644
--- a/be/src/runtime/data-stream-mgr.cc
+++ b/be/src/runtime/data-stream-mgr.cc
@@ -185,8 +185,10 @@ Status DataStreamMgr::AddData(const TUniqueId& fragment_instance_id,
     // and there's no unexpected error here. If already_unregistered is false,
     // FindRecvrOrWait() timed out, which is unexpected and suggests a query setup error;
     // we return DATASTREAM_SENDER_TIMEOUT to trigger tear-down of the query.
-    return already_unregistered ? Status::OK() :
-        Status(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+    if (already_unregistered) return Status::OK();
+    ErrorMsg msg(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+    VLOG_QUERY << "DataStreamMgr::AddData(): " << msg.msg();
+    return Status::Expected(msg);
   }
   DCHECK(!already_unregistered);
   recvr->AddBatch(thrift_batch, sender_id);
@@ -202,11 +204,16 @@ Status DataStreamMgr::CloseSender(const TUniqueId& fragment_instance_id,
   shared_ptr<DataStreamRecvr> recvr = FindRecvrOrWait(fragment_instance_id, dest_node_id,
       &already_unregistered);
   if (recvr == nullptr) {
-    // Was not able to notify the receiver that this was the end of stream. Notify the
-    // sender that this failed so that they can take appropriate action (i.e. failing
-    // the query).
-    status = already_unregistered ? Status::OK() :
-        Status(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+    if (already_unregistered) {
+      status = Status::OK();
+    } else {
+      // Was not able to notify the receiver that this was the end of stream. Notify the
+      // sender that this failed so that they can take appropriate action (i.e. failing
+      // the query).
+      ErrorMsg msg(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(fragment_instance_id));
+      VLOG_QUERY << "DataStreamMgr::CloseSender(): " << msg.msg();
+      status = Status::Expected(msg);
+    }
   } else {
     recvr->RemoveSender(sender_id);
   }

http://git-wip-us.apache.org/repos/asf/impala/blob/0a0be171/be/src/runtime/krpc-data-stream-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/krpc-data-stream-mgr.cc b/be/src/runtime/krpc-data-stream-mgr.cc
index b70bca6..7c36191 100644
--- a/be/src/runtime/krpc-data-stream-mgr.cc
+++ b/be/src/runtime/krpc-data-stream-mgr.cc
@@ -198,8 +198,8 @@ void KrpcDataStreamMgr::AddData(const TransmitDataRequestPB* request,
     // FindRecvr() may return nullptr even though the receiver was once present. We
     // detect this case by checking already_unregistered - if true then the receiver was
     // already closed deliberately, and there's no unexpected error here.
-    Status(TErrorCode::DATASTREAM_RECVR_CLOSED, PrintId(finst_id), dest_node_id)
-        .ToProto(response->mutable_status());
+    ErrorMsg msg(TErrorCode::DATASTREAM_RECVR_CLOSED, PrintId(finst_id), dest_node_id);
+    Status::Expected(msg).ToProto(response->mutable_status());
     rpc_context->RespondSuccess();
     return;
   }
@@ -335,8 +335,9 @@ void KrpcDataStreamMgr::RespondToTimedOutSender(const std::unique_ptr<ContextTyp
   finst_id.__set_lo(request->dest_fragment_instance_id().lo());
   finst_id.__set_hi(request->dest_fragment_instance_id().hi());
 
-  Status(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(finst_id)).ToProto(
-      ctx->response->mutable_status());
+  ErrorMsg msg(TErrorCode::DATASTREAM_SENDER_TIMEOUT, PrintId(finst_id));
+  VLOG_QUERY << msg.msg();
+  Status::Expected(msg).ToProto(ctx->response->mutable_status());
   ctx->rpc_context->RespondSuccess();
   num_senders_waiting_->Increment(-1);
   num_senders_timedout_->Increment(1);