You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2019/04/18 17:10:57 UTC

[impala] branch master updated (c8dbad8 -> 5c2d211)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git.


    from c8dbad8  IMPALA-8392: fix parallel docker_images build
     new 2ab2308  IMPALA-8426: Logging error in DROP_TABLE event processing
     new 25c3dfb  IMPALA-8158: Retrieve thrift profiles through Impyla 0.15.0
     new 5c2d211  IMPALA-8430: Fix flakiness in testCreateDropCreateDatabase

The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 .../impala/catalog/events/MetastoreEvents.java     | 31 +++++++++--
 .../events/MetastoreEventsProcessorTest.java       | 32 +++++++++--
 infra/python/deps/compiled-requirements.txt        |  2 +-
 tests/common/impala_connection.py                  | 39 ++++++++++---
 tests/common/impala_service.py                     | 26 ---------
 tests/common/impala_test_suite.py                  |  4 ++
 tests/query_test/test_cancellation.py              | 42 +++++++++-----
 tests/query_test/test_observability.py             | 65 ++++++++++------------
 8 files changed, 144 insertions(+), 97 deletions(-)


[impala] 01/03: IMPALA-8426: Logging error in DROP_TABLE event processing

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2ab23088849fa1da33e14b25816622837e50e99e
Author: Bharath Krishna <bh...@cloudera.com>
AuthorDate: Wed Apr 17 09:17:23 2019 -0700

    IMPALA-8426: Logging error in DROP_TABLE event processing
    
    Fix the order of the condition checks used for logging in
    DROP_TABLE event processing. Also update the EVENTS_SKIPPED
    metric to keep track of the number of drop table events
    skipped when CREATION_TIME does not match.
    
    Testing:
     - Added metric check to unit test.
     - Ran existing unit tests.
    
    Change-Id: I0a2ca10f82d183fd2821014e30b109b9f4474db4
    Reviewed-on: http://gerrit.cloudera.org:8080/13056
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/catalog/events/MetastoreEvents.java |  8 +++++---
 .../impala/catalog/events/MetastoreEventsProcessorTest.java    | 10 ++++++++--
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index a876c61..4f2b6e7 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -786,11 +786,13 @@ public class MetastoreEvents {
       Table removedTable = catalog_.removeTableIfExists(msTbl_, tblWasFound, tblMatched);
       if (removedTable != null) {
         infoLog("Removed table {} ", getFullyQualifiedTblName());
+      } else if (!tblWasFound.getRef()) {
+        debugLog("Table {} was not removed since it did not exist in catalog.",
+            tblName_);
       } else if (!tblMatched.getRef()) {
-        LOG.warn(debugString("Table %s was not removed from "
+        infoLog(debugString("Table %s was not removed from "
             + "catalog since the creation time of the table did not match", tblName_));
-      } else if (!tblWasFound.getRef()) {
-        debugLog("Table {} was not removed since it did not exist in catalog.", tblName_);
+        metrics_.getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC).inc();
       }
     }
   }
diff --git a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
index 7ab48e7..b9982e9 100644
--- a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
@@ -313,9 +313,9 @@ public class MetastoreEventsProcessorTest {
     // than that in the DROP_DB notification event. Two events are filtered here,
     // 1 : first CREATE_DATABASE as it is followed by another create of the same name.
     // 2 : DROP_DATABASE as it is trying to drop a database which is again created.
-    assertEquals(2, eventsProcessor_.getMetrics()
+    assertEquals(filteredCount + 2, eventsProcessor_.getMetrics()
         .getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC)
-        .getCount() - filteredCount);
+        .getCount());
 
     // Teardown step - Drop the created DB
     dropDatabaseCascadeFromImpala(TEST_DB_NAME);
@@ -716,7 +716,13 @@ public class MetastoreEventsProcessorTest {
     //second event should be drop_table. This event should also be skipped since
     // catalog state is more recent than the event
     assertEquals("DROP_TABLE", events.get(1).getEventType());
+    long numFilteredEvents =
+        eventsProcessor_.getMetrics()
+            .getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC).getCount();
     eventsProcessor_.processEvents(Lists.newArrayList(events.get(1)));
+    // Verify that the drop_table event is skipped and the metric is incremented.
+    assertEquals(numFilteredEvents + 1, eventsProcessor_.getMetrics()
+        .getCounter(MetastoreEventsProcessor.EVENTS_SKIPPED_METRIC).getCount());
     // even after drop table event, the table should still exist
     assertNotNull("Table should have existed since catalog state is current and event "
         + "is stale", catalog_.getTable(TEST_DB_NAME, testTblName));


[impala] 03/03: IMPALA-8430: Fix flakiness in testCreateDropCreateDatabase

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 5c2d211a88c1190a02ce7720fda7b0871f1e5cf5
Author: Bharath Krishna <bh...@cloudera.com>
AuthorDate: Wed Apr 17 09:54:13 2019 -0700

    IMPALA-8430: Fix flakiness in testCreateDropCreateDatabase
    
    The test fails when two databases with the same name are created
    with the same CREATION_TIME. Hence, add a sleep of 2 seconds to
    avoid this case. Also fix other tests with a similar use case.
    
    Testing
     - Fixed the unit tests
    
    Change-Id: I30bf4535d54c9cd8d257b528dc7a1b42f106800d
    Reviewed-on: http://gerrit.cloudera.org:8080/13058
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../impala/catalog/events/MetastoreEvents.java     | 23 ++++++++++++++++++++--
 .../events/MetastoreEventsProcessorTest.java       | 22 ++++++++++++++++++---
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
index 4f2b6e7..8b5b6f3 100644
--- a/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
+++ b/fe/src/main/java/org/apache/impala/catalog/events/MetastoreEvents.java
@@ -777,7 +777,16 @@ public class MetastoreEvents {
      * catalog, ignore the event. If the table exists in the catalog, compares the
      * createTime of the table in catalog with the createTime of the table from the event
      * and remove the catalog table if there is a match. If the catalog table is a
-     * incomplete table it is removed as well.
+     * incomplete table it is removed as well. The creation_time from HMS is unfortunately
+     * in seconds granularity, which means there is a limitation that we cannot
+     * distinguish between tables which are created with the same name within a second.
+     * So a sequence of create_table, drop_table, create_table happening within the
+     * same second might cause false positives on drop_table event processing. This is
+     * not a huge problem since the tables will eventually be created when the
+     * create events are processed but there will be a non-zero amount of time when the
+     * table will not be existing in catalog.
+     * TODO : Once HIVE-21595 is available we should rely on table_id for determining a
+     * newer incarnation of a previous table.
      */
     @Override
     public void process() {
@@ -900,7 +909,17 @@ public class MetastoreEvents {
      * CREATION_TIME of the catalog's DB object is greater than that of the notification
      * event's DB object, it means that the Database object present in the catalog is a
      * later version and we can skip the event. (For instance, when user does a create db,
-     * drop db and create db again with the same dbName.)
+     * drop db and create db again with the same dbName.)
+     * The creation_time from HMS is unfortunately in seconds granularity, which means
+     * there is a limitation that we cannot distinguish between databases which are
+     * created with the same name within a second. So a sequence of create_database,
+     * drop_database, create_database happening within the same second might cause
+     * false positives on drop_database event processing. This is not a huge problem
+     * since the databases will eventually be created when the create events are
+     * processed but there will be a non-zero amount of time when the database will not
+     * be existing in catalog.
+     * TODO : Once HIVE-21595 is available we should rely on database_id for determining a
+     * newer incarnation of a previous database.
      */
     @Override
     public void process() {
diff --git a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
index b9982e9..00d8b78 100644
--- a/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
+++ b/fe/src/test/java/org/apache/impala/catalog/events/MetastoreEventsProcessorTest.java
@@ -300,7 +300,7 @@ public class MetastoreEventsProcessorTest {
     // as the previous CREATE_DB operation, so as to trigger the filtering logic
     // based on CREATION_TIME in DROP_DB event processing. This is currently a
     // limitation : the DROP_DB event filtering expects that while processing events,
-    // the CREATION_TIME of two DB's with same name won't have the same
+    // the CREATION_TIME of two Databases with same name won't have the same
     // creation timestamp.
     sleep(2000);
     // Create database again with same name
@@ -687,7 +687,8 @@ public class MetastoreEventsProcessorTest {
    * already at its latest state
    */
   @Test
-  public void testCreateDropCreateTableFromImpala() throws ImpalaException, TException {
+  public void testCreateDropCreateTableFromImpala()
+      throws ImpalaException, TException, InterruptedException {
     assertEquals(EventProcessorStatus.ACTIVE, eventsProcessor_.getStatus());
     createDatabase(TEST_DB_NAME, null);
     final String testTblName = "testCreateDropCreateTableFromImpala";
@@ -696,6 +697,13 @@ public class MetastoreEventsProcessorTest {
     assertNotNull("Table should have been found after create table statement",
         catalog_.getTable(TEST_DB_NAME, testTblName));
     loadTable(testTblName);
+    // Adding sleep here to make sure that the CREATION_TIME is not same
+    // as the previous CREATE_TABLE operation, so as to trigger the filtering logic
+    // based on CREATION_TIME in DROP_TABLE event processing. This is currently a
+    // limitation : the DROP_TABLE event filtering expects that while processing events,
+    // the CREATION_TIME of two tables with same name won't have the same
+    // creation timestamp.
+    sleep(2000);
     dropTableFromImpala(TEST_DB_NAME, testTblName);
     // now catalogD does not have the table entry, create the table again
     createTableFromImpala(TEST_DB_NAME, testTblName, false);
@@ -841,11 +849,19 @@ public class MetastoreEventsProcessorTest {
    * of Table.
    */
   @Test
-  public void testCreateDropCreateDatabaseFromImpala() throws ImpalaException {
+  public void testCreateDropCreateDatabaseFromImpala()
+      throws ImpalaException, InterruptedException {
     assertEquals(EventProcessorStatus.ACTIVE, eventsProcessor_.getStatus());
     createDatabaseFromImpala(TEST_DB_NAME, "first");
     assertNotNull("Db should have been found after create database statement",
         catalog_.getDb(TEST_DB_NAME));
+    // Adding sleep here to make sure that the CREATION_TIME is not same
+    // as the previous CREATE_DB operation, so as to trigger the filtering logic
+    // based on CREATION_TIME in DROP_DB event processing. This is currently a
+    // limitation : the DROP_DB event filtering expects that while processing events,
+    // the CREATION_TIME of two Databases with same name won't have the same
+    // creation timestamp.
+    sleep(2000);
     dropDatabaseCascadeFromImpala(TEST_DB_NAME);
     assertNull(catalog_.getDb(TEST_DB_NAME));
     createDatabaseFromImpala(TEST_DB_NAME, "second");


[impala] 02/03: IMPALA-8158: Retrieve thrift profiles through Impyla 0.15.0

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 25c3dfb774296bce6ca511042babd97dca4b02cb
Author: Lars Volker <lv...@cloudera.com>
AuthorDate: Fri Jan 25 18:51:26 2019 -0800

    IMPALA-8158: Retrieve thrift profiles through Impyla 0.15.0
    
    This change updates Impyla to 0.15.0 and then uses Impyla to retrieve
    thrift profiles through the HS2 api.
    
    Unfortunately, some of the current usages of get_thrift_profile rely on
    the Beeswax query states, and the ImpylaHS2Connection does not have the
    required functionality yet. We will have to update these in a future
    change, once we have unified the query states.
    
    This change also adds a self-contained test for IMPALA-2063.
    
    Change-Id: I769a99f0843297dd2b20f2f5b1a9046c97bb131e
    Reviewed-on: http://gerrit.cloudera.org:8080/12530
    Reviewed-by: Lars Volker <lv...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 infra/python/deps/compiled-requirements.txt |  2 +-
 tests/common/impala_connection.py           | 39 +++++++++++++----
 tests/common/impala_service.py              | 26 ------------
 tests/common/impala_test_suite.py           |  4 ++
 tests/query_test/test_cancellation.py       | 42 ++++++++++++-------
 tests/query_test/test_observability.py      | 65 +++++++++++++----------------
 6 files changed, 91 insertions(+), 87 deletions(-)

diff --git a/infra/python/deps/compiled-requirements.txt b/infra/python/deps/compiled-requirements.txt
index 72e939a..0a406c2 100644
--- a/infra/python/deps/compiled-requirements.txt
+++ b/infra/python/deps/compiled-requirements.txt
@@ -19,7 +19,7 @@
 # after the toolchain is bootstrapped. Installed after requirements.txt
 
 argparse == 1.4.0
-impyla == 0.14.0
+impyla == 0.15.0
   bitarray == 0.8.1
   sasl == 0.1.3
   six == 1.11.0
diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index fbb47e5..ceddf6b 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -25,6 +25,7 @@ import re
 
 import impala.dbapi as impyla
 import tests.common
+from RuntimeProfile.ttypes import TRuntimeProfileFormat
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxClient
 
 
@@ -101,6 +102,11 @@ class ImpalaConnection(object):
     pass
 
   @abc.abstractmethod
+  def state_is_finished(self, operation_handle):
+    """Returns whether the state of a query is finished"""
+    pass
+
+  @abc.abstractmethod
   def get_log(self, operation_handle):
     """Returns the log of an operation as a string, with entries separated by newlines."""
     pass
@@ -186,6 +192,10 @@ class BeeswaxConnection(ImpalaConnection):
     LOG.info("-- getting state for operation: %s" % operation_handle)
     return self.__beeswax_client.get_state(operation_handle.get_handle())
 
+  def state_is_finished(self, operation_handle):
+    LOG.info("-- checking finished state for operation: {0}".format(operation_handle))
+    return self.get_state(operation_handle) == self.QUERY_STATES["FINISHED"]
+
   def get_exec_summary(self, operation_handle):
     LOG.info("-- getting exec summary operation: %s" % operation_handle)
     return self.__beeswax_client.get_exec_summary(operation_handle.get_handle())
@@ -270,10 +280,10 @@ class ImpylaHS2Connection(ImpalaConnection):
     LOG.info("-- closing query for operation handle: {0}".format(operation_handle))
     operation_handle.get_handle().close_operation()
 
-  def execute(self, sql_stmt, user=None):
+  def execute(self, sql_stmt, user=None, profile_format=TRuntimeProfileFormat.STRING):
     handle = self.execute_async(sql_stmt, user)
     try:
-      return self.__fetch_results(handle)
+      return self.__fetch_results(handle, profile_format=profile_format)
     finally:
       self.close_query(handle)
 
@@ -282,7 +292,7 @@ class ImpylaHS2Connection(ImpalaConnection):
     if user is not None:
       raise NotImplementedError("Not yet implemented for HS2 - authentication")
     try:
-      self.__cursor.execute(sql_stmt, configuration=self.__query_options)
+      self.__cursor.execute_async(sql_stmt, configuration=self.__query_options)
       return OperationHandle(self.__cursor, sql_stmt)
     except Exception:
       self.__cursor.close_operation()
@@ -290,20 +300,30 @@ class ImpylaHS2Connection(ImpalaConnection):
 
   def cancel(self, operation_handle):
     LOG.info("-- canceling operation: {0}".format(operation_handle))
-    return self.__beeswax_client.cancel_query(operation_handle.get_handle())
+    cursor = operation_handle.get_handle()
+    return cursor.cancel_operation(reset_state=False)
 
   def get_state(self, operation_handle):
     LOG.info("-- getting state for operation: {0}".format(operation_handle))
-    raise NotImplementedError("Not yet implemented for HS2 - states differ from beeswax")
+    cursor = operation_handle.get_handle()
+    return cursor.status()
+
+  def state_is_finished(self, operation_handle):
+    LOG.info("-- checking finished state for operation: {0}".format(operation_handle))
+    cursor = operation_handle.get_handle()
+    # cursor.status contains a string representation of one of
+    # TCLIService.TOperationState.
+    return cursor.status() == "FINISHED_STATE"
 
   def get_exec_summary(self, operation_handle):
     LOG.info("-- getting exec summary operation: {0}".format(operation_handle))
     raise NotImplementedError(
         "Not yet implemented for HS2 - summary returned is thrift, not string.")
 
-  def get_runtime_profile(self, operation_handle):
+  def get_runtime_profile(self, operation_handle, profile_format):
     LOG.info("-- getting runtime profile operation: {0}".format(operation_handle))
-    return self.__cursor.get_profile()
+    cursor = operation_handle.get_handle()
+    return cursor.get_profile(profile_format=profile_format)
 
   def wait_for_finished_timeout(self, operation_handle, timeout):
     LOG.info("-- waiting for query to reach FINISHED state: {0}".format(operation_handle))
@@ -330,7 +350,8 @@ class ImpylaHS2Connection(ImpalaConnection):
     LOG.info("-- fetching results from: {0}".format(handle))
     return self.__fetch_results(handle, max_rows)
 
-  def __fetch_results(self, handle, max_rows=-1):
+  def __fetch_results(self, handle, max_rows=-1,
+                      profile_format=TRuntimeProfileFormat.STRING):
     """Implementation of result fetching from handle."""
     cursor = handle.get_handle()
     assert cursor is not None
@@ -347,7 +368,7 @@ class ImpylaHS2Connection(ImpalaConnection):
       else:
         result_tuples = cursor.fetchmany(max_rows)
     log = self.get_log(handle)
-    profile = self.get_runtime_profile(handle)
+    profile = self.get_runtime_profile(handle, profile_format=profile_format)
     return ImpylaHS2ResultSet(success=True, result_tuples=result_tuples,
                               column_labels=column_labels, column_types=column_types,
                               query=handle.sql_stmt(), log=log, profile=profile)
diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py
index 5b35556..24eba8e 100644
--- a/tests/common/impala_service.py
+++ b/tests/common/impala_service.py
@@ -65,32 +65,6 @@ class BaseImpalaService(object):
   def read_debug_webpage(self, page_name, timeout=10, interval=1):
     return self.open_debug_webpage(page_name, timeout=timeout, interval=interval).text
 
-  def get_thrift_profile(self, query_id, timeout=10, interval=1):
-    """Returns thrift profile of the specified query ID, if available"""
-    page_name = "query_profile_encoded?query_id=%s" % (query_id)
-    try:
-      response = self.open_debug_webpage(page_name, timeout=timeout, interval=interval)
-      tbuf = response.text
-    except Exception as e:
-      LOG.info("Thrift profile for query %s not yet available: %s", query_id, str(e))
-      return None
-    else:
-      tree = TRuntimeProfileTree()
-      try:
-        deserialize(tree, zlib.decompress(base64.b64decode(tbuf)),
-                  protocol_factory=TCompactProtocol.TCompactProtocolFactory())
-        tree.validate()
-        return tree
-      except Exception as e:
-        LOG.info("Exception while deserializing query profile of %s: %s", query_id,
-                str(e));
-        # We should assert that the response code is not 200 once
-        # IMPALA-6332: Impala webserver should return HTTP error code for missing query
-        # profiles, is fixed.
-        if str(e) == 'Incorrect padding':
-          assert "Could not obtain runtime profile" in tbuf, tbuf
-        return None
-
   def get_debug_webpage_json(self, page_name):
     """Returns the json for the given Impala debug webpage, eg. '/queries'"""
     return json.loads(self.read_debug_webpage(page_name + "?json"))
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 0f18144..b25a87f 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -651,10 +651,14 @@ class ImpalaTestSuite(BaseTestSuite):
 
   def execute_query_using_client(self, client, query, vector):
     self.change_database(client, vector.get_value('table_format'))
+    query_options = vector.get_value('exec_option')
+    if query_options is not None: client.set_configuration(query_options)
     return client.execute(query)
 
   def execute_query_async_using_client(self, client, query, vector):
     self.change_database(client, vector.get_value('table_format'))
+    query_options = vector.get_value('exec_option')
+    if query_options is not None: client.set_configuration(query_options)
     return client.execute_async(query)
 
   def close_query_using_client(self, client, query):
diff --git a/tests/query_test/test_cancellation.py b/tests/query_test/test_cancellation.py
index c24aab0..ff21a7a 100644
--- a/tests/query_test/test_cancellation.py
+++ b/tests/query_test/test_cancellation.py
@@ -21,6 +21,7 @@
 import pytest
 import threading
 from time import sleep
+from RuntimeProfile.ttypes import TRuntimeProfileFormat
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.test_vector import ImpalaTestDimension
 from tests.common.impala_test_suite import ImpalaTestSuite
@@ -159,9 +160,6 @@ class TestCancellation(ImpalaTestSuite):
         except ImpalaBeeswaxException as e:
           threading.current_thread().fetch_results_error = e
 
-        threading.current_thread().query_profile = \
-          self.impalad_test_service.get_thrift_profile(handle.get_handle().id)
-
       thread = threading.Thread(target=fetch_results)
       thread.start()
 
@@ -183,17 +181,6 @@ class TestCancellation(ImpalaTestSuite):
       # Before accessing fetch_results_error we need to join the fetch thread
       thread.join()
 
-      # IMPALA-2063 Cancellation tests may generate profile text that is otherwise hard
-      # to reproduce for testing mis-formatting.
-      profile = thread.query_profile
-      if profile:
-        for (k, v) in profile.nodes[1].info_strings.iteritems():
-          assert v == v.rstrip(), \
-            "Mis-formatted profile text: %s %s" % (k, v)
-          # "Plan" text may be strangely formatted.
-          assert k == 'Plan' or '\n\n' not in v, \
-            "Mis-formatted profile text: %s %s" % (k, v)
-
       if thread.fetch_results_error is None:
         # If the fetch rpc didn't result in CANCELLED (and auto-close the query) then
         # the close rpc should have succeeded.
@@ -223,6 +210,33 @@ class TestCancellation(ImpalaTestSuite):
     if not debug_action and ('count' in query or 'limit' in query):
       self.execute_query(query, vector.get_value('exec_option'))
 
+  def test_misformatted_profile_text(self):
+    """Tests that canceled queries have no whitespace formatting errors in their profiles
+    (IMPALA-2063)."""
+    query = "select count(*) from functional_parquet.alltypes where bool_col = sleep(100)"
+    client = self.hs2_client
+    # Start query
+    handle = client.execute_async(query)
+    # Wait up to 5 seconds for the query to start
+    assert any(client.get_state(handle) == 'RUNNING_STATE' or sleep(1)
+               for _ in range(5)), 'Query failed to start'
+
+    client.cancel(handle)
+    # Wait up to 5 seconds for the query to get cancelled
+    # TODO(IMPALA-1262): This should be CANCELED_STATE
+    # TODO(IMPALA-8411): Remove and assert that the query is cancelled immediately
+    assert any(client.get_state(handle) == 'ERROR_STATE' or sleep(1)
+               for _ in range(5)), 'Query failed to cancel'
+    # Get profile and check for formatting errors
+    profile = client.get_runtime_profile(handle, TRuntimeProfileFormat.THRIFT)
+    for (k, v) in profile.nodes[1].info_strings.iteritems():
+      # Ensure that whitespace gets removed from values.
+      assert v == v.rstrip(), \
+        "Profile value contains surrounding whitespace: %s %s" % (k, v)
+      # Plan text may be strangely formatted.
+      assert k == 'Plan' or '\n\n' not in v, \
+        "Profile contains repeating newlines: %s %s" % (k, v)
+
   def teardown_method(self, method):
     # For some reason it takes a little while for the query to get completely torn down
     # when the debug action is WAIT, causing TestValidateMetrics.test_metrics_are_zero to
diff --git a/tests/query_test/test_observability.py b/tests/query_test/test_observability.py
index fd4c796..de8a2ab 100644
--- a/tests/query_test/test_observability.py
+++ b/tests/query_test/test_observability.py
@@ -23,12 +23,11 @@ from tests.common.skip import (SkipIfS3, SkipIfABFS, SkipIfADLS, SkipIfIsilon,
                                SkipIfLocal, SkipIfNotHdfsMinicluster)
 from tests.util.filesystem_utils import IS_EC
 from time import sleep, time
+from RuntimeProfile.ttypes import TRuntimeProfileFormat
 import logging
 import pytest
 import re
 
-MAX_THRIFT_PROFILE_WAIT_TIME_S = 300
-
 class TestObservability(ImpalaTestSuite):
   @classmethod
   def get_workload(self):
@@ -72,13 +71,11 @@ class TestObservability(ImpalaTestSuite):
     profiles of fragment instances."""
     query = """select count(distinct a.int_col) from functional.alltypes a
         inner join functional.alltypessmall b on (a.id = b.id + cast(sleep(15) as INT))"""
-    handle = self.client.execute_async(query)
-    query_id = handle.get_handle().id
+    handle = self.hs2_client.execute_async(query)
 
     num_validated = 0
-    tree = self.impalad_test_service.get_thrift_profile(query_id,
-        MAX_THRIFT_PROFILE_WAIT_TIME_S)
-    while self.client.get_state(handle) != self.client.QUERY_STATES['FINISHED']:
+    tree = self.hs2_client.get_runtime_profile(handle, TRuntimeProfileFormat.THRIFT)
+    while not self.hs2_client.state_is_finished(handle):
       assert tree, num_validated
       for node in tree.nodes:
         if node.name.startswith('Instance '):
@@ -88,9 +85,11 @@ class TestObservability(ImpalaTestSuite):
           # Try converting the string to make sure it's in the expected format
           assert datetime.strptime(report_time_str, '%Y-%m-%d %H:%M:%S')
           num_validated += 1
-      tree = self.impalad_test_service.get_thrift_profile(query_id)
+      tree = self.hs2_client.get_runtime_profile(handle, TRuntimeProfileFormat.THRIFT)
+      # Let's not hit the backend too hard
+      sleep(0.1)
     assert num_validated > 0
-    self.client.close_query(handle)
+    self.hs2_client.close_query(handle)
 
   @SkipIfS3.hbase
   @SkipIfLocal.hbase
@@ -478,23 +477,15 @@ class TestObservability(ImpalaTestSuite):
           counters.append(counter)
     return counters
 
-  def _get_thrift_profile(self, query_id, timeout=MAX_THRIFT_PROFILE_WAIT_TIME_S):
-    """Downloads a thrift profile and asserts that a profile was retrieved within the
-       specified timeout. If you see unexpected timeouts, try running the calling test
-       serially."""
-    thrift_profile = self.impalad_test_service.get_thrift_profile(query_id,
-                                                                  timeout=timeout)
-    assert thrift_profile, "Debug thrift profile for query {0} not available in {1} \
-        seconds".format(query_id, timeout)
-    return thrift_profile
-
   @pytest.mark.execute_serially
   def test_thrift_profile_contains_host_resource_metrics(self):
     """Tests that the thrift profile contains time series counters for CPU and network
        resource usage."""
     query_opts = {'resource_trace_ratio': 1.0}
-    result = self.execute_query("select sleep(2000)", query_opts)
-    thrift_profile = self._get_thrift_profile(result.query_id)
+    self.hs2_client.set_configuration(query_opts)
+    result = self.hs2_client.execute("select sleep(2000)",
+                                     profile_format=TRuntimeProfileFormat.THRIFT)
+    thrift_profile = result.profile
 
     expected_keys = ["HostCpuUserPercentage", "HostNetworkRx", "HostDiskReadThroughput"]
     for key in expected_keys:
@@ -510,9 +501,9 @@ class TestObservability(ImpalaTestSuite):
     nanosecond precision. Nanosecond precision is expected by management API clients
     that consume Impala debug webpages."""
     query = "select sleep(5)"
-    result = self.execute_query(query)
+    result = self.hs2_client.execute(query, profile_format=TRuntimeProfileFormat.THRIFT)
+    tree = result.profile
 
-    tree = self._get_thrift_profile(result.query_id)
     # tree.nodes[1] corresponds to ClientRequestState::summary_profile_
     # See be/src/service/client-request-state.[h|cc].
     start_time = tree.nodes[1].info_strings["Start Time"]
@@ -533,30 +524,30 @@ class TestObservability(ImpalaTestSuite):
     without coordinators, the End Time is set only when UnregisterQuery() is called."""
     # Test the end time of a query with a coordinator.
     query = "select 1"
-    handle = self.execute_query_async(query)
-    result = self.client.fetch(query, handle)
+    handle = self.hs2_client.execute_async(query)
+    result = self.hs2_client.fetch(query, handle)
     # Ensure that the query returns a non-empty result set.
     assert result is not None
     # Once the results have been fetched, the query End Time must be set.
-    query_id = handle.get_handle().id
-    tree = self._get_thrift_profile(query_id)
+    tree = self.hs2_client.get_runtime_profile(handle, TRuntimeProfileFormat.THRIFT)
     end_time = tree.nodes[1].info_strings["End Time"]
-    assert end_time is not None
-    self.client.close_query(handle)
+    assert end_time
+    self.hs2_client.close_query(handle)
+
     # Test the end time of a query without a coordinator.
     query = "describe functional.alltypes"
-    handle = self.execute_query_async(query)
-    result = self.client.fetch(query, handle)
+    handle = self.hs2_client.execute_async(query)
+    result = self.hs2_client.fetch(query, handle)
     # Ensure that the query returns a non-empty result set.
-    assert result is not None
+    assert result
     # The query End Time must not be set until the query is unregisterted
-    query_id = handle.get_handle().id
-    tree = self._get_thrift_profile(query_id)
+    tree = self.hs2_client.get_runtime_profile(handle, TRuntimeProfileFormat.THRIFT)
     end_time = tree.nodes[1].info_strings["End Time"]
     assert len(end_time) == 0, end_time
-    self.client.close_query(handle)
-    # The query End Time must be set after the query is unregistered
-    tree = self._get_thrift_profile(query_id)
+    # Save the last operation to be able to retrieve the profile after closing the query
+    last_op = handle.get_handle()._last_operation
+    self.hs2_client.close_query(handle)
+    tree = last_op.get_profile(TRuntimeProfileFormat.THRIFT)
     end_time = tree.nodes[1].info_strings["End Time"]
     assert end_time is not None