You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by jo...@apache.org on 2023/02/28 17:12:47 UTC

[impala] branch master updated (7c854e117 -> ba3518366)

This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


    from 7c854e117 IMPALA-11953: Declare num_trues and num_falses in TIntermediateColumnStats as optional
     new 2ae0e4139 IMPALA-10111: Fix TestWebPage::test_query_stmt flakiness
     new ff62a4df3 IMPALA-11951: Add tools for checking/fixing python 3 syntax
     new c71de994b IMPALA-11952 (part 1): Fix except syntax
     new 2b550634d IMPALA-11952 (part 2): Fix print function syntax
     new c1794023b IMPALA-11952 (part 3): Fix raise syntax
     new ba3518366 IMPALA-11952 (part 4): Fix odds and ends: Octals, long, lambda, etc.

The 6 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 be/src/codegen/gen_ir_descriptions.py              |  5 +-
 bin/bootstrap_toolchain.py                         |  2 +-
 bin/check-python-syntax.sh                         | 71 +++++++++++++++++++++
 bin/check-rat-report.py                            |  5 +-
 bin/collect_minidumps.py                           | 11 ++--
 bin/compare_branches.py                            | 33 +++++-----
 bin/diagnostics/experimental/plan-graph.py         |  1 +
 .../experimental/tpcds_run_comparator.py           |  1 +
 bin/gen_build_version.py                           |  9 +--
 bin/generate_xml_config.py                         | 15 ++---
 bin/get_code_size.py                               |  3 +-
 bin/{impala-flake8 => impala-futurize}             |  2 +-
 bin/inline_pom.py                                  |  1 +
 bin/jenkins/critique-gerrit-review.py              |  3 +-
 bin/jenkins/dockerized-impala-preserve-vars.py     |  1 +
 bin/jenkins/populate_m2_directory.py               |  1 +
 bin/load-data.py                                   |  3 +-
 bin/parse-thrift-profile.py                        |  5 +-
 bin/push_to_asf.py                                 | 73 +++++++++++-----------
 bin/run-workload.py                                | 11 ++--
 bin/single_node_perf_run.py                        |  3 +-
 bin/start-impala-cluster.py                        |  2 +-
 bin/validate-unified-backend-test-filters.py       |  1 +
 common/thrift/generate_error_codes.py              |  1 +
 common/thrift/generate_metrics.py                  |  5 +-
 docker/monitor.py                                  |  2 +-
 docker/test-with-docker.py                         |  2 +-
 infra/deploy/deploy.py                             | 62 +++++++++---------
 infra/python/deps/requirements.txt                 |  1 +
 lib/python/impala_py_lib/gdb/impala-gdb.py         |  1 +
 .../impala_py_lib/jenkins/generate_junitxml.py     |  1 +
 lib/python/impala_py_lib/profiles.py               |  3 +-
 testdata/bin/generate-schema-statements.py         | 40 ++++++------
 testdata/bin/generate-test-vectors.py              | 17 ++---
 testdata/bin/load-tpc-kudu.py                      |  3 +-
 testdata/bin/rewrite-iceberg-metadata.py           |  1 +
 testdata/bin/wait-for-hiveserver2.py               | 13 ++--
 testdata/bin/wait-for-metastore.py                 | 11 ++--
 .../common/etc/hadoop/conf/yarn-site.xml.py        |  3 +-
 testdata/common/cgroups.py                         |  5 +-
 testdata/common/text_delims_table.py               |  1 +
 testdata/common/widetable.py                       |  3 +-
 tests/beeswax/impala_beeswax.py                    | 12 ++--
 tests/benchmark/report_benchmark_results.py        |  5 +-
 tests/common/impala_cluster.py                     |  7 ++-
 tests/common/impala_service.py                     |  6 +-
 tests/common/impala_test_suite.py                  | 13 ++--
 tests/common/resource_pool_config.py               |  3 +-
 tests/common/test_dimensions.py                    | 14 ++---
 tests/common/test_result_verifier.py               |  4 +-
 tests/common/test_vector.py                        |  2 +-
 tests/comparison/cluster.py                        |  1 +
 tests/comparison/data_generator_mapper.py          |  1 +
 tests/comparison/data_generator_mapred_common.py   |  2 +-
 tests/comparison/db_connection.py                  |  1 +
 tests/comparison/discrepancy_searcher.py           |  1 +
 tests/comparison/leopard/front_end.py              |  2 +-
 tests/comparison/leopard/impala_docker_env.py      |  3 +-
 tests/comparison/leopard/report.py                 | 40 ++++++------
 tests/comparison/query_generator.py                |  1 +
 tests/comparison/util/verify-oracle-connection.py  |  3 +-
 tests/conftest.py                                  |  1 +
 tests/custom_cluster/test_blacklist.py             |  3 +-
 tests/custom_cluster/test_breakpad.py              |  5 +-
 tests/custom_cluster/test_catalog_hms_failures.py  |  3 +-
 tests/custom_cluster/test_client_ssl.py            |  5 +-
 tests/custom_cluster/test_coordinators.py          |  4 +-
 tests/custom_cluster/test_events_custom_configs.py |  1 +
 tests/custom_cluster/test_hdfs_timeout.py          |  2 +-
 tests/custom_cluster/test_hs2_fault_injection.py   |  6 +-
 tests/custom_cluster/test_local_catalog.py         |  5 +-
 tests/custom_cluster/test_mem_reservations.py      |  2 +-
 tests/custom_cluster/test_query_expiration.py      |  6 +-
 tests/custom_cluster/test_query_retries.py         | 13 ++--
 tests/custom_cluster/test_restart_services.py      | 15 ++---
 tests/custom_cluster/test_scratch_disk.py          |  3 +-
 tests/custom_cluster/test_thrift_socket.py         |  2 +-
 .../custom_cluster/test_topic_update_frequency.py  |  1 +
 tests/custom_cluster/test_udf_concurrency.py       | 11 ++--
 tests/custom_cluster/test_web_pages.py             |  1 +
 tests/metadata/test_ddl.py                         |  2 +-
 tests/metadata/test_hdfs_permissions.py            |  4 +-
 tests/metadata/test_hms_integration.py             |  4 +-
 tests/performance/query_exec_functions.py          | 10 +--
 tests/query_test/test_aggregation.py               |  4 +-
 tests/query_test/test_beeswax.py                   |  4 +-
 tests/query_test/test_compressed_formats.py        |  3 +-
 tests/query_test/test_decimal_queries.py           |  6 +-
 tests/query_test/test_hdfs_caching.py              |  3 +-
 tests/query_test/test_insert.py                    |  2 +-
 tests/query_test/test_kudu.py                      |  3 +-
 tests/query_test/test_limit.py                     |  3 +-
 tests/query_test/test_nested_types.py              |  6 +-
 tests/query_test/test_partitioning.py              |  2 +-
 tests/query_test/test_query_mem_limit.py           |  2 +-
 tests/query_test/test_scanners.py                  |  5 +-
 tests/query_test/test_udfs.py                      |  6 +-
 tests/run-tests.py                                 | 11 ++--
 tests/shell/test_shell_commandline.py              |  4 +-
 tests/shell/test_shell_interactive.py              |  5 +-
 tests/shell/util.py                                |  1 +
 tests/statestore/test_statestore.py                | 15 ++---
 tests/stress/extract_min_mem.py                    |  3 +-
 tests/stress/runtime_info.py                       |  1 +
 tests/unittests/test_file_parser.py                |  7 ++-
 tests/unittests/test_result_verifier.py            |  4 +-
 tests/util/acid_txn.py                             |  3 +-
 tests/util/get_parquet_metadata.py                 |  2 +-
 tests/util/plugin_runner.py                        |  2 +-
 tests/util/run_impyla_http_query.py                |  1 +
 tests/util/test_file_parser.py                     | 21 ++++---
 tests/webserver/test_web_pages.py                  | 11 ++--
 112 files changed, 480 insertions(+), 327 deletions(-)
 create mode 100755 bin/check-python-syntax.sh
 copy bin/{impala-flake8 => impala-futurize} (95%)


[impala] 03/06: IMPALA-11952 (part 1): Fix except syntax

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c71de994b07c3863104b7626c6fd15a5650c066c
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Feb 26 13:12:47 2023 -0800

    IMPALA-11952 (part 1): Fix except syntax
    
    Python 3 does not support this old except syntax:
    
    except Exception, e:
    
    Instead, it needs to be:
    
    except Exception as e:
    
    This uses impala-futurize to fix all locations of
    the old syntax.
    
    Testing:
     - The check-python-syntax.sh no longer shows errors
       for except syntax.
    
    Change-Id: I1737281a61fa159c8d91b7d4eea593177c0bd6c9
    Reviewed-on: http://gerrit.cloudera.org:8080/19551
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Michael Smith <mi...@cloudera.com>
---
 bin/bootstrap_toolchain.py                       |  2 +-
 bin/generate_xml_config.py                       |  2 +-
 bin/get_code_size.py                             |  2 +-
 bin/start-impala-cluster.py                      |  2 +-
 docker/monitor.py                                |  2 +-
 infra/deploy/deploy.py                           |  2 +-
 testdata/common/cgroups.py                       |  2 +-
 tests/beeswax/impala_beeswax.py                  | 10 +++++-----
 tests/common/impala_cluster.py                   |  4 ++--
 tests/common/impala_service.py                   |  6 +++---
 tests/common/impala_test_suite.py                |  8 ++++----
 tests/custom_cluster/test_coordinators.py        |  4 ++--
 tests/custom_cluster/test_hdfs_timeout.py        |  2 +-
 tests/custom_cluster/test_hs2_fault_injection.py |  6 +++---
 tests/custom_cluster/test_local_catalog.py       |  2 +-
 tests/custom_cluster/test_mem_reservations.py    |  2 +-
 tests/custom_cluster/test_query_expiration.py    |  6 +++---
 tests/custom_cluster/test_query_retries.py       | 10 +++++-----
 tests/custom_cluster/test_restart_services.py    | 12 ++++++------
 tests/metadata/test_ddl.py                       |  2 +-
 tests/metadata/test_hdfs_permissions.py          |  4 ++--
 tests/performance/query_exec_functions.py        | 10 +++++-----
 tests/query_test/test_beeswax.py                 |  4 ++--
 tests/query_test/test_decimal_queries.py         |  6 +++---
 tests/query_test/test_insert.py                  |  2 +-
 tests/query_test/test_nested_types.py            |  6 +++---
 tests/query_test/test_partitioning.py            |  2 +-
 tests/query_test/test_query_mem_limit.py         |  2 +-
 tests/query_test/test_udfs.py                    |  6 +++---
 tests/shell/test_shell_commandline.py            |  4 ++--
 tests/statestore/test_statestore.py              | 12 ++++++------
 tests/unittests/test_result_verifier.py          |  4 ++--
 32 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/bin/bootstrap_toolchain.py b/bin/bootstrap_toolchain.py
index 7e8371a4d..457625d19 100755
--- a/bin/bootstrap_toolchain.py
+++ b/bin/bootstrap_toolchain.py
@@ -138,7 +138,7 @@ def wget_and_unpack_package(download_path, file_name, destination, wget_no_clobb
         cmd.append("--no-clobber")
       check_output(cmd)
       break
-    except Exception, e:
+    except Exception as e:
       if attempt == NUM_ATTEMPTS:
         raise
       logging.error("Download failed; retrying after sleep: " + str(e))
diff --git a/bin/generate_xml_config.py b/bin/generate_xml_config.py
index af9c2e5c4..915e3e276 100755
--- a/bin/generate_xml_config.py
+++ b/bin/generate_xml_config.py
@@ -84,7 +84,7 @@ def dump_config(d, source_path, out):
       if isinstance(v, int):
         v = str(v)
       v_new = _substitute_env_vars(v)
-    except KeyError, e:
+    except KeyError as e:
       raise Exception("failed environment variable substitution for value {k}: {e}"
                       .format(k=k, e=e))
     print >>out, """\
diff --git a/bin/get_code_size.py b/bin/get_code_size.py
index 53e19485b..9a2e78298 100755
--- a/bin/get_code_size.py
+++ b/bin/get_code_size.py
@@ -29,7 +29,7 @@ def get_bin_size_data(file):
   data = ""
   try:
     data = subprocess.check_output(["size", "-B", "-t", file], stderr=subprocess.STDOUT)
-  except Exception, e:
+  except Exception as e:
     data = e.output
 
   res = re.split(r'\s+', data.split("\n")[-2])
diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py
index d4295a165..b1e94545e 100755
--- a/bin/start-impala-cluster.py
+++ b/bin/start-impala-cluster.py
@@ -846,7 +846,7 @@ if __name__ == "__main__":
     # Check for the cluster to be ready.
     impala_cluster.wait_until_ready(expected_cluster_size,
         expected_cluster_size - expected_catalog_delays)
-  except Exception, e:
+  except Exception as e:
     LOG.exception("Error starting cluster")
     sys.exit(1)
 
diff --git a/docker/monitor.py b/docker/monitor.py
index 5eefe5c9b..530c63c7c 100644
--- a/docker/monitor.py
+++ b/docker/monitor.py
@@ -163,7 +163,7 @@ class ContainerMonitor(object):
     try:
       statcontents = file(os.path.join(dirname, stat)).read()
       return statcontents.replace("\n", " ").strip()
-    except IOError, e:
+    except IOError as e:
       # Ignore errors; cgroup can disappear on us.
       logging.warning("Ignoring exception reading cgroup. " +
                       "This can happen if container just exited. " + str(e))
diff --git a/infra/deploy/deploy.py b/infra/deploy/deploy.py
index 9fcfb2863..1665e5182 100644
--- a/infra/deploy/deploy.py
+++ b/infra/deploy/deploy.py
@@ -339,7 +339,7 @@ def transform_port(rcg_name, rcg_config_dict, rcg_config_name):
                         (rcg_config_name, rcg_name,))
     try:
         val_int = int(val)
-    except ValueError, e:
+    except ValueError as e:
         raise Exception("Could not convert %s config (%s) for rcg %s into integer" %
                         (rcg_config_name, val, rcg_name))
 
diff --git a/testdata/common/cgroups.py b/testdata/common/cgroups.py
index f7f90a358..c313b563f 100755
--- a/testdata/common/cgroups.py
+++ b/testdata/common/cgroups.py
@@ -73,7 +73,7 @@ def create_impala_cgroup_path(instance_num):
   cgroup_path = os.path.join(parent_cgroup, ("impala-%s" % instance_num))
   try:
     os.makedirs(cgroup_path)
-  except OSError, ex:
+  except OSError as ex:
     if ex.errno == errno.EEXIST and os.path.isdir(cgroup_path):
         pass
     else: raise
diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py
index a3fae9da7..3edfb2584 100644
--- a/tests/beeswax/impala_beeswax.py
+++ b/tests/beeswax/impala_beeswax.py
@@ -160,7 +160,7 @@ class ImpalaBeeswaxClient(object):
       protocol = TBinaryProtocol.TBinaryProtocolAccelerated(self.transport)
       self.imp_service = ImpalaService.Client(protocol)
       self.connected = True
-    except Exception, e:
+    except Exception as e:
       raise ImpalaBeeswaxException(self.__build_error_message(e), e)
 
   def close_connection(self):
@@ -518,12 +518,12 @@ class ImpalaBeeswaxClient(object):
       raise ImpalaBeeswaxException("Not connected", None)
     try:
       return rpc()
-    except BeeswaxService.BeeswaxException, b:
+    except BeeswaxService.BeeswaxException as b:
       raise ImpalaBeeswaxException(self.__build_error_message(b), b)
-    except TTransportException, e:
+    except TTransportException as e:
       self.connected = False
       raise ImpalaBeeswaxException(self.__build_error_message(e), e)
-    except TApplicationException, t:
+    except TApplicationException as t:
       raise ImpalaBeeswaxException(self.__build_error_message(t), t)
-    except Exception, u:
+    except Exception as u:
       raise ImpalaBeeswaxException(self.__build_error_message(u), u)
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index 89167b7dd..8e0860fec 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -597,10 +597,10 @@ def find_user_processes(binaries):
       binary_name = os.path.basename(cmdline[0])
       if binary_name in binaries:
         yield binary_name, process
-    except KeyError, e:
+    except KeyError as e:
       if "uid not found" not in str(e):
         raise
-    except psutil.NoSuchProcess, e:
+    except psutil.NoSuchProcess as e:
       # Ignore the case when a process no longer exists.
       pass
 
diff --git a/tests/common/impala_service.py b/tests/common/impala_service.py
index b8e5ca7d9..822e361f6 100644
--- a/tests/common/impala_service.py
+++ b/tests/common/impala_service.py
@@ -120,7 +120,7 @@ class BaseImpalaService(object):
       value = None
       try:
         value = self.get_metric_value(metric_name)
-      except Exception, e:
+      except Exception as e:
         LOG.error(e)
 
       # if allow_greater is True we wait until the metric value becomes >= the expected
@@ -359,7 +359,7 @@ class ImpaladService(BaseImpalaService):
       try:
         value = self.get_num_known_live_backends(timeout=1, interval=interval,
             include_shutting_down=include_shutting_down)
-      except Exception, e:
+      except Exception as e:
         LOG.error(e)
       if value == expected_value:
         LOG.info("num_known_live_backends has reached value: %s" % value)
@@ -461,7 +461,7 @@ class ImpaladService(BaseImpalaService):
       transport.open()
       transport.close()
       return True
-    except Exception, e:
+    except Exception as e:
       LOG.info(e)
       return False
 
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 0a605a0d7..98d7ba164 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -323,13 +323,13 @@ class ImpalaTestSuite(BaseTestSuite):
     cls.hs2_client = None
     try:
       cls.hs2_client = cls.create_impala_client(protocol='hs2')
-    except Exception, e:
+    except Exception as e:
       # HS2 connection can fail for benign reasons, e.g. running with unsupported auth.
       LOG.info("HS2 connection setup failed, continuing...: {0}".format(e))
     cls.hs2_http_client = None
     try:
       cls.hs2_http_client = cls.create_impala_client(protocol='hs2-http')
-    except Exception, e:
+    except Exception as e:
       # HS2 HTTP connection can fail for benign reasons, e.g. running with unsupported
       # auth.
       LOG.info("HS2 HTTP connection setup failed, continuing...: {0}".format(e))
@@ -882,7 +882,7 @@ class ImpalaTestSuite(BaseTestSuite):
     result = None
     try:
       result = cls.__execute_query(impalad_client, query, query_options, user)
-    except Exception, e:
+    except Exception as e:
       return e
 
     assert not result.success, "No failure encountered for query %s" % query
@@ -1201,7 +1201,7 @@ class ImpalaTestSuite(BaseTestSuite):
         self.client.execute("describe `{db_name}`.`{table_name}`".format(
                             db_name=db_name, table_name=table_name))
         return
-      except Exception, ex:
+      except Exception as ex:
         print str(ex)
         time.sleep(0.2)
         continue
diff --git a/tests/custom_cluster/test_coordinators.py b/tests/custom_cluster/test_coordinators.py
index 5b816f424..635892da5 100644
--- a/tests/custom_cluster/test_coordinators.py
+++ b/tests/custom_cluster/test_coordinators.py
@@ -48,7 +48,7 @@ class TestCoordinators(CustomClusterTestSuite):
     beeswax_client = None
     try:
       beeswax_client = worker.service.create_beeswax_client()
-    except Exception, e:
+    except Exception as e:
       LOG.info("Caught exception {0}".format(e))
     finally:
       assert beeswax_client is None
@@ -56,7 +56,7 @@ class TestCoordinators(CustomClusterTestSuite):
     hs2_client = None
     try:
       hs2_client = worker.service.create_hs2_client()
-    except Exception, e:
+    except Exception as e:
       LOG.info("Caught exception {0}".format(e))
     finally:
       assert hs2_client is None
diff --git a/tests/custom_cluster/test_hdfs_timeout.py b/tests/custom_cluster/test_hdfs_timeout.py
index 9e9b84a96..fb2d3e38e 100644
--- a/tests/custom_cluster/test_hdfs_timeout.py
+++ b/tests/custom_cluster/test_hdfs_timeout.py
@@ -71,7 +71,7 @@ class TestHdfsTimeouts(CustomClusterTestSuite):
       result = self.execute_query("select count(*) from functional.alltypes",
           vector=vector)
       end_time = time.time()
-    except Exception, e:
+    except Exception as e:
       ex = e
     finally:
       end_time = time.time()
diff --git a/tests/custom_cluster/test_hs2_fault_injection.py b/tests/custom_cluster/test_hs2_fault_injection.py
index 47107ba74..0b3f8e7b4 100644
--- a/tests/custom_cluster/test_hs2_fault_injection.py
+++ b/tests/custom_cluster/test_hs2_fault_injection.py
@@ -274,7 +274,7 @@ class TestHS2FaultInjection(CustomClusterTestSuite):
     query_handle = None
     try:
       query_handle = self.custom_hs2_http_client.execute_query('select 1', {})
-    except Exception, e:
+    except Exception as e:
       assert str(e) == 'HTTP code 502: Injected Fault'
     assert query_handle is None
     output = capsys.readouterr()[1].splitlines()
@@ -293,7 +293,7 @@ class TestHS2FaultInjection(CustomClusterTestSuite):
     try:
       for rows in rows_fetched:
         num_rows += 1
-    except Exception, e:
+    except Exception as e:
       assert str(e) == 'HTTP code 502: Injected Fault'
     assert num_rows is None
     self.close_query(query_handle)
@@ -315,7 +315,7 @@ class TestHS2FaultInjection(CustomClusterTestSuite):
     (num_rows, num_row_errors) = None, None
     try:
       (num_rows, num_row_errors) = self.custom_hs2_http_client.close_dml(query_handle)
-    except Exception, e:
+    except Exception as e:
       assert str(e) == 'HTTP code 502: Injected Fault'
     assert num_rows is None
     assert num_row_errors is None
diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py
index 53d535ec6..78eccb4ae 100644
--- a/tests/custom_cluster/test_local_catalog.py
+++ b/tests/custom_cluster/test_local_catalog.py
@@ -149,7 +149,7 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
           err = self.execute_query_expect_failure(client, "select * from %s" % view)
           assert "Could not resolve table reference" in str(err)
           break
-        except Exception, e:
+        except Exception as e:
           assert attempt < NUM_ATTEMPTS - 1, str(e)
         time.sleep(1)
 
diff --git a/tests/custom_cluster/test_mem_reservations.py b/tests/custom_cluster/test_mem_reservations.py
index bf1641b2c..6365ec126 100644
--- a/tests/custom_cluster/test_mem_reservations.py
+++ b/tests/custom_cluster/test_mem_reservations.py
@@ -87,7 +87,7 @@ class TestMemReservations(CustomClusterTestSuite):
             result = client.execute(self.query)
             assert result.success
             assert len(result.data) == 1
-        except Exception, e:
+        except Exception as e:
           self.error = str(e)
         finally:
           client.close()
diff --git a/tests/custom_cluster/test_query_expiration.py b/tests/custom_cluster/test_query_expiration.py
index 4324b532e..d6b0011fd 100644
--- a/tests/custom_cluster/test_query_expiration.py
+++ b/tests/custom_cluster/test_query_expiration.py
@@ -127,7 +127,7 @@ class TestQueryExpiration(CustomClusterTestSuite):
     for handle in handles:
       try:
         client.close_query(handle)
-      except Exception, e:
+      except Exception as e:
         # We fetched from some cancelled handles above, which unregistered the queries.
         assert 'Invalid or unknown query handle' in str(e)
 
@@ -172,7 +172,7 @@ class TestQueryExpiration(CustomClusterTestSuite):
     try:
       client.fetch(query, handle)
       assert False
-    except Exception, e:
+    except Exception as e:
       assert re.search(exception_regex, str(e))
 
   def __expect_client_state(self, client, handle, expected_state, timeout=0.1):
@@ -222,7 +222,7 @@ class TestQueryExpiration(CustomClusterTestSuite):
         try:
           result = self.client.execute("SELECT SLEEP(2500)")
           assert "Expected to hit time limit"
-        except Exception, e:
+        except Exception as e:
           self.exception = e
 
     class NonExpiringTimeLimitThread(threading.Thread):
diff --git a/tests/custom_cluster/test_query_retries.py b/tests/custom_cluster/test_query_retries.py
index 469bb326f..1a5f5efdb 100644
--- a/tests/custom_cluster/test_query_retries.py
+++ b/tests/custom_cluster/test_query_retries.py
@@ -403,7 +403,7 @@ class TestQueryRetries(CustomClusterTestSuite):
     try:
       self.client.fetch(self._shuffle_heavy_query, handle)
       assert False
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "Admission for query exceeded timeout 60000ms in pool default-pool." \
           in str(e)
       assert "Queued reason: Waiting for executors to start. Only DDL queries and " \
@@ -467,7 +467,7 @@ class TestQueryRetries(CustomClusterTestSuite):
     try:
       self.client.fetch(self._shuffle_heavy_query, handle)
       assert False
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "Max retry limit was hit. Query was retried 1 time(s)." in str(e)
 
     # Assert that the killed impalad shows up in the list of blacklisted executors from
@@ -724,7 +724,7 @@ class TestQueryRetries(CustomClusterTestSuite):
     try:
       self.client.fetch(query, handle)
       assert False
-    except Exception, e:
+    except Exception as e:
         assert "Cancelled" in str(e)
     self.__validate_memz()
 
@@ -874,7 +874,7 @@ class TestQueryRetries(CustomClusterTestSuite):
     try:
       self.client.fetch(query, handle)
       assert False
-    except Exception, e:
+    except Exception as e:
         assert "expired due to client inactivity" in str(e)
 
     # Assert that the impalad metrics show one expired query.
@@ -907,7 +907,7 @@ class TestQueryRetries(CustomClusterTestSuite):
     # error.
     try:
       client.fetch(query, handle)
-    except Exception, e:
+    except Exception as e:
       assert "Client session expired" in str(e)
 
     # Assert that the impalad metrics show one expired session.
diff --git a/tests/custom_cluster/test_restart_services.py b/tests/custom_cluster/test_restart_services.py
index a97f7e857..fb1564943 100644
--- a/tests/custom_cluster/test_restart_services.py
+++ b/tests/custom_cluster/test_restart_services.py
@@ -62,7 +62,7 @@ class TestRestart(CustomClusterTestSuite):
       try:
         cursor.execute("describe database functional")
         return
-      except HiveServer2Error, e:
+      except HiveServer2Error as e:
         assert "AnalysisException: Database does not exist: functional" in e.message,\
                "Unexpected exception: " + e.message
         sleep(1)
@@ -178,7 +178,7 @@ class TestRestart(CustomClusterTestSuite):
       try:
         query = "alter table join_aa add columns (age" + str(i) + " int)"
         self.execute_query_async(query)
-      except Exception, e:
+      except Exception as e:
         LOG.info(str(e))
       if i == 5:
         self.cluster.catalogd.restart()
@@ -204,7 +204,7 @@ class TestRestart(CustomClusterTestSuite):
       try:
         query = "alter table join_aa add columns (age" + str(i) + " int)"
         self.execute_query_async(query, query_options)
-      except Exception, e:
+      except Exception as e:
         LOG.info(str(e))
       if i == 5:
         self.cluster.catalogd.restart()
@@ -256,7 +256,7 @@ class TestRestart(CustomClusterTestSuite):
       try:
         query = "alter table join_aa add columns (age" + str(i) + " int)"
         self.execute_query_async(query)
-      except Exception, e:
+      except Exception as e:
         LOG.info(str(e))
       if i == 5:
         self.cluster.catalogd.restart()
@@ -666,7 +666,7 @@ class TestGracefulShutdown(CustomClusterTestSuite, HS2TestSuite):
     def expect_beeswax_shutdown_error(fn):
       try:
         fn()
-      except ImpalaBeeswaxException, e:
+      except ImpalaBeeswaxException as e:
         assert SHUTDOWN_ERROR_PREFIX in str(e)
     expect_beeswax_shutdown_error(lambda: self.client.execute("select 1"))
     expect_beeswax_shutdown_error(lambda: self.client.execute_async("select 1"))
@@ -741,7 +741,7 @@ class TestGracefulShutdown(CustomClusterTestSuite, HS2TestSuite):
     try:
       self.client.fetch(query, handle)
       assert False, "Expected query to fail"
-    except Exception, e:
+    except Exception as e:
       assert 'Failed due to unreachable impalad(s)' in str(e)
 
   @pytest.mark.execute_serially
diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py
index d8ecbcad5..06ef7fbc7 100644
--- a/tests/metadata/test_ddl.py
+++ b/tests/metadata/test_ddl.py
@@ -570,7 +570,7 @@ class TestDdlStatements(TestDdlBase):
             result = self.execute_query_expect_success(
                 client, "describe formatted %s" % view_name)
             exp_line = [l for l in result.data if 'View Expanded' in l][0]
-          except ImpalaBeeswaxException, e:
+          except ImpalaBeeswaxException as e:
             # In non-SYNC_DDL tests, it's OK to get a "missing view" type error
             # until the metadata propagates.
             exp_line = "Exception: %s" % e
diff --git a/tests/metadata/test_hdfs_permissions.py b/tests/metadata/test_hdfs_permissions.py
index 2ff4e1e6a..766fdfcfc 100644
--- a/tests/metadata/test_hdfs_permissions.py
+++ b/tests/metadata/test_hdfs_permissions.py
@@ -64,7 +64,7 @@ class TestHdfsPermissions(ImpalaTestSuite):
     try:
       self.client.execute('insert into table %s select 1' % TEST_TBL)
       assert False, 'Expected INSERT INTO read-only table to fail'
-    except Exception, e:
+    except Exception as e:
       assert re.search('does not have WRITE access to HDFS location: .*/read_only_tbl',
                        str(e))
     # Should still be able to query this table without any errors.
@@ -85,7 +85,7 @@ class TestHdfsPermissions(ImpalaTestSuite):
           'insert into table functional_seq.alltypes '
           'partition(year, month) select * from functional.alltypes limit 0')
       assert False, 'Expected INSERT INTO read-only partition to fail'
-    except Exception, e:
+    except Exception as e:
       assert re.search(
           'does not have WRITE access to HDFS location: .*/alltypes_seq',
           str(e))
diff --git a/tests/performance/query_exec_functions.py b/tests/performance/query_exec_functions.py
index 352c9a737..2bd2953f1 100644
--- a/tests/performance/query_exec_functions.py
+++ b/tests/performance/query_exec_functions.py
@@ -47,7 +47,7 @@ def get_hs2_hive_cursor(hiveserver, user=None, use_kerberos=False,
 
     cursor = conn.cursor(configuration=execOptions)
     LOG.info("Connected to {0}:{1}".format(host, port))
-  except Exception, e:
+  except Exception as e:
     LOG.error("Error Connecting: {0}".format(str(e)))
   return cursor
 
@@ -102,7 +102,7 @@ def get_hs2_impala_cursor(impalad, use_kerberos=False, database=None):
         auth_mechanism="GSSAPI" if use_kerberos else "NOSASL")
     cursor = conn.cursor()
     LOG.info("Connected to {0}:{1}".format(host, port))
-  except Exception, e:
+  except Exception as e:
     LOG.error("Error connecting: {0}".format(str(e)))
   return cursor
 
@@ -165,7 +165,7 @@ def establish_beeswax_connection(query_config):
     # Set the exec options.
     client.set_query_options(query_config.exec_options)
     LOG.info("Connected to %s" % query_config.impalad)
-  except Exception, e:
+  except Exception as e:
     LOG.error("Error connecting: {0}".format(str(e)))
   return client
 
@@ -195,7 +195,7 @@ def execute_using_impala_beeswax(query, query_config):
   result = None
   try:
     result = client.execute(query.query_str)
-  except Exception, e:
+  except Exception as e:
     LOG.error(e)
     exec_result.query_error = str(e)
   finally:
@@ -284,7 +284,7 @@ def run_query_capture_results(cmd, query, exit_on_error):
   start_time = datetime.now()
   try:
     rc, stdout, stderr = exec_process(cmd)
-  except Exception, e:
+  except Exception as e:
     LOG.error('Error while executing query command: %s' % e)
     exec_result.query_error = str(e)
     # TODO: Should probably save the start time and query string for failed queries.
diff --git a/tests/query_test/test_beeswax.py b/tests/query_test/test_beeswax.py
index 037a4a344..2887d1c23 100644
--- a/tests/query_test/test_beeswax.py
+++ b/tests/query_test/test_beeswax.py
@@ -85,7 +85,7 @@ class TestBeeswax(ImpalaTestSuite):
     try:
       fn()
       assert False, "Expected invalid handle"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "Query id" in str(e) and "not found" in str(e), str(e)
 
   def _assert_profile_access_denied(self, fn):
@@ -94,5 +94,5 @@ class TestBeeswax(ImpalaTestSuite):
     try:
       fn()
       assert False, "Expected invalid handle"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "is not authorized to access the runtime profile" in str(e), str(e)
diff --git a/tests/query_test/test_decimal_queries.py b/tests/query_test/test_decimal_queries.py
index 13c64e341..a361eef59 100644
--- a/tests/query_test/test_decimal_queries.py
+++ b/tests/query_test/test_decimal_queries.py
@@ -172,7 +172,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
       try:
         self.execute_query_using_client(self.client, query_1, vector)
         assert False, "Query was expected to fail"
-      except ImpalaBeeswaxException, e:
+      except ImpalaBeeswaxException as e:
         assert "Decimal expression overflowed" in str(e)
 
       result = self.execute_query_expect_success(self.client,
@@ -187,7 +187,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
     try:
       self.execute_query_using_client(self.client, query_1, vector)
       assert False, "Query was expected to fail"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "Decimal expression overflowed" in str(e)
 
     result = self.execute_query_expect_success(self.client,
@@ -215,7 +215,7 @@ class TestDecimalOverflowExprs(ImpalaTestSuite):
     try:
       self.execute_query_using_client(self.client, query_2, vector)
       assert False, "Query was expected to fail"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "Decimal expression overflowed" in str(e)
 
     result = self.execute_query_expect_success(self.client,
diff --git a/tests/query_test/test_insert.py b/tests/query_test/test_insert.py
index 213be78b7..05a40401f 100644
--- a/tests/query_test/test_insert.py
+++ b/tests/query_test/test_insert.py
@@ -124,7 +124,7 @@ class TestInsertQueries(ImpalaTestSuite):
     try:
       self.client.execute("select s from {0}".format(table_name))
       assert False, "Expected query to fail"
-    except Exception, e:
+    except Exception as e:
       assert "Memory limit exceeded" in str(e)
 
 
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index 8c1c50e9f..999989440 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -619,12 +619,12 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
       expected_err = "has an incompatible Parquet schema"
       try:
         self.execute_query("select item from %s.col1.item" % full_name, qopts)
-      except Exception, e:
+      except Exception as e:
         assert expected_err in str(e)
       try:
         self.execute_query("select cnt from %s t, (select count(*) cnt from t.col1) v"\
           % full_name, qopts)
-      except Exception, e:
+      except Exception as e:
         assert expected_err in str(e)
 
   # $ parquet-tools schema UnannotatedListOfPrimitives.parquet
@@ -851,7 +851,7 @@ class TestMaxNestingDepth(ImpalaTestSuite):
     try:
       self.client.execute("explain select 1 from %s.above_max_depth" % unique_database)
       assert False, "Expected table loading to fail."
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "Type exceeds the maximum nesting depth" in str(e)
 
 
diff --git a/tests/query_test/test_partitioning.py b/tests/query_test/test_partitioning.py
index acabdcbbc..649d785fc 100644
--- a/tests/query_test/test_partitioning.py
+++ b/tests/query_test/test_partitioning.py
@@ -85,7 +85,7 @@ class TestPartitioning(ImpalaTestSuite):
     # INSERT into a boolean column is disabled in Impala due to this Hive bug.
     try:
       self.execute_query("insert into %s partition(bool_col=true) select 1" % full_name)
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert 'AnalysisException: INSERT into table with BOOLEAN partition column (%s) '\
           'is not supported: %s' % ('b', full_name) in str(e)
 
diff --git a/tests/query_test/test_query_mem_limit.py b/tests/query_test/test_query_mem_limit.py
index 2586d39bc..209fca84f 100644
--- a/tests/query_test/test_query_mem_limit.py
+++ b/tests/query_test/test_query_mem_limit.py
@@ -112,7 +112,7 @@ class TestQueryMemLimit(ImpalaTestSuite):
     try:
       self.execute_query(query, exec_options, table_format=table_format)
       assert should_succeed, "Query was expected to fail"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert not should_succeed, "Query should not have failed: %s" % e
 
 
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 035d01fe4..d70a50663 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -375,13 +375,13 @@ class TestUdfExecution(TestUdfBase):
     try:
       self.run_test_case('QueryTest/udf-mem-limit', vector, use_db=unique_database)
       assert False, "Query was expected to fail"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       self._check_mem_limit_exception(e)
 
     try:
       self.run_test_case('QueryTest/uda-mem-limit', vector, use_db=unique_database)
       assert False, "Query was expected to fail"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       self._check_mem_limit_exception(e)
 
     # It takes a long time for Impala to free up memory after this test, especially if
@@ -513,7 +513,7 @@ class TestUdfTargeted(TestUdfBase):
       self.execute_query_using_client(
           client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector)
       assert False, "Query expected to fail"
-    except ImpalaBeeswaxException, e:
+    except ImpalaBeeswaxException as e:
       assert "Failed to get file info" in str(e)
 
   def test_libs_with_same_filenames(self, vector, unique_database):
diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py
index d097ccdaf..484f7b97a 100644
--- a/tests/shell/test_shell_commandline.py
+++ b/tests/shell/test_shell_commandline.py
@@ -740,7 +740,7 @@ class TestImpalaShell(ImpalaTestSuite):
       query_file_handle = open(query_file, 'r')
       query = query_file_handle.read()
       query_file_handle.close()
-    except Exception, e:
+    except Exception as e:
       assert query_file_handle is not None, "Exception %s: Could not find query file" % e
     result = run_impala_shell_cmd(vector, args, expect_success=True, stdin_input=query)
     output = result.stdout
@@ -942,7 +942,7 @@ class TestImpalaShell(ImpalaTestSuite):
           try:
             connection, client_address = sock.accept()
             break
-          except IOError, e:
+          except IOError as e:
             if e.errno != errno.EINTR:
               raise
         data = connection.recv(1024)
diff --git a/tests/statestore/test_statestore.py b/tests/statestore/test_statestore.py
index b69b3f2f8..9daedba30 100644
--- a/tests/statestore/test_statestore.py
+++ b/tests/statestore/test_statestore.py
@@ -119,7 +119,7 @@ class KillableThreadedServer(TServer):
       try:
         cnxn.open()
         return
-      except Exception, e:
+      except Exception as e:
         if i == num_tries - 1: raise
       time.sleep(0.1)
 
@@ -129,7 +129,7 @@ class KillableThreadedServer(TServer):
       try:
         cnxn.open()
         time.sleep(0.1)
-      except Exception, e:
+      except Exception as e:
         return
     raise Exception("Server did not stop")
 
@@ -152,9 +152,9 @@ class KillableThreadedServer(TServer):
     try:
       while not self.is_shutdown:
         self.processor.process(iprot, oprot)
-    except TTransport.TTransportException, tx:
+    except TTransport.TTransportException as tx:
       pass
-    except Exception, x:
+    except Exception as x:
       print x
 
     itrans.close()
@@ -203,7 +203,7 @@ class StatestoreSubscriber(object):
       if self.heartbeat_cb is not None and self.exception is None:
         try:
           response = self.heartbeat_cb(self, args)
-        except Exception, e:
+        except Exception as e:
           self.exception = e
       self.heartbeat_event.notify()
     finally:
@@ -219,7 +219,7 @@ class StatestoreSubscriber(object):
       if self.update_cb is not None and self.exception is None:
         try:
           response = self.update_cb(self, args)
-        except Exception, e:
+        except Exception as e:
           # Print the original backtrace so it doesn't get lost.
           traceback.print_exc()
           self.exception = e
diff --git a/tests/unittests/test_result_verifier.py b/tests/unittests/test_result_verifier.py
index e6bf9ba64..94d2fd917 100644
--- a/tests/unittests/test_result_verifier.py
+++ b/tests/unittests/test_result_verifier.py
@@ -48,13 +48,13 @@ class TestResultVerifier(ImpalaTestSuite):
     try:
       res.rows[0]['does_not_exist']
       assert False, 'Expected error due to column alias not existing'
-    except IndexError, e:
+    except IndexError as e:
       assert "No column with label: does_not_exist" in e.message
 
     try:
       res.rows[0][2]
       assert False, 'Expected error due to column position not existing'
-    except IndexError, e:
+    except IndexError as e:
       assert 'list index out of range' in e.message
 
   def test_compute_aggregation(self, vector):


[impala] 01/06: IMPALA-10111: Fix TestWebPage::test_query_stmt flakiness

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2ae0e4139c2a9cd07e32bae29a256ecc8f8424c0
Author: Gergely Fürnstáhl <gf...@cloudera.com>
AuthorDate: Tue Feb 21 11:39:11 2023 +0100

    IMPALA-10111: Fix TestWebPage::test_query_stmt flakiness
    
    A query might be logged in "in_flight_queries" even though it has
    already finished. Earlier the test only looked for the executed query
    in "completed_queries", after this commit it will look in both lists.
    
    Moreover, removed unnecessary duplicated execution of the query and
    added formatting to dumped response to ease debugging in the future.
    
    Testing:
     - Locally reproducible for the first test run after starting the
    cluster.
     - Verified that the query is found in in_flight_queries too and the
    test passes in this case.
    
    Change-Id: If25d430a871415a3884dece772b8d8105c583c05
    Reviewed-on: http://gerrit.cloudera.org:8080/19524
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 tests/webserver/test_web_pages.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/webserver/test_web_pages.py b/tests/webserver/test_web_pages.py
index d0efd220d..544e8c3d3 100644
--- a/tests/webserver/test_web_pages.py
+++ b/tests/webserver/test_web_pages.py
@@ -492,15 +492,18 @@ class TestWebPage(ImpalaTestSuite):
     # chars + "..."
     expected_result = "select \"{0}...".format("x " * 121)
     check_if_contains = False
-    self.execute_query(query)
-    response_json = self.__run_query_and_get_debug_page(query, self.QUERIES_URL)
+    response_json = self.__run_query_and_get_debug_page(
+      query, self.QUERIES_URL, expected_state=self.client.QUERY_STATES["FINISHED"])
     # Search the json for the expected value.
-    for json_part in response_json['completed_queries']:
+    # The query can be in in_filght_queries even though it is in FINISHED state.
+    for json_part in itertools.chain(
+      response_json['completed_queries'], response_json['in_flight_queries']):
       if expected_result in json_part['stmt']:
         check_if_contains = True
         break
+
     assert check_if_contains, "No matching statement found in the jsons at {}: {}".format(
-        datetime.now(), response_json)
+        datetime.now(), json.dumps(response_json, sort_keys=True, indent=4))
 
   def __run_query_and_get_debug_page(self, query, page_url, query_options=None,
                                      expected_state=None):


[impala] 04/06: IMPALA-11952 (part 2): Fix print function syntax

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 2b550634d2dbb38d9fda2536e8c3b092787e7456
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Feb 26 13:54:52 2023 -0800

    IMPALA-11952 (part 2): Fix print function syntax
    
    Python 3 now treats print as a function and requires
    parentheses when invoking it.
    
    print "Hello World!"
    is now:
    print("Hello World!")
    
    This fixes all locations to use the function invocation
    syntax. The change is more involved when the output is
    redirected to a file or when the trailing newline is
    suppressed.
    
    print >> sys.stderr, "Hello World!"
    is now:
    print("Hello World!", file=sys.stderr)
    
    To support this properly and guarantee equivalent behavior
    between python 2 and python 3, all files that use print
    now add this import:
    from __future__ import print_function
    
    This also fixes random flake8 issues that intersect with
    the changes.
    
    Testing:
     - check-python-syntax.sh shows no errors related to print
    
    Change-Id: Ib634958369ad777a41e72d80c8053b74384ac351
    Reviewed-on: http://gerrit.cloudera.org:8080/19552
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Michael Smith <mi...@cloudera.com>
---
 be/src/codegen/gen_ir_descriptions.py              |  5 +-
 bin/check-rat-report.py                            |  3 +-
 bin/collect_minidumps.py                           | 11 ++--
 bin/compare_branches.py                            | 33 +++++-----
 bin/diagnostics/experimental/plan-graph.py         |  1 +
 .../experimental/tpcds_run_comparator.py           |  1 +
 bin/gen_build_version.py                           |  9 +--
 bin/generate_xml_config.py                         | 13 ++--
 bin/get_code_size.py                               |  1 +
 bin/inline_pom.py                                  |  1 +
 bin/jenkins/critique-gerrit-review.py              |  3 +-
 bin/jenkins/dockerized-impala-preserve-vars.py     |  1 +
 bin/jenkins/populate_m2_directory.py               |  1 +
 bin/load-data.py                                   |  3 +-
 bin/parse-thrift-profile.py                        |  5 +-
 bin/push_to_asf.py                                 | 73 +++++++++++-----------
 bin/run-workload.py                                | 11 ++--
 bin/single_node_perf_run.py                        |  3 +-
 bin/validate-unified-backend-test-filters.py       |  1 +
 common/thrift/generate_error_codes.py              |  1 +
 common/thrift/generate_metrics.py                  |  5 +-
 infra/deploy/deploy.py                             | 60 +++++++++---------
 lib/python/impala_py_lib/gdb/impala-gdb.py         |  1 +
 .../impala_py_lib/jenkins/generate_junitxml.py     |  1 +
 lib/python/impala_py_lib/profiles.py               |  3 +-
 testdata/bin/generate-schema-statements.py         | 40 ++++++------
 testdata/bin/generate-test-vectors.py              | 17 ++---
 testdata/bin/load-tpc-kudu.py                      |  3 +-
 testdata/bin/rewrite-iceberg-metadata.py           |  1 +
 testdata/bin/wait-for-hiveserver2.py               | 13 ++--
 testdata/bin/wait-for-metastore.py                 | 11 ++--
 .../common/etc/hadoop/conf/yarn-site.xml.py        |  3 +-
 testdata/common/cgroups.py                         |  3 +-
 testdata/common/text_delims_table.py               |  1 +
 testdata/common/widetable.py                       |  3 +-
 tests/benchmark/report_benchmark_results.py        |  5 +-
 tests/common/impala_cluster.py                     |  3 +-
 tests/common/impala_test_suite.py                  |  3 +-
 tests/common/resource_pool_config.py               |  3 +-
 tests/comparison/cluster.py                        |  1 +
 tests/comparison/data_generator_mapper.py          |  1 +
 tests/comparison/db_connection.py                  |  1 +
 tests/comparison/discrepancy_searcher.py           |  1 +
 tests/comparison/query_generator.py                |  1 +
 tests/comparison/util/verify-oracle-connection.py  |  3 +-
 tests/conftest.py                                  |  1 +
 tests/custom_cluster/test_blacklist.py             |  3 +-
 tests/custom_cluster/test_breakpad.py              |  3 +-
 tests/custom_cluster/test_catalog_hms_failures.py  |  3 +-
 tests/custom_cluster/test_client_ssl.py            |  3 +-
 tests/custom_cluster/test_events_custom_configs.py |  1 +
 tests/custom_cluster/test_local_catalog.py         |  3 +-
 tests/custom_cluster/test_query_retries.py         |  3 +-
 tests/custom_cluster/test_restart_services.py      |  3 +-
 tests/custom_cluster/test_scratch_disk.py          |  3 +-
 .../custom_cluster/test_topic_update_frequency.py  |  1 +
 tests/custom_cluster/test_udf_concurrency.py       | 11 ++--
 tests/custom_cluster/test_web_pages.py             |  1 +
 tests/metadata/test_hms_integration.py             |  4 +-
 tests/query_test/test_aggregation.py               |  4 +-
 tests/query_test/test_compressed_formats.py        |  3 +-
 tests/query_test/test_hdfs_caching.py              |  3 +-
 tests/query_test/test_kudu.py                      |  3 +-
 tests/query_test/test_limit.py                     |  3 +-
 tests/query_test/test_scanners.py                  |  5 +-
 tests/run-tests.py                                 | 11 ++--
 tests/shell/test_shell_interactive.py              |  5 +-
 tests/shell/util.py                                |  1 +
 tests/statestore/test_statestore.py                |  3 +-
 tests/stress/extract_min_mem.py                    |  3 +-
 tests/stress/runtime_info.py                       |  1 +
 tests/unittests/test_file_parser.py                |  7 ++-
 tests/util/acid_txn.py                             |  3 +-
 tests/util/run_impyla_http_query.py                |  1 +
 tests/util/test_file_parser.py                     |  5 +-
 75 files changed, 275 insertions(+), 196 deletions(-)

diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
index 344491b5c..3ed2935f4 100755
--- a/be/src/codegen/gen_ir_descriptions.py
+++ b/be/src/codegen/gen_ir_descriptions.py
@@ -19,6 +19,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 from string import Template
 import os
 import shutil
@@ -338,7 +339,7 @@ def move_if_different(src_file, dest_file):
   if not os.path.isfile(dest_file) or not filecmp.cmp(src_file, dest_file):
     shutil.move(src_file, dest_file)
   else:
-    print 'Retaining existing file: %s' % (dest_file)
+    print('Retaining existing file: %s' % (dest_file))
 
 BE_PATH = os.path.join(os.environ['IMPALA_HOME'], 'be/generated-sources/impala-ir/')
 IR_FUNCTIONS_FILE = 'impala-ir-functions.h'
@@ -352,7 +353,7 @@ if not os.path.exists(BE_PATH):
   os.makedirs(BE_PATH)
 
 if __name__ == "__main__":
-  print "Generating IR description files"
+  print("Generating IR description files")
   enums_file = open(TMP_IR_FUNCTIONS_PATH, 'w')
   enums_file.write(enums_preamble)
 
diff --git a/bin/check-rat-report.py b/bin/check-rat-report.py
index b296c6dec..3c6870bee 100755
--- a/bin/check-rat-report.py
+++ b/bin/check-rat-report.py
@@ -33,6 +33,7 @@
 # time, and the RAT JAR is not included in the Impala repo; it must be downloaded
 # separately.
 
+from __future__ import print_function
 import fnmatch
 import re
 import sys
@@ -74,5 +75,5 @@ for r in resources:
 if not all_ok:
   sys.exit(1)
 
-print 'OK'
+print('OK')
 sys.exit(0)
diff --git a/bin/collect_minidumps.py b/bin/collect_minidumps.py
index 30d6d1f80..db233a946 100755
--- a/bin/collect_minidumps.py
+++ b/bin/collect_minidumps.py
@@ -24,6 +24,7 @@
 #       --role_name=statestored --max_output_size=50000000 --end_time=1463033495000 \
 #       --output_file_path=/tmp/minidump_package.tar.gz
 
+from __future__ import print_function
 import os
 import re
 import sys
@@ -162,7 +163,7 @@ def get_config_parameter_value(conf_dir, role_name, config_parameter_name):
         if m:
           config_parameter_value = m.group(1)
   except IOError as e:
-    print >> sys.stderr, 'Error: Unable to open "{0}".'.format(file_path)
+    print('Error: Unable to open "{0}".'.format(file_path), file=sys.stderr)
     sys.exit(1)
   return config_parameter_value
 
@@ -179,7 +180,7 @@ def get_minidump_dir(conf_dir, role_name):
   result = os.path.join(minidump_path, role_name)
   if not os.path.isdir(result):
     msg = 'Error: minidump directory does not exist.'
-    print >> sys.stderr, msg
+    print(msg, file=sys.stderr)
     sys.exit(1)
   return result
 
@@ -202,11 +203,11 @@ def main():
   options, args = parser.parse_args()
   if not options.conf_dir:
     msg = 'Error: conf_dir is not specified.'
-    print >> sys.stderr, msg
+    print(msg, file=sys.stderr)
     sys.exit(1)
   if not options.output_file_path:
     msg = 'Error: output_file_path is not specified.'
-    print >> sys.stderr, msg
+    print(msg, file=sys.stderr)
     sys.exit(1)
 
   minidump_dir = get_minidump_dir(options.conf_dir, options.role_name)
@@ -216,7 +217,7 @@ def main():
       end_time=options.end_time,
       output_file_path=options.output_file_path)
   status, msg = file_archiver.make_tarball()
-  print >> sys.stderr, msg
+  print(msg, file=sys.stderr)
   sys.exit(status)
 
 if __name__ == '__main__':
diff --git a/bin/compare_branches.py b/bin/compare_branches.py
index da1d5f8b9..143fa7242 100755
--- a/bin/compare_branches.py
+++ b/bin/compare_branches.py
@@ -12,6 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+# Future imports must happen at the beginning of the file
+from __future__ import print_function
+
 HELP = '''
 Compares two specified branches, using the Gerrit Change-Id as the
 primary identifier. Ignored commits can be added via a JSON
@@ -171,7 +174,7 @@ def cherrypick(cherry_pick_hashes, full_target_branch_name, partial_ok):
 
   Note that this function does not push to the remote.
   """
-  print "Cherrypicking %d changes." % (len(cherry_pick_hashes),)
+  print("Cherrypicking %d changes." % (len(cherry_pick_hashes),))
 
   if len(cherry_pick_hashes) == 0:
     return
@@ -181,8 +184,8 @@ def cherrypick(cherry_pick_hashes, full_target_branch_name, partial_ok):
   target_branch_sha = subprocess.check_output(
       ['git', 'rev-parse', full_target_branch_name]).strip()
   if head_sha != target_branch_sha:
-    print "Cannot cherrypick because %s (%s) and HEAD (%s) are divergent." % (
-        full_target_branch_name, target_branch_sha, head_sha)
+    print("Cannot cherrypick because %s (%s) and HEAD (%s) are divergent." % (
+        full_target_branch_name, target_branch_sha, head_sha))
     sys.exit(1)
 
   cherry_pick_hashes.reverse()
@@ -192,7 +195,7 @@ def cherrypick(cherry_pick_hashes, full_target_branch_name, partial_ok):
     if ret != 0:
       if partial_ok and i > 0:
         subprocess.check_call(['git', 'cherry-pick', '--abort'])
-        print "Failed to cherry-pick %s; stopping picks." % (cherry_pick_hash,)
+        print("Failed to cherry-pick %s; stopping picks." % (cherry_pick_hash,))
         return
       else:
         raise Exception("Failed to cherry-pick: %s" % (cherry_pick_hash,))
@@ -238,10 +241,10 @@ def main():
   commits_ignored = []  # Track commits actually ignored for debug logging
 
   cherry_pick_hashes = []
-  print '-' * 80
-  print 'Commits in {0} but not in {1}:'.format(
-      full_source_branch_name, full_target_branch_name)
-  print '-' * 80
+  print('-' * 80)
+  print('Commits in {0} but not in {1}:'.format(
+      full_source_branch_name, full_target_branch_name))
+  print('-' * 80)
   jira_keys = []
   jira_key_pat = re.compile(r'(IMPALA-\d+)')
   skip_commits_matching = options.skip_commits_matching.format(
@@ -267,17 +270,17 @@ def main():
       logging.debug("NOT ignoring commit {0} since not in ignored commits ({1},{2})"
                    .format(commit_hash, options.source_branch, options.target_branch))
     if not change_in_target and not ignore_by_config and not ignore_by_commit_message:
-      print u'{0} {1} ({2}) - {3}'\
-          .format(commit_hash, msg.decode('utf8'), date, author.decode('utf8'))\
-          .encode('utf8')
+      print(u'{0} {1} ({2}) - {3}'
+          .format(commit_hash, msg.decode('utf8'), date, author.decode('utf8'))
+          .encode('utf8'))
       cherry_pick_hashes.append(commit_hash)
       jira_keys += jira_key_pat.findall(msg)
 
-  print '-' * 80
+  print('-' * 80)
 
-  print "Jira keys referenced (Note: not all commit messages will reference a jira key):"
-  print ','.join(jira_keys)
-  print '-' * 80
+  print("Jira keys referenced (Note: not all commit messages will reference a jira key):")
+  print(','.join(jira_keys))
+  print('-' * 80)
 
   logging.debug("Commits actually ignored (change was not in target): {0}"
                .format(pformat(commits_ignored)))
diff --git a/bin/diagnostics/experimental/plan-graph.py b/bin/diagnostics/experimental/plan-graph.py
index 566d195df..03e06d663 100755
--- a/bin/diagnostics/experimental/plan-graph.py
+++ b/bin/diagnostics/experimental/plan-graph.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License
 
+from __future__ import print_function
 import argparse
 import math
 import re
diff --git a/bin/diagnostics/experimental/tpcds_run_comparator.py b/bin/diagnostics/experimental/tpcds_run_comparator.py
index 3c635cd6b..80b787f0f 100755
--- a/bin/diagnostics/experimental/tpcds_run_comparator.py
+++ b/bin/diagnostics/experimental/tpcds_run_comparator.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License
 
+from __future__ import print_function
 import argparse
 import csv
 import math
diff --git a/bin/gen_build_version.py b/bin/gen_build_version.py
index 4030d61ca..c6468fa69 100755
--- a/bin/gen_build_version.py
+++ b/bin/gen_build_version.py
@@ -22,6 +22,7 @@
 # This script generates be/src/common/version.h which contains the build version based
 # on the git hash.
 
+from __future__ import print_function
 import os
 from subprocess import call
 
@@ -46,15 +47,15 @@ version_file_exists = os.path.isfile(VERSION_FILE_NAME)
 # If we have a version file and cannot obtain a git hash, skip generating a new
 # version file.
 if version_file_exists and not can_obtain_git_hash:
-  print "Cannot obtain git hash, using existing version file."
+  print("Cannot obtain git hash, using existing version file.")
 else:
   # Remove existing version files only if they exist.
   # TODO: Might be useful to make a common utility function remove_if_clean.
   if version_file_exists:
-    print 'Removing existing file: %s' % (VERSION_FILE_NAME)
+    print('Removing existing file: %s' % (VERSION_FILE_NAME))
     os.remove(VERSION_FILE_NAME)
   if os.path.isfile(VERSION_CC_FILE_NAME):
-    print 'Removing existing file: %s' % (VERSION_CC_FILE_NAME)
+    print('Removing existing file: %s' % (VERSION_CC_FILE_NAME))
     os.remove(VERSION_CC_FILE_NAME)
 
   # SAVE_VERSION_SCRIPT will generate a dummy version.info file if we cannot obtain the
@@ -74,7 +75,7 @@ try:
 finally:
   version_file.close()
 
-print '\n'.join([version, git_hash, build_time])
+print('\n'.join([version, git_hash, build_time]))
 
 file_contents = """
 //
diff --git a/bin/generate_xml_config.py b/bin/generate_xml_config.py
index 915e3e276..e8f03dd11 100755
--- a/bin/generate_xml_config.py
+++ b/bin/generate_xml_config.py
@@ -40,6 +40,7 @@ REPL:
 """
 
 from __future__ import with_statement
+from __future__ import print_function
 import imp
 import os
 import re
@@ -77,7 +78,7 @@ def dump_config(d, source_path, out):
 
       -->
       <configuration>""".format(source_path=os.path.abspath(source_path))
-  print >>out, dedent(header)
+  print(dedent(header), file=out)
   for k, v in sorted(d.iteritems()):
     try:
       k_new = _substitute_env_vars(k)
@@ -87,24 +88,24 @@ def dump_config(d, source_path, out):
     except KeyError as e:
       raise Exception("failed environment variable substitution for value {k}: {e}"
                       .format(k=k, e=e))
-    print >>out, """\
+    print("""\
       <property>
         <name>{name}</name>
         <value>{value}</value>
-      </property>""".format(name=xmlescape(k_new), value=xmlescape(v_new))
-  print >>out, "</configuration>"
+      </property>""".format(name=xmlescape(k_new), value=xmlescape(v_new)), file=out)
+  print("</configuration>", file=out)
 
 
 def main():
   if len(sys.argv) != 3:
-    print >>sys.stderr, "usage: {prog} <template> <out>".format(prog=sys.argv[0])
+    print("usage: {prog} <template> <out>".format(prog=sys.argv[0]), file=sys.stderr)
     sys.exit(1)
 
   _, in_path, out_path = sys.argv
   try:
     mod = imp.load_source('template', in_path)
   except:  # noqa
-    print >>sys.stderr, "Unable to load template: %s" % in_path
+    print("Unable to load template: %s" % in_path, file=sys.stderr)
     raise
   conf = mod.__dict__.get('CONFIG')
   if not isinstance(conf, dict):
diff --git a/bin/get_code_size.py b/bin/get_code_size.py
index 9a2e78298..b7d6eda6f 100755
--- a/bin/get_code_size.py
+++ b/bin/get_code_size.py
@@ -19,6 +19,7 @@
 
 # This tool walks the build directory (release by default) and will print the text, data,
 # and bss section sizes of the archives.
+from __future__ import print_function
 import fnmatch
 import os
 import re
diff --git a/bin/inline_pom.py b/bin/inline_pom.py
index a80574363..1b021683a 100755
--- a/bin/inline_pom.py
+++ b/bin/inline_pom.py
@@ -21,6 +21,7 @@
 #
 # Usage: inline_pom.py <pom.xml>...
 
+from __future__ import print_function
 import re
 import sys
 from tempfile import mkstemp
diff --git a/bin/jenkins/critique-gerrit-review.py b/bin/jenkins/critique-gerrit-review.py
index 466a1ad18..7f44f47bd 100755
--- a/bin/jenkins/critique-gerrit-review.py
+++ b/bin/jenkins/critique-gerrit-review.py
@@ -36,6 +36,7 @@
 # TODO: generalise to other warnings
 # * clang-tidy
 
+from __future__ import print_function
 from argparse import ArgumentParser
 from collections import defaultdict
 import json
@@ -229,6 +230,6 @@ if __name__ == "__main__":
   comments = get_flake8_comments(revision)
   merge_comments(comments, get_misc_comments(revision))
   review_input = {"comments": comments}
-  print json.dumps(review_input, indent=True)
+  print(json.dumps(review_input, indent=True))
   if not args.dryrun:
     post_review_to_gerrit(review_input)
diff --git a/bin/jenkins/dockerized-impala-preserve-vars.py b/bin/jenkins/dockerized-impala-preserve-vars.py
index f36a45dab..24abea465 100755
--- a/bin/jenkins/dockerized-impala-preserve-vars.py
+++ b/bin/jenkins/dockerized-impala-preserve-vars.py
@@ -28,6 +28,7 @@
 # If an environment variable is not defined in the current environment,
 # it is omitted with a warning.
 
+from __future__ import print_function
 import sys
 import os
 
diff --git a/bin/jenkins/populate_m2_directory.py b/bin/jenkins/populate_m2_directory.py
index 15701898a..de56808df 100755
--- a/bin/jenkins/populate_m2_directory.py
+++ b/bin/jenkins/populate_m2_directory.py
@@ -16,6 +16,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import json
 import subprocess
 import os
diff --git a/bin/load-data.py b/bin/load-data.py
index ef407306e..362ec8ee3 100755
--- a/bin/load-data.py
+++ b/bin/load-data.py
@@ -20,6 +20,7 @@
 # This script is used to load the proper datasets for the specified workloads. It loads
 # all data via Hive except for parquet data which needs to be loaded via Impala.
 # Most ddl commands are executed by Impala.
+from __future__ import print_function
 import collections
 import getpass
 import logging
@@ -102,7 +103,7 @@ HIVE_CMD = os.path.join(os.environ['HIVE_HOME'], 'bin/beeline')
 hive_auth = "auth=none"
 if options.use_kerberos:
   if not options.principal:
-    print "--principal is required when --use_kerberos is specified"
+    print("--principal is required when --use_kerberos is specified")
     exit(1)
   hive_auth = "principal=" + options.principal
 
diff --git a/bin/parse-thrift-profile.py b/bin/parse-thrift-profile.py
index 55cf1f2b6..7661e1ff0 100755
--- a/bin/parse-thrift-profile.py
+++ b/bin/parse-thrift-profile.py
@@ -39,6 +39,7 @@
 # 2018-04-13T15:06:34.144000 e44af7f93edb8cd6:1b1f801600000000 TRuntimeProfileTree(nodes=[TRuntimeProf...
 
 
+from __future__ import print_function
 from impala_py_lib import profiles
 import sys
 
@@ -47,9 +48,9 @@ if len(sys.argv) == 1 or sys.argv[1] == "-":
 elif len(sys.argv) == 2:
   input_data = file(sys.argv[1])
 else:
-  print >> sys.stderr, "Usage: %s [file]" % (sys.argv[0],)
+  print("Usage: %s [file]" % (sys.argv[0],), file=sys.stderr)
   sys.exit(1)
 
 for line in input_data:
   tree = profiles.decode_profile_line(line)
-  print tree
+  print(tree)
diff --git a/bin/push_to_asf.py b/bin/push_to_asf.py
index fab6cfcc6..fac612c4a 100755
--- a/bin/push_to_asf.py
+++ b/bin/push_to_asf.py
@@ -35,6 +35,7 @@
 # TODO: Improve console output: replace 'print' with format strings
 #       and use sys.stderr/sys.stdout.
 
+from __future__ import print_function
 import logging
 import optparse
 import os
@@ -87,10 +88,10 @@ def confirm_prompt(prompt):
   if the user confirms.
   """
   while True:
-    print prompt, "[Y/n]:",
+    print(prompt, "[Y/n]:", end=' ')
 
     if not os.isatty(sys.stdout.fileno()):
-      print "Not running interactively. Assuming 'N'."
+      print("Not running interactively. Assuming 'N'.")
       return False
 
     r = raw_input().strip().lower()
@@ -115,14 +116,14 @@ def check_apache_remote():
         ['git', 'config', '--local', '--get',
             'remote.' + OPTIONS.apache_remote + '.url']).strip()
   except subprocess.CalledProcessError:
-    print >>sys.stderr, "No remote named " + OPTIONS.apache_remote + \
-        ". Please set one up, for example with: "
-    print >>sys.stderr, "  git remote add apache", APACHE_REPO
+    print("No remote named " + OPTIONS.apache_remote
+        + ". Please set one up, for example with: ", file=sys.stderr)
+    print("  git remote add apache", APACHE_REPO, file=sys.stderr)
     sys.exit(1)
   if url != APACHE_REPO:
-    print >>sys.stderr, "Unexpected URL for remote " + OPTIONS.apache_remote + "."
-    print >>sys.stderr, "  Got:     ", url
-    print >>sys.stderr, "  Expected:", APACHE_REPO
+    print("Unexpected URL for remote " + OPTIONS.apache_remote + ".", file=sys.stderr)
+    print("  Got:     ", url, file=sys.stderr)
+    print("  Expected:", APACHE_REPO, file=sys.stderr)
     sys.exit(1)
 
 
@@ -135,15 +136,15 @@ def check_gerrit_remote():
     url = check_output(['git', 'config', '--local', '--get',
                         'remote.' + OPTIONS.gerrit_remote + '.url']).strip()
   except subprocess.CalledProcessError:
-    print >>sys.stderr, "No remote named " + OPTIONS.gerrit_remote + \
-        ". Please set one up following "
-    print >>sys.stderr, "the contributor guide."
+    print("No remote named " + OPTIONS.gerrit_remote
+        + ". Please set one up following ", file=sys.stderr)
+    print("the contributor guide.", file=sys.stderr)
     sys.exit(1)
 
   if not GERRIT_URL_RE.match(url):
-    print >>sys.stderr, "Unexpected URL for remote " + OPTIONS.gerrit_remote
-    print >>sys.stderr, "  Got:     ", url
-    print >>sys.stderr, "  Expected to find host '%s' in the URL" % GERRIT_HOST
+    print("Unexpected URL for remote " + OPTIONS.gerrit_remote, file=sys.stderr)
+    print("  Got:     ", url, file=sys.stderr)
+    print("  Expected URL to match '%s'" % GERRIT_URL_RE, file=sys.stderr)
     sys.exit(1)
 
 
@@ -211,40 +212,40 @@ def do_update(branch, gerrit_sha, apache_sha):
   # must have gotten committed to Apache outside of gerrit, and we'd need some
   # manual intervention.
   if not is_fast_forward(apache_sha, gerrit_sha):
-    print >>sys.stderr, "Cannot update branch '%s' from gerrit:" % branch
-    print >>sys.stderr, "Apache revision %s is not an ancestor of gerrit revision %s" % (
-      apache_sha[:8], gerrit_sha[:8])
-    print >>sys.stderr,\
-        "Something must have been committed to Apache and bypassed gerrit."
-    print >>sys.stderr, "Manual intervention is required."
+    print("Cannot update branch '%s' from gerrit:" % branch, file=sys.stderr)
+    print("Apache revision %s is not an ancestor of gerrit revision %s" % (
+      apache_sha[:8], gerrit_sha[:8]), file=sys.stderr)
+    print("Something must have been committed to Apache and bypassed gerrit.",
+          file=sys.stderr)
+    print("Manual intervention is required.", file=sys.stderr)
     sys.exit(1)
 
   # List the commits that are going to be pushed to the ASF, so that the committer
   # can verify and "sign off".
   commits = rev_list("%s..%s" % (apache_sha, gerrit_sha))
   commits.reverse()  # Display from oldest to newest.
-  print "-" * 60
-  print Colors.GREEN + ("%d commit(s) need to be pushed from Gerrit to ASF:" %\
-      len(commits)) + Colors.RESET
+  print("-" * 60)
+  print(Colors.GREEN + ("%d commit(s) need to be pushed from Gerrit to ASF:" %
+      len(commits)) + Colors.RESET)
   push_sha = None
   for sha in commits:
     oneline = describe_commit(sha)
-    print "  ", oneline
+    print("  ", oneline)
     committer = get_committer_email(sha)
     if committer != get_my_email():
-      print Colors.RED + "   !!! Committed by someone else (%s) !!!" %\
-          committer, Colors.RESET
+      print(Colors.RED + "   !!! Committed by someone else (%s) !!!" %
+          committer, Colors.RESET)
       if not confirm_prompt(Colors.RED +\
           "   !!! Are you sure you want to push on behalf of another committer?" +\
           Colors.RESET):
         # Even if they don't want to push this commit, we could still push any
         # earlier commits that the user _did_ author.
         if push_sha is not None:
-          print "... will still update to prior commit %s..." % push_sha
+          print("... will still update to prior commit %s..." % push_sha)
         break
     push_sha = sha
   if push_sha is None:
-    print "Nothing to push"
+    print("Nothing to push")
     return
 
   # Everything has been confirmed. Do the actual push
@@ -252,11 +253,11 @@ def do_update(branch, gerrit_sha, apache_sha):
   if OPTIONS.dry_run:
     cmd.append('--dry-run')
   cmd.append('%s:refs/heads/%s' % (push_sha, branch))
-  print Colors.GREEN + "Running: " + Colors.RESET + " ".join(cmd)
+  print(Colors.GREEN + "Running: " + Colors.RESET + " ".join(cmd))
   subprocess.check_call(cmd)
-  print Colors.GREEN + "Successfully updated %s to %s" % (branch, gerrit_sha) +\
-      Colors.RESET
-  print
+  print(Colors.GREEN + "Successfully updated %s to %s" % (branch, gerrit_sha)
+      + Colors.RESET)
+  print()
 
 
 def main():
@@ -295,14 +296,14 @@ def main():
   apache_branches = get_branches(OPTIONS.apache_remote)
   for branch, apache_sha in sorted(apache_branches.iteritems()):
     gerrit_sha = rev_parse("remotes/" + OPTIONS.gerrit_remote + "/" + branch)
-    print "Branch '%s':\t" % branch,
+    print("Branch '%s':\t" % branch, end='')
     if gerrit_sha is None:
-      print Colors.YELLOW, "found on Apache but not in gerrit", Colors.RESET
+      print(Colors.YELLOW, "found on Apache but not in gerrit", Colors.RESET)
       continue
     if gerrit_sha == apache_sha:
-      print Colors.GREEN, "up to date", Colors.RESET
+      print(Colors.GREEN, "up to date", Colors.RESET)
       continue
-    print Colors.YELLOW, "needs update", Colors.RESET
+    print(Colors.YELLOW, "needs update", Colors.RESET)
     do_update(branch, gerrit_sha, apache_sha)
 
 
diff --git a/bin/run-workload.py b/bin/run-workload.py
index 8d2d2f75b..abe95ceb1 100755
--- a/bin/run-workload.py
+++ b/bin/run-workload.py
@@ -27,6 +27,7 @@
 #   - Stores the execution details in JSON format.
 #
 
+from __future__ import print_function
 import getpass
 import json
 import logging
@@ -158,16 +159,16 @@ def prettytable_print(results, failed=False):
   table.float_format = '.2'
   # Group the results by table format.
   for table_format_str, gr in groupby(results, lambda x: x.query.table_format_str):
-    print "Table Format: %s" % table_format_str
+    print("Table Format: %s" % table_format_str)
     for result in gr:
       start_time = result.start_time.strftime("%Y-%m-%d %H:%M:%S") if result.start_time \
           is not None else '-'
       row = [result.query.name, start_time, result.time_taken, result.client_name]
       if failed: row.append(result.query_error)
       table.add_row(row)
-    print table.get_string(sortby='Client ID')
+    print(table.get_string(sortby='Client ID'))
     table.clear_rows()
-    print str()
+    print(str())
 
 def print_result_summary(results):
   """Print failed and successfull queries for a given result list"""
@@ -271,8 +272,8 @@ if __name__ == "__main__":
       if not all(result.success for result in workload_runner.results): exit_code = 1
 
       # Print the results
-      print "\nWorkload: {0}, Scale Factor: {1}\n".format(
-          workload_runner.workload.name.upper(), workload_runner.scale_factor)
+      print("\nWorkload: {0}, Scale Factor: {1}\n".format(
+          workload_runner.workload.name.upper(), workload_runner.scale_factor))
       print_result_summary(workload_runner.results)
 
   # Store the results
diff --git a/bin/single_node_perf_run.py b/bin/single_node_perf_run.py
index e9cd33d80..e1b74de64 100755
--- a/bin/single_node_perf_run.py
+++ b/bin/single_node_perf_run.py
@@ -69,6 +69,7 @@
 #   --start_minicluster   start a new Hadoop minicluster
 #   --ninja               use ninja, rather than Make, as the build tool
 
+from __future__ import print_function
 from optparse import OptionParser
 from tempfile import mkdtemp
 
@@ -208,7 +209,7 @@ def backup_workloads():
   temp_dir = mkdtemp()
   sh.cp(os.path.join(IMPALA_HOME, "testdata", "workloads"),
         temp_dir, R=True, _out=sys.stdout, _err=sys.stderr)
-  print "Backed up workloads to {0}".format(temp_dir)
+  print("Backed up workloads to {0}".format(temp_dir))
   return temp_dir
 
 
diff --git a/bin/validate-unified-backend-test-filters.py b/bin/validate-unified-backend-test-filters.py
index 6f8602da4..a978917b2 100755
--- a/bin/validate-unified-backend-test-filters.py
+++ b/bin/validate-unified-backend-test-filters.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import os
 import subprocess
 import sys
diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py
index 3bbe83672..02b7029ef 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -25,6 +25,7 @@
 #
 # TODO Add support for SQL Error Codes
 #      https://msdn.microsoft.com/en-us/library/ms714687%28v=vs.85%29.aspx
+from __future__ import print_function
 error_codes = (
   ("OK", 0, ""),
 
diff --git a/common/thrift/generate_metrics.py b/common/thrift/generate_metrics.py
index 42b291e0a..8534d0e0e 100755
--- a/common/thrift/generate_metrics.py
+++ b/common/thrift/generate_metrics.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import sys
 import os
 import re
@@ -129,13 +130,13 @@ def metric_to_mdl(m):
   """Returns the metric in the mdl format, or None if the metric isn't supported."""
   # TODO: Stamp out metrics with arguments, e.g. output each rpc call_duration metric.
   if '$0' in m['key']:
-    print >>sys.stderr, "Skipping metrics with unbound argument, key=%s" % m['key']
+    print("Skipping metrics with unbound argument, key=%s" % m['key'], file=sys.stderr)
     return None
 
   # TODO: Stamp out individual metrics for other metric types.
   SUPPORTED_METRIC_KINDS = ['COUNTER', 'GAUGE']
   if m['kind'] not in SUPPORTED_METRIC_KINDS:
-    print >>sys.stderr, "Skipping %s metric %s" % (m['kind'], m['key'])
+    print("Skipping %s metric %s" % (m['kind'], m['key']), file=sys.stderr)
     return None
 
   return dict(
diff --git a/infra/deploy/deploy.py b/infra/deploy/deploy.py
index 1665e5182..58ebb69fa 100644
--- a/infra/deploy/deploy.py
+++ b/infra/deploy/deploy.py
@@ -32,6 +32,7 @@
 #   Creates a new Impala_Kudu service called "new_service" using /data/impala/
 #   for its scratch directories.
 
+from __future__ import print_function
 import argparse
 import hashlib
 import os
@@ -134,11 +135,11 @@ def find_dependencies(args, cluster):
             if not found:
                 raise Exception("Could not find dependency service (type %s, name %s)" %
                                 (service_type, arg))
-            print "Found explicit dependency service %s" % (found.name)
+            print("Found explicit dependency service %s" % (found.name))
             deps.append(found)
         else:
             if not required:
-                print "Skipping optional dependency of type %s" % (service_type,)
+                print("Skipping optional dependency of type %s" % (service_type,))
                 continue
             if len(candidates) > 1:
                 raise Exception("Found %d possible implicit dependency services of type %s" %
@@ -148,7 +149,7 @@ def find_dependencies(args, cluster):
                                 (service_type,))
             else:
                 found = candidates.values()[0]
-                print "Found implicit dependency service %s" % (found.name,)
+                print("Found implicit dependency service %s" % (found.name,))
                 deps.append(found)
     return deps
 
@@ -157,7 +158,7 @@ def check_new_service_does_not_exist(api, cluster, new_name):
         if service.displayName == new_name:
             raise Exception("New service name %s already in use" % (new_name,))
 
-    print "New service name %s is not in use" % (new_name,)
+    print("New service name %s is not in use" % (new_name,))
 
 def find_template_service(api, cluster, based_on):
     template_service = None
@@ -166,7 +167,7 @@ def find_template_service(api, cluster, based_on):
             if service.type != "IMPALA":
                 raise Exception("Based-on service %s is of wrong type %s" %
                                 (based_on, service.type))
-            print "Found based-on service: %s" % (based_on,)
+            print("Found based-on service: %s" % (based_on,))
             template_service = service
 
     if based_on and not template_service:
@@ -177,10 +178,10 @@ def find_template_service(api, cluster, based_on):
 def find_master_host(api, cm_hostname, master_hostname):
     for h in api.get_all_hosts():
         if master_hostname and h.hostname == master_hostname:
-            print "Found master host %s" % (master_hostname,)
+            print("Found master host %s" % (master_hostname,))
             return h
         elif not master_hostname and h.hostname == cm_hostname:
-            print "Found implicit master host on CM host %s" % (cm_hostname,)
+            print("Found implicit master host on CM host %s" % (cm_hostname,))
             return h
 
     if master_hostname:
@@ -225,11 +226,11 @@ def get_best_parcel(api, cluster):
         parcel = None
 
     if parcel:
-        print "Chose best parcel %s-%s (stage %s)" % (parcel.product,
+        print("Chose best parcel %s-%s (stage %s)" % (parcel.product,
                                                       parcel.version,
-                                                      parcel.stage)
+                                                      parcel.stage))
     else:
-        print "Found no candidate parcels"
+        print("Found no candidate parcels")
 
     return parcel
 
@@ -238,9 +239,9 @@ def ensure_parcel_repo_added(api):
     config = cm.get_config(view='summary')
     parcel_urls = config.get("REMOTE_PARCEL_REPO_URLS", "").split(",")
     if IMPALA_KUDU_PARCEL_URL in parcel_urls:
-        print "Impala_Kudu parcel URL already present"
+        print("Impala_Kudu parcel URL already present")
     else:
-        print "Adding Impala_Kudu parcel URL"
+        print("Adding Impala_Kudu parcel URL")
         parcel_urls.append(IMPALA_KUDU_PARCEL_URL)
         config["REMOTE_PARCEL_REPO_URLS"] = ",".join(parcel_urls)
         cm.update_config(config)
@@ -252,8 +253,8 @@ def wait_for_parcel_stage(cluster, parcel, stage):
             return
         if new_parcel.state.errors:
             raise Exception(str(new_parcel.state.errors))
-        print "progress: %s / %s" % (new_parcel.state.progress,
-                                     new_parcel.state.totalProgress)
+        print("progress: %s / %s" % (new_parcel.state.progress,
+                                     new_parcel.state.totalProgress))
         time.sleep(1)
     else:
         raise Exception("Parcel %s-%s did not reach stage %s in %d seconds" %
@@ -262,33 +263,33 @@ def wait_for_parcel_stage(cluster, parcel, stage):
 def ensure_parcel_activated(cluster, parcel):
     parcel_stage = parcel.stage
     if parcel_stage == "AVAILABLE_REMOTELY":
-        print "Downloading parcel: %s-%s " % (parcel.product, parcel.version)
+        print("Downloading parcel: %s-%s " % (parcel.product, parcel.version))
         parcel.start_download()
         wait_for_parcel_stage(cluster, parcel, "DOWNLOADED")
-        print "Downloaded parcel: %s-%s " % (parcel.product, parcel.version)
+        print("Downloaded parcel: %s-%s " % (parcel.product, parcel.version))
         parcel_stage = "DOWNLOADED"
     if parcel_stage == "DOWNLOADED":
-        print "Distributing parcel: %s-%s " % (parcel.product, parcel.version)
+        print("Distributing parcel: %s-%s " % (parcel.product, parcel.version))
         parcel.start_distribution()
         wait_for_parcel_stage(cluster, parcel, "DISTRIBUTED")
-        print "Distributed parcel: %s-%s " % (parcel.product, parcel.version)
+        print("Distributed parcel: %s-%s " % (parcel.product, parcel.version))
         parcel_stage = "DISTRIBUTED"
     if parcel_stage == "DISTRIBUTED":
-        print "Activating parcel: %s-%s " % (parcel.product, parcel.version)
+        print("Activating parcel: %s-%s " % (parcel.product, parcel.version))
         parcel.activate()
         wait_for_parcel_stage(cluster, parcel, "ACTIVATED")
-        print "Activated parcel: %s-%s " % (parcel.product, parcel.version)
+        print("Activated parcel: %s-%s " % (parcel.product, parcel.version))
         parcel_stage = "ACTIVATED"
 
-    print "Parcel %s-%s is activated" % (parcel.product, parcel.version)
+    print("Parcel %s-%s is activated" % (parcel.product, parcel.version))
 
 def print_configs(entity_name, config_dict):
     for attr, value in config_dict.iteritems():
-        print "Set %s config %s=\'%s\'" % (entity_name, attr, value)
+        print("Set %s config %s=\'%s\'" % (entity_name, attr, value))
 
 def create_new_service(api, cluster, new_name, deps, scratch_dirs, master_host):
     new_service = cluster.create_service(new_name, "IMPALA")
-    print "Created new service %s" % (new_name,)
+    print("Created new service %s" % (new_name,))
 
     service_config = {}
     for d in deps:
@@ -312,13 +313,13 @@ def create_new_service(api, cluster, new_name, deps, scratch_dirs, master_host):
                 md5.update(h.hostId)
                 new_role_name = "%s-%s-%s" % (new_name, rcg.roleType, md5.hexdigest())
                 new_service.create_role(new_role_name, rcg.roleType, h.hostId)
-                print "Created new role %s" % (new_role_name,)
+                print("Created new role %s" % (new_role_name,))
         else:
             md5 = hashlib.md5()
             md5.update(master_host.hostId)
             new_role_name = "%s-%s-%s" % (new_name, rcg.roleType, md5.hexdigest())
             new_service.create_role(new_role_name, rcg.roleType, master_host.hostId)
-            print "Created new role %s" % (new_role_name,)
+            print("Created new role %s" % (new_role_name,))
 
 def transform_path(rcg_name, rcg_config_dict, rcg_config_name):
     # TODO: Do a better job with paths where the role type is embedded.
@@ -379,7 +380,7 @@ def transform_rcg_config(rcg):
 
 def clone_existing_service(cluster, new_name, template_service):
     new_service = cluster.create_service(new_name, "IMPALA")
-    print "Created new service %s" % (new_name,)
+    print("Created new service %s" % (new_name,))
 
     service_config, _ = template_service.get_config()
     service_config["impala_service_env_safety_valve"] = "IMPALA_KUDU=1"
@@ -395,7 +396,7 @@ def clone_existing_service(cluster, new_name, template_service):
             new_rcg = new_service.create_role_config_group(new_rcg_name,
                                                            new_rcg_name,
                                                            old_rcg.roleType)
-            print "Created new rcg %s" % (new_rcg_name,)
+            print("Created new rcg %s" % (new_rcg_name,))
         else:
             new_rcg = new_service.get_role_config_group("%s-%s-BASE" % (new_name,
                                                                         old_rcg.roleType))
@@ -414,7 +415,7 @@ def clone_existing_service(cluster, new_name, template_service):
             new_role = new_service.create_role(new_role_name,
                                                new_rcg.roleType,
                                                old_role.hostRef.hostId)
-            print "Created new role %s" % (new_role_name,)
+            print("Created new role %s" % (new_role_name,))
             new_role_names.append(new_role.name)
         new_rcg.move_roles(new_role_names)
 
@@ -448,7 +449,8 @@ def main():
             parcel = get_best_parcel(api, cluster)
             if parcel:
                 break
-            print "Could not find parcel in attempt %d, will sleep and retry" % (attempt,)
+            print("Could not find parcel in attempt %d, will sleep and retry" %
+                  (attempt,))
             time.sleep(1)
         else:
             raise Exception("No parcel showed up in %d seconds" % (MAX_PARCEL_REPO_WAIT_SECS,))
diff --git a/lib/python/impala_py_lib/gdb/impala-gdb.py b/lib/python/impala_py_lib/gdb/impala-gdb.py
index 48e2662cf..1bf81eabc 100644
--- a/lib/python/impala_py_lib/gdb/impala-gdb.py
+++ b/lib/python/impala_py_lib/gdb/impala-gdb.py
@@ -19,6 +19,7 @@
 # A collection of useful Python GDB modules and commands for
 # debugging Impala core dumps.
 #
+from __future__ import print_function
 import gdb
 from collections import defaultdict
 
diff --git a/lib/python/impala_py_lib/jenkins/generate_junitxml.py b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
index 285aa5d9b..1f6ba6b60 100755
--- a/lib/python/impala_py_lib/jenkins/generate_junitxml.py
+++ b/lib/python/impala_py_lib/jenkins/generate_junitxml.py
@@ -21,6 +21,7 @@ A script for generating arbitrary junit XML reports while building Impala.
 These files will be consumed by jenkins.impala.io to generate reports for
 easier triaging of build and setup errors.
 """
+from __future__ import print_function
 import argparse
 import codecs
 import errno
diff --git a/lib/python/impala_py_lib/profiles.py b/lib/python/impala_py_lib/profiles.py
index a2f6aebb3..fc7a37298 100644
--- a/lib/python/impala_py_lib/profiles.py
+++ b/lib/python/impala_py_lib/profiles.py
@@ -18,6 +18,7 @@
 
 # This file contains library functions to decode and access Impala query profiles.
 
+from __future__ import print_function
 import base64
 import datetime
 import zlib
@@ -30,7 +31,7 @@ def decode_profile_line(line):
   space_separated = line.split(" ")
   if len(space_separated) == 3:
     ts = int(space_separated[0])
-    print datetime.datetime.fromtimestamp(ts / 1000.0).isoformat(), space_separated[1]
+    print(datetime.datetime.fromtimestamp(ts / 1000.0).isoformat(), space_separated[1])
     base64_encoded = space_separated[2]
   elif len(space_separated) == 1:
     base64_encoded = space_separated[0]
diff --git a/testdata/bin/generate-schema-statements.py b/testdata/bin/generate-schema-statements.py
index 8f28e310d..acd369db4 100755
--- a/testdata/bin/generate-schema-statements.py
+++ b/testdata/bin/generate-schema-statements.py
@@ -94,6 +94,7 @@
 # This should be used sparingly, because these commands are executed
 # serially.
 #
+from __future__ import print_function
 import collections
 import csv
 import glob
@@ -140,7 +141,7 @@ parser.add_option("--hdfs_namenode", dest="hdfs_namenode", default="localhost:20
 (options, args) = parser.parse_args()
 
 if options.workload is None:
-  print "A workload name must be specified."
+  print("A workload name must be specified.")
   parser.print_help()
   sys.exit(1)
 
@@ -604,7 +605,7 @@ def eval_section(section_str):
   # features (e.g. "for i in {1..n}")
   p = subprocess.Popen(['/bin/bash', '-c', cmd], stdout=subprocess.PIPE)
   stdout, stderr = p.communicate()
-  if stderr: print stderr
+  if stderr: print(stderr)
   assert p.returncode == 0
   return stdout.strip()
 
@@ -638,30 +639,31 @@ def generate_statements(output_name, test_vectors, sections,
       table_name = section['BASE_TABLE_NAME'].strip()
 
       if table_names and (table_name.lower() not in table_names):
-        print 'Skipping table: %s.%s, table is not in specified table list' % (db, table_name)
+        print('Skipping table: %s.%s, table is not in specified table list' %
+              (db, table_name))
         continue
 
       # Check Hive version requirement, if present.
       if section['HIVE_MAJOR_VERSION'] and \
          section['HIVE_MAJOR_VERSION'].strip() != \
          os.environ['IMPALA_HIVE_MAJOR_VERSION'].strip():
-        print "Skipping table '{0}.{1}': wrong Hive major version".format(db, table_name)
+        print("Skipping table '{0}.{1}': wrong Hive major version".format(db, table_name))
         continue
 
       if table_format in schema_only_constraints and \
          table_name.lower() not in schema_only_constraints[table_format]:
-        print ('Skipping table: %s.%s, \'only\' constraint for format did not '
-              'include this table.') % (db, table_name)
+        print(('Skipping table: %s.%s, \'only\' constraint for format did not '
+              'include this table.') % (db, table_name))
         continue
 
       if schema_include_constraints[table_name.lower()] and \
          table_format not in schema_include_constraints[table_name.lower()]:
-        print 'Skipping \'%s.%s\' due to include constraint match.' % (db, table_name)
+        print('Skipping \'%s.%s\' due to include constraint match.' % (db, table_name))
         continue
 
       if schema_exclude_constraints[table_name.lower()] and\
          table_format in schema_exclude_constraints[table_name.lower()]:
-        print 'Skipping \'%s.%s\' due to exclude constraint match.' % (db, table_name)
+        print('Skipping \'%s.%s\' due to exclude constraint match.' % (db, table_name))
         continue
 
       alter = section.get('ALTER')
@@ -752,7 +754,7 @@ def generate_statements(output_name, test_vectors, sections,
       # TODO: Currently, Kudu does not support partitioned tables via Impala.
       # If a CREATE_KUDU section was provided, assume it handles the partition columns
       if file_format == 'kudu' and partition_columns != '' and not create_kudu:
-        print "Ignore partitions on Kudu table: %s.%s" % (db_name, table_name)
+        print("Ignore partitions on Kudu table: %s.%s" % (db_name, table_name))
         continue
 
       # If a CREATE section is provided, use that. Otherwise a COLUMNS section
@@ -817,15 +819,15 @@ def generate_statements(output_name, test_vectors, sections,
       # and skip loading the data. Otherwise, the data is generated using either an
       # INSERT INTO statement or a LOAD statement.
       if not force_reload and hdfs_location in existing_tables:
-        print 'HDFS path:', data_path, 'contains data. Data loading can be skipped.'
+        print('HDFS path:', data_path, 'contains data. Data loading can be skipped.')
       else:
-        print 'HDFS path:', data_path, 'does not exist or is empty. Data will be loaded.'
+        print('HDFS path:', data_path, 'does not exist or is empty. Data will be loaded.')
         if not db_suffix:
           if load:
             hive_output.load_base.append(build_load_statement(load, db_name,
                                                               db_suffix, table_name))
           else:
-            print 'Empty base table load for %s. Skipping load generation' % table_name
+            print('Empty base table load for %s. Skipping load generation' % table_name)
         elif file_format in ['kudu', 'parquet', 'iceberg']:
           if insert_hive:
             hive_output.load.append(build_insert(insert_hive, db_name, db_suffix,
@@ -835,8 +837,8 @@ def generate_statements(output_name, test_vectors, sections,
                 file_format, codec, compression_type, table_name, data_path,
                 for_impala=True))
           else:
-            print 'Empty parquet/kudu load for table %s. Skipping insert generation' \
-              % table_name
+            print('Empty parquet/kudu load for table %s. Skipping insert generation'
+              % table_name)
         else:
           if insert_hive:
             insert = insert_hive
@@ -844,7 +846,7 @@ def generate_statements(output_name, test_vectors, sections,
             hive_output.load.append(build_insert(insert, db_name, db_suffix, file_format,
                 codec, compression_type, table_name, data_path, create_hive=create_hive))
           else:
-            print 'Empty insert for table %s. Skipping insert generation' % table_name
+            print('Empty insert for table %s. Skipping insert generation' % table_name)
 
     impala_create.write_to_file("create-%s-impala-generated-%s-%s-%s.sql" %
         (output_name, file_format, codec, compression_type))
@@ -879,8 +881,8 @@ def parse_schema_template_file(file_name):
 if __name__ == "__main__":
   if options.table_formats is None:
     if options.exploration_strategy not in KNOWN_EXPLORATION_STRATEGIES:
-      print 'Invalid exploration strategy:', options.exploration_strategy
-      print 'Valid values:', ', '.join(KNOWN_EXPLORATION_STRATEGIES)
+      print('Invalid exploration strategy:', options.exploration_strategy)
+      print('Valid values:', ', '.join(KNOWN_EXPLORATION_STRATEGIES))
       sys.exit(1)
 
     test_vectors = [vector.value for vector in\
@@ -896,7 +898,7 @@ if __name__ == "__main__":
   convert_orc_to_full_acid = options.workload == 'functional-query'
 
   target_dataset = test_vectors[0].dataset
-  print 'Target Dataset: ' + target_dataset
+  print('Target Dataset: ' + target_dataset)
   dataset_load_dir = os.path.join(SQL_OUTPUT_DIR, target_dataset)
   # If the directory containing the sql files does not exist, create it. Else nuke all the
   # files corresponding to the current workload.
@@ -918,7 +920,7 @@ if __name__ == "__main__":
                                       '%s_schema_template.sql' % target_dataset)
 
   if not os.path.isfile(schema_template_file):
-    print 'Schema file not found: ' + schema_template_file
+    print('Schema file not found: ' + schema_template_file)
     sys.exit(1)
 
   constraints_file = os.path.join(DATASET_DIR, target_dataset, 'schema_constraints.csv')
diff --git a/testdata/bin/generate-test-vectors.py b/testdata/bin/generate-test-vectors.py
index 9d0be9e50..4998a8caa 100755
--- a/testdata/bin/generate-test-vectors.py
+++ b/testdata/bin/generate-test-vectors.py
@@ -40,6 +40,7 @@
 # The pairwise generation is done using the Python 'AllPairs' module. This module can be
 # downloaded from http://pypi.python.org/pypi/AllPairs/2.0.1
 #
+from __future__ import print_function
 import collections
 import csv
 import math
@@ -56,7 +57,7 @@ parser.add_option("-w", "--workload", dest="workload",
 (options, args) = parser.parse_args()
 
 if options.workload is None:
-  print "A workload name must be specified."
+  print("A workload name must be specified.")
   parser.print_help()
   sys.exit(1)
 
@@ -115,12 +116,12 @@ def read_dimension_file(file_name):
          continue
       values = line.split(':')
       if len(values) != 2:
-        print 'Invalid dimension file format. Expected format is <dimension name>: val1,'\
-              ' val2, ... Found: ' + line
+        print('Invalid dimension file format. Expected format is <dimension name>: val1,'
+              ' val2, ... Found: ' + line)
         sys.exit(1)
       if not values[0] in KNOWN_DIMENSION_NAMES:
-        print 'Unknown dimension name: ' + values[0]
-        print 'Valid dimension names: ' + ', '.join(KNOWN_DIMENSION_NAMES)
+        print('Unknown dimension name: ' + values[0])
+        print('Valid dimension names: ' + ', '.join(KNOWN_DIMENSION_NAMES))
         sys.exit(1)
       dimension_map[values[0]] = [val.strip() for val in values[1].split(',')]
   return dimension_map
@@ -132,7 +133,7 @@ def write_vectors_to_csv(output_dir, output_file, matrix):
     output_text += '\n' + ', '.join(row)
 
   output_path = os.path.join(output_dir, output_file)
-  print 'Writing test vectors to: ' + output_path
+  print('Writing test vectors to: ' + output_path)
   with open(output_path, 'wb') as output_file:
     output_file.write(output_text)
     output_file.write('\n')
@@ -140,10 +141,10 @@ def write_vectors_to_csv(output_dir, output_file, matrix):
 dimension_file = os.path.join(WORKLOAD_DIR, options.workload,
                               '%s_dimensions.csv' % options.workload)
 if not os.path.isfile(dimension_file):
-  print 'Dimension file not found: ' + dimension_file
+  print('Dimension file not found: ' + dimension_file)
   sys.exit(1)
 
-print 'Reading dimension file: ' + dimension_file
+print('Reading dimension file: ' + dimension_file)
 vector_map = read_dimension_file(dimension_file)
 vectors = []
 
diff --git a/testdata/bin/load-tpc-kudu.py b/testdata/bin/load-tpc-kudu.py
index 01de79ee6..422d064ba 100755
--- a/testdata/bin/load-tpc-kudu.py
+++ b/testdata/bin/load-tpc-kudu.py
@@ -22,6 +22,7 @@
 # Kudu tables are created in the specified 'target-db' using the existing HDFS tables
 # from 'source-db'.
 
+from __future__ import print_function
 import logging
 import os
 import sqlparse
@@ -72,7 +73,7 @@ def load_data():
       query = sqlparse.format(query.rstrip(';'), strip_comments=True)
       query_str = query.format(**sql_params)
       if (len(query_str)) == 0: continue
-      if verbose: print query_str
+      if verbose: print(query_str)
       impala.execute(query_str)
 
 def get_test_file_path(workload):
diff --git a/testdata/bin/rewrite-iceberg-metadata.py b/testdata/bin/rewrite-iceberg-metadata.py
index 1ccee49a7..26997345b 100755
--- a/testdata/bin/rewrite-iceberg-metadata.py
+++ b/testdata/bin/rewrite-iceberg-metadata.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import glob
 import json
 import os
diff --git a/testdata/bin/wait-for-hiveserver2.py b/testdata/bin/wait-for-hiveserver2.py
index bf57ea89d..6e99fa5f3 100755
--- a/testdata/bin/wait-for-hiveserver2.py
+++ b/testdata/bin/wait-for-hiveserver2.py
@@ -22,6 +22,7 @@
 # TODO: Consider combining this with wait-for-metastore.py. A TCLIService client
 # can perhaps also talk to the metastore.
 
+from __future__ import print_function
 import os
 import time
 import getpass
@@ -71,20 +72,20 @@ while time.time() - now < TIMEOUT_SECONDS:
       close_session_req = TCLIService.TCloseSessionReq()
       close_session_req.sessionHandle = resp.sessionHandle
       hs2_client.CloseSession(close_session_req)
-      print "HiveServer2 service is up at %s." % options.hs2_hostport
+      print("HiveServer2 service is up at %s." % options.hs2_hostport)
       exit(0)
   except Exception as e:
     if "SASL" in e.message:  # Bail out on SASL failures
-      print "SASL failure when attempting connection:"
+      print("SASL failure when attempting connection:")
       raise
     if "GSS" in e.message:   # Other GSSAPI failures
-      print "GSS failure when attempting connection:"
+      print("GSS failure when attempting connection:")
       raise
-    print "Waiting for HiveServer2 at %s..." % options.hs2_hostport
-    print e
+    print("Waiting for HiveServer2 at %s..." % options.hs2_hostport)
+    print(e)
   finally:
     hs2_transport.close()
     time.sleep(0.5)
 
-print "HiveServer2 service failed to start within %s seconds." % TIMEOUT_SECONDS
+print("HiveServer2 service failed to start within %s seconds." % TIMEOUT_SECONDS)
 exit(1)
diff --git a/testdata/bin/wait-for-metastore.py b/testdata/bin/wait-for-metastore.py
index 2dc6550dd..0707cc16b 100755
--- a/testdata/bin/wait-for-metastore.py
+++ b/testdata/bin/wait-for-metastore.py
@@ -21,6 +21,7 @@
 # to execute the get_database("default") Thrift RPC until the call succeeds,
 # or a timeout is reached.
 
+from __future__ import print_function
 import os
 import time
 from optparse import OptionParser
@@ -56,19 +57,19 @@ while time.time() - now < TIMEOUT_SECONDS:
     hive_transport.open()
     resp = hive_client.get_database("default")
     if resp is not None:
-      print "Metastore service is up at %s." % options.metastore_hostport
+      print("Metastore service is up at %s." % options.metastore_hostport)
       exit(0)
   except Exception as e:
     if "SASL" in e.message:  # Bail out on SASL failures
-      print "SASL failure when attempting connection:"
+      print("SASL failure when attempting connection:")
       raise
     if "GSS" in e.message:   # Other GSSAPI failures
-      print "GSS failure when attempting connection:"
+      print("GSS failure when attempting connection:")
       raise
-    print "Waiting for the Metastore at %s..." % options.metastore_hostport
+    print("Waiting for the Metastore at %s..." % options.metastore_hostport)
   finally:
     hive_transport.close()
     time.sleep(0.5)
 
-print "Metastore service failed to start within %s seconds." % TIMEOUT_SECONDS
+print("Metastore service failed to start within %s seconds." % TIMEOUT_SECONDS)
 exit(1)
diff --git a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
index 73f7edc1d..00443a71a 100644
--- a/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
+++ b/testdata/cluster/node_templates/common/etc/hadoop/conf/yarn-site.xml.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import os
 import sys
 
@@ -39,7 +40,7 @@ def _get_yarn_nm_ram_mb():
   # - leave at least 20G for other services
   # - don't need more than 48G
   ret = min(max(available_ram_gb * 1024 - 20 * 1024, 4096), 48 * 1024)
-  print >>sys.stderr, "Configuring Yarn NM to use {0}MB RAM".format(ret)
+  print("Configuring Yarn NM to use {0}MB RAM".format(ret), file=sys.stderr)
   return ret
 
 
diff --git a/testdata/common/cgroups.py b/testdata/common/cgroups.py
index c313b563f..5e1f6f73e 100755
--- a/testdata/common/cgroups.py
+++ b/testdata/common/cgroups.py
@@ -19,6 +19,7 @@
 
 # Utility code for creating cgroups for the Impala development environment.
 # May be used as a library or as a command-line utility for manual testing.
+from __future__ import print_function
 import os
 import sys
 import errno
@@ -81,7 +82,7 @@ def create_impala_cgroup_path(instance_num):
 
 if __name__ == "__main__":
   if options.cluster_size < 0:
-    print 'Please specify a cluster size >= 0'
+    print('Please specify a cluster size >= 0')
     sys.exit(1)
   for i in range(options.cluster_size):
     create_impala_cgroup_path(i)
diff --git a/testdata/common/text_delims_table.py b/testdata/common/text_delims_table.py
index b51441b67..92b2d01a4 100755
--- a/testdata/common/text_delims_table.py
+++ b/testdata/common/text_delims_table.py
@@ -22,6 +22,7 @@
 # command line, will generate data files in the specified directory and a
 # print a SQL load statement to incorporate into dataload SQL script generation.
 
+from __future__ import print_function
 from shutil import rmtree
 from optparse import OptionParser
 from contextlib import contextmanager
diff --git a/testdata/common/widetable.py b/testdata/common/widetable.py
index d83c62e00..dcd31de49 100755
--- a/testdata/common/widetable.py
+++ b/testdata/common/widetable.py
@@ -22,6 +22,7 @@
 # generate a CSV data file and prints a SQL load statement to incorporate
 # into dataload SQL script generation.
 
+from __future__ import print_function
 from datetime import datetime, timedelta
 import itertools
 import optparse
@@ -119,7 +120,7 @@ if __name__ == "__main__":
 
   if options.get_columns:
     # Output column descriptors
-    print '\n'.join(get_columns(options.num_columns))
+    print('\n'.join(get_columns(options.num_columns)))
 
   if options.create_data:
     # Generate data locally, and output the SQL load command for use in dataload
diff --git a/tests/benchmark/report_benchmark_results.py b/tests/benchmark/report_benchmark_results.py
index f8a210568..7ce8ba688 100755
--- a/tests/benchmark/report_benchmark_results.py
+++ b/tests/benchmark/report_benchmark_results.py
@@ -29,6 +29,7 @@
 # if necessary (2.5).
 
 from __future__ import division
+from __future__ import print_function
 import difflib
 import json
 import logging
@@ -1150,5 +1151,5 @@ if __name__ == "__main__":
     if ref_grouped:
       ref_impala_version = get_impala_version(ref_grouped)
 
-  print build_summary_header(current_impala_version, ref_impala_version)
-  print report
+  print(build_summary_header(current_impala_version, ref_impala_version))
+  print(report)
diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py
index 8e0860fec..4a0ccb940 100644
--- a/tests/common/impala_cluster.py
+++ b/tests/common/impala_cluster.py
@@ -17,6 +17,7 @@
 #
 # Basic object model of an Impala cluster (set of Impala processes).
 
+from __future__ import print_function
 import json
 import logging
 import os
@@ -158,7 +159,7 @@ class ImpalaCluster(object):
         result = client.execute("select 1")
         assert result.success
         ++n
-      except Exception as e: print e
+      except Exception as e: print(e)
       finally:
         client.close()
     return n
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 98d7ba164..033b0c4f4 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -17,6 +17,7 @@
 #
 # The base class that should be used for almost all Impala tests
 
+from __future__ import print_function
 import glob
 import grp
 import json
@@ -1202,7 +1203,7 @@ class ImpalaTestSuite(BaseTestSuite):
                             db_name=db_name, table_name=table_name))
         return
       except Exception as ex:
-        print str(ex)
+        print(str(ex))
         time.sleep(0.2)
         continue
     raise Exception("Table {0}.{1} didn't show up after {2}s", db_name, table_name,
diff --git a/tests/common/resource_pool_config.py b/tests/common/resource_pool_config.py
index 9347b7e9d..88f9b55e4 100644
--- a/tests/common/resource_pool_config.py
+++ b/tests/common/resource_pool_config.py
@@ -20,6 +20,7 @@
 # the tests it is used for. However, it is generic enough that it can be extended if
 # more functionality is required for adding tests.
 
+from __future__ import print_function
 import os
 from time import sleep, time
 import xml.etree.ElementTree as ET
@@ -98,7 +99,7 @@ class ResourcePoolConfig(object):
         if pool_name == name.split('.')[-1] and pool_attribute in name:
           return property
       except Exception as e:
-        print "Current DOM element being inspected: \n{0}".format(ET.dump(property))
+        print("Current DOM element being inspected: \n{0}".format(ET.dump(property)))
         raise e
     assert False, "{0} attribute not found for pool {1} in the config XML:\n{2}".format(
       pool_attribute, pool_name, ET.dump(xml_root))
diff --git a/tests/comparison/cluster.py b/tests/comparison/cluster.py
index f3a3c5091..81fe672b1 100644
--- a/tests/comparison/cluster.py
+++ b/tests/comparison/cluster.py
@@ -16,6 +16,7 @@
 # under the License.
 
 """This module provides utilities for interacting with a cluster."""
+from __future__ import print_function
 
 # This should be moved into the test/util folder eventually. The problem is this
 # module depends on db_connection which use some query generator classes.
diff --git a/tests/comparison/data_generator_mapper.py b/tests/comparison/data_generator_mapper.py
index 897494035..12135f563 100755
--- a/tests/comparison/data_generator_mapper.py
+++ b/tests/comparison/data_generator_mapper.py
@@ -20,6 +20,7 @@
 '''This is a mapper for use with hadoop streaming. See data_generator.DatabasePopulator
    for more information on how this file is used.
 '''
+from __future__ import print_function
 
 import os
 import random
diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py
index 71786b83d..a70a896c3 100644
--- a/tests/comparison/db_connection.py
+++ b/tests/comparison/db_connection.py
@@ -21,6 +21,7 @@
    connection.
 
 '''
+from __future__ import print_function
 import hashlib
 import impala.dbapi
 import re
diff --git a/tests/comparison/discrepancy_searcher.py b/tests/comparison/discrepancy_searcher.py
index d7a792a16..7a223db8f 100755
--- a/tests/comparison/discrepancy_searcher.py
+++ b/tests/comparison/discrepancy_searcher.py
@@ -22,6 +22,7 @@
    results.
 
 '''
+from __future__ import print_function
 # TODO: IMPALA-4600: refactor this module
 
 from copy import deepcopy
diff --git a/tests/comparison/query_generator.py b/tests/comparison/query_generator.py
index 7fe998278..361c1cb46 100644
--- a/tests/comparison/query_generator.py
+++ b/tests/comparison/query_generator.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 from collections import defaultdict
 from copy import deepcopy
 from itertools import ifilter
diff --git a/tests/comparison/util/verify-oracle-connection.py b/tests/comparison/util/verify-oracle-connection.py
index 3db4ed128..f4435b40f 100755
--- a/tests/comparison/util/verify-oracle-connection.py
+++ b/tests/comparison/util/verify-oracle-connection.py
@@ -31,6 +31,7 @@
 
 # Importing the whole module instead of doing selective import seems to help find linker
 # errors.
+from __future__ import print_function
 import cx_Oracle
 
 # Host on which Oracle Database lies.
@@ -56,7 +57,7 @@ def main():
     finally:
       cursor.close()
     assert rows == EXPECTED_RESULT
-  print 'success'
+  print('success')
 
 
 if '__main__' == __name__:
diff --git a/tests/conftest.py b/tests/conftest.py
index 761f0cb73..0616e9110 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -17,6 +17,7 @@
 
 # py.test configuration module
 #
+from __future__ import print_function
 from impala.dbapi import connect as impala_connect
 from kudu import connect as kudu_connect
 from random import choice, sample
diff --git a/tests/custom_cluster/test_blacklist.py b/tests/custom_cluster/test_blacklist.py
index 7fd61d891..20b18b508 100644
--- a/tests/custom_cluster/test_blacklist.py
+++ b/tests/custom_cluster/test_blacklist.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
 
 import pytest
@@ -226,7 +227,7 @@ class TestBlacklistFaultyDisk(CustomClusterTestSuite):
       dir_path = tempfile.mkdtemp()
       self.created_dirs.append(dir_path)
       result.append(dir_path)
-      print "Generated dir" + dir_path
+      print("Generated dir" + dir_path)
     return result
 
   def setup_method(self, method):
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index d106093ef..aee47772c 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import glob
 import os
 import psutil
@@ -89,7 +90,7 @@ class TestBreakpadBase(CustomClusterTestSuite):
         # For every process in the list we might see the original Impala process plus a
         # forked off child that is writing the minidump. We need to catch both.
         for pid in process.get_pids():
-          print "Checking pid %s" % pid
+          print("Checking pid %s" % pid)
           psutil_process = psutil.Process(pid)
           psutil_process.wait(timeout)
       except psutil.NoSuchProcess:
diff --git a/tests/custom_cluster/test_catalog_hms_failures.py b/tests/custom_cluster/test_catalog_hms_failures.py
index 4d879852b..78c52f42b 100644
--- a/tests/custom_cluster/test_catalog_hms_failures.py
+++ b/tests/custom_cluster/test_catalog_hms_failures.py
@@ -14,6 +14,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import pytest
 import os
 import time
@@ -75,7 +76,7 @@ class TestHiveMetaStoreFailure(CustomClusterTestSuite):
     try:
       self.client.execute("describe %s" % tbl_name)
     except ImpalaBeeswaxException as e:
-      print str(e)
+      print(str(e))
       assert "Failed to load metadata for table: %s. Running 'invalidate metadata %s' "\
           "may resolve this problem." % (tbl_name, tbl_name) in str(e)
     self.run_hive_server()
diff --git a/tests/custom_cluster/test_client_ssl.py b/tests/custom_cluster/test_client_ssl.py
index c368cc4d0..b2a5267d3 100644
--- a/tests/custom_cluster/test_client_ssl.py
+++ b/tests/custom_cluster/test_client_ssl.py
@@ -16,6 +16,7 @@
 # under the License.
 #
 
+from __future__ import print_function
 import json
 import logging
 import os
@@ -128,7 +129,7 @@ class TestClientSsl(CustomClusterTestSuite):
     p.send_cmd("profile")
     result = p.get_result()
 
-    print result.stderr
+    print(result.stderr)
     assert "Query Status: Cancelled" in result.stdout
     assert impalad.wait_for_num_in_flight_queries(0)
 
diff --git a/tests/custom_cluster/test_events_custom_configs.py b/tests/custom_cluster/test_events_custom_configs.py
index e8a8f88fc..36bc7ebdf 100644
--- a/tests/custom_cluster/test_events_custom_configs.py
+++ b/tests/custom_cluster/test_events_custom_configs.py
@@ -14,6 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+from __future__ import print_function
 import logging
 import pytest
 
diff --git a/tests/custom_cluster/test_local_catalog.py b/tests/custom_cluster/test_local_catalog.py
index 78eccb4ae..5f7a25332 100644
--- a/tests/custom_cluster/test_local_catalog.py
+++ b/tests/custom_cluster/test_local_catalog.py
@@ -17,6 +17,7 @@
 
 # Test behaviors specific to --use_local_catalog being enabled.
 
+from __future__ import print_function
 import pytest
 import Queue
 import random
@@ -272,7 +273,7 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
           q = random.choice(queries)
           attempt += 1
           try:
-            print 'Attempt', attempt, 'client', str(client)
+            print('Attempt', attempt, 'client', str(client))
             ret = self.execute_query_unchecked(client, q)
           except Exception as e:
             if 'InconsistentMetadataFetchException' in str(e):
diff --git a/tests/custom_cluster/test_query_retries.py b/tests/custom_cluster/test_query_retries.py
index 1a5f5efdb..d7e8e4fb7 100644
--- a/tests/custom_cluster/test_query_retries.py
+++ b/tests/custom_cluster/test_query_retries.py
@@ -21,6 +21,7 @@
 # TODO: Re-factor tests into multiple classes.
 # TODO: Add a test that cancels queries while a retry is running
 
+from __future__ import print_function
 import pytest
 import re
 import shutil
@@ -1183,7 +1184,7 @@ class TestQueryRetriesFaultyDisk(CustomClusterTestSuite):
       dir_path = tempfile.mkdtemp()
       self.created_dirs.append(dir_path)
       result.append(dir_path)
-      print "Generated dir" + dir_path
+      print("Generated dir" + dir_path)
     return result
 
   def __validate_web_ui_state(self):
diff --git a/tests/custom_cluster/test_restart_services.py b/tests/custom_cluster/test_restart_services.py
index fb1564943..9740974bb 100644
--- a/tests/custom_cluster/test_restart_services.py
+++ b/tests/custom_cluster/test_restart_services.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import logging
 import os
 import pytest
@@ -515,7 +516,7 @@ class TestGracefulShutdown(CustomClusterTestSuite, HS2TestSuite):
   def test_shutdown_executor_with_delay(self):
     """Regression test for IMPALA-7931 that adds delays to status reporting and
     to fetching of results to trigger races that previously resulted in query failures."""
-    print self.exploration_strategy
+    print(self.exploration_strategy)
     if self.exploration_strategy() != 'exhaustive':
       pytest.skip()
     self.do_test_shutdown_executor(fetch_delay_s=5)
diff --git a/tests/custom_cluster/test_scratch_disk.py b/tests/custom_cluster/test_scratch_disk.py
index 35adf1c40..4ed4ebba5 100644
--- a/tests/custom_cluster/test_scratch_disk.py
+++ b/tests/custom_cluster/test_scratch_disk.py
@@ -17,6 +17,7 @@
 #
 # Tests for query expiration.
 
+from __future__ import print_function
 import os
 import pytest
 import re
@@ -82,7 +83,7 @@ class TestScratchDir(CustomClusterTestSuite):
       if not non_existing:
         self.created_dirs.append(dir_path)
       result.append(dir_path)
-      print "Generated dir" + dir_path
+      print("Generated dir" + dir_path)
     return result
 
   def setup_method(self, method):
diff --git a/tests/custom_cluster/test_topic_update_frequency.py b/tests/custom_cluster/test_topic_update_frequency.py
index 99d7b659c..13402714e 100644
--- a/tests/custom_cluster/test_topic_update_frequency.py
+++ b/tests/custom_cluster/test_topic_update_frequency.py
@@ -9,6 +9,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import print_function
 from multiprocessing.pool import ThreadPool
 
 import pytest
diff --git a/tests/custom_cluster/test_udf_concurrency.py b/tests/custom_cluster/test_udf_concurrency.py
index 8643ecf57..8e002655d 100644
--- a/tests/custom_cluster/test_udf_concurrency.py
+++ b/tests/custom_cluster/test_udf_concurrency.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import os
 import pytest
 import random
@@ -82,7 +83,7 @@ class TestUdfConcurrency(CustomClusterTestSuite):
     try:
       setup_client.execute(setup_query)
     except Exception as e:
-      print "Unable to create initial function: {0}".format(setup_query)
+      print("Unable to create initial function: {0}".format(setup_query))
       raise
 
     errors = []
@@ -126,7 +127,7 @@ class TestUdfConcurrency(CustomClusterTestSuite):
     # join all threads.
     for t in runner_threads: t.join()
 
-    for e in errors: print e
+    for e in errors: print(e)
 
     # Checks that no impalad has crashed.
     assert cluster.num_responsive_coordinators() == exp_num_coordinators
@@ -167,13 +168,13 @@ class TestUdfConcurrency(CustomClusterTestSuite):
       s = create_fn_to_use.format(unique_database, udf_tgt_path)
       setup_client.execute(s)
     except Exception as e:
-      print e
+      print(e)
       assert False
     for i in range(0, num_drops):
       try:
         setup_client.execute(create_fn_to_drop.format(unique_database, i, udf_tgt_path))
       except Exception as e:
-        print e
+        print(e)
         assert False
 
     errors = []
@@ -208,5 +209,5 @@ class TestUdfConcurrency(CustomClusterTestSuite):
     for t in runner_threads: t.join()
 
     # Check for any errors.
-    for e in errors: print e
+    for e in errors: print(e)
     assert len(errors) == 0
diff --git a/tests/custom_cluster/test_web_pages.py b/tests/custom_cluster/test_web_pages.py
index 687f6d054..fb983cb9d 100644
--- a/tests/custom_cluster/test_web_pages.py
+++ b/tests/custom_cluster/test_web_pages.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import json
 import random
 import re
diff --git a/tests/metadata/test_hms_integration.py b/tests/metadata/test_hms_integration.py
index 05909089a..828033a9f 100644
--- a/tests/metadata/test_hms_integration.py
+++ b/tests/metadata/test_hms_integration.py
@@ -23,7 +23,7 @@
 # TODO: For each test, verify all the metadata available via Hive and
 # Impala, in all the possible ways of validating that metadata.
 
-
+from __future__ import print_function
 import pytest
 import random
 import string
@@ -735,7 +735,7 @@ class TestHmsIntegration(ImpalaTestSuite):
                             '(x int, y int) stored as parquet') as table_name:
         self.client.execute('invalidate metadata')
         self.client.execute('invalidate metadata %s' % table_name)
-        print self.impala_table_stats(table_name)
+        print(self.impala_table_stats(table_name))
         assert 'PARQUET' == self.impala_table_stats(table_name)[()]['format']
         self.run_stmt_in_hive(
             'alter table %s set fileformat avro' % table_name)
diff --git a/tests/query_test/test_aggregation.py b/tests/query_test/test_aggregation.py
index 5e450485e..165e595ce 100644
--- a/tests/query_test/test_aggregation.py
+++ b/tests/query_test/test_aggregation.py
@@ -17,6 +17,7 @@
 
 # Validates all aggregate functions across all datatypes
 #
+from __future__ import print_function
 import pytest
 
 from testdata.common import widetable
@@ -150,7 +151,8 @@ class TestAggregation(ImpalaTestSuite):
       # NDV is inherently approximate. Compare with some tolerance.
       err = abs(result_lut[key] - int(actual_string))
       rel_err =  err / float(result_lut[key])
-      print key, result_lut[key], actual_string,abs(result_lut[key] - int(actual_string))
+      print(key, result_lut[key], actual_string,
+            abs(result_lut[key] - int(actual_string)))
       assert err <= 1 or rel_err < 0.05
     elif data_type in ('float', 'double') and agg_func != 'count':
       # Compare with a margin of error.
diff --git a/tests/query_test/test_compressed_formats.py b/tests/query_test/test_compressed_formats.py
index 5d8bb17b7..e9187445d 100644
--- a/tests/query_test/test_compressed_formats.py
+++ b/tests/query_test/test_compressed_formats.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import math
 import os
 import pytest
@@ -96,7 +97,7 @@ class TestCompressedFormatsBase(ImpalaTestSuite):
       assert result and int(result) > 0
     except Exception as e:
       error_msg = str(e)
-      print error_msg
+      print(error_msg)
       if expected_error is None or expected_error not in error_msg:
         print("Unexpected error:\n{0}".format(error_msg))
         raise
diff --git a/tests/query_test/test_hdfs_caching.py b/tests/query_test/test_hdfs_caching.py
index 4f9f82c4a..32f91702a 100644
--- a/tests/query_test/test_hdfs_caching.py
+++ b/tests/query_test/test_hdfs_caching.py
@@ -17,6 +17,7 @@
 
 # Validates limit on scan nodes
 
+from __future__ import print_function
 import pytest
 import re
 import time
@@ -87,7 +88,7 @@ class TestHdfsCaching(ImpalaTestSuite):
     elif num_metrics_increased != 1:
       # Test failed, print the metrics
       for i in range(0, len(cached_bytes_before)):
-        print "%d %d" % (cached_bytes_before[i], cached_bytes_after[i])
+        print("%d %d" % (cached_bytes_before[i], cached_bytes_after[i]))
       assert(False)
 
   def test_cache_cancellation(self, vector):
diff --git a/tests/query_test/test_kudu.py b/tests/query_test/test_kudu.py
index 587431dd0..0f1c59a17 100644
--- a/tests/query_test/test_kudu.py
+++ b/tests/query_test/test_kudu.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 from kudu.schema import (
     BOOL,
     DOUBLE,
@@ -481,7 +482,7 @@ class TestKuduOperations(KuduTestSuite):
                 date(2010, 1, 1), '')]
         i += 1
     cursor.execute("select count(*) from %s" % table_name)
-    print cursor.fetchall() == [(i, )]
+    print(cursor.fetchall() == [(i, )])
 
   def test_concurrent_schema_change(self, cursor, unique_database):
     """Tests that an insert into a Kudu table with a concurrent schema change either
diff --git a/tests/query_test/test_limit.py b/tests/query_test/test_limit.py
index 322429906..5233b4682 100644
--- a/tests/query_test/test_limit.py
+++ b/tests/query_test/test_limit.py
@@ -17,6 +17,7 @@
 
 # Validates limit on scan nodes
 
+from __future__ import print_function
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.test_vector import ImpalaTestDimension
@@ -79,5 +80,5 @@ class TestLimitBase(ImpalaTestSuite):
     except ImpalaBeeswaxException as e:
       assert not should_succeed, 'Query was not expected to fail: %s' % e
       if (expected_error not in str(e)):
-        print str(e)
+        print(str(e))
       assert expected_error in str(e)
diff --git a/tests/query_test/test_scanners.py b/tests/query_test/test_scanners.py
index 5298ab152..be5a47578 100644
--- a/tests/query_test/test_scanners.py
+++ b/tests/query_test/test_scanners.py
@@ -21,6 +21,7 @@
 # tests can run with the normal exploration strategy and the overall test runtime doesn't
 # explode.
 
+from __future__ import print_function
 import os
 import pytest
 import random
@@ -410,10 +411,10 @@ class TestHdfsScannerSkew(ImpalaTestSuite):
           ratio = float(a) / float(b)
         else:
           ratio = float(b) / float(a)
-        print "Intra-node bytes read ratio:", ratio
+        print("Intra-node bytes read ratio:", ratio)
         if ratio < SKEW_THRESHOLD:
           count_skew += 1
-          print "Profile of skewed execution: ", profile
+          print("Profile of skewed execution: ", profile)
       return count_skew
 
     tbl_name = unique_database + ".lineitem_skew"
diff --git a/tests/run-tests.py b/tests/run-tests.py
index 0ddabff4b..7c1ae8559 100755
--- a/tests/run-tests.py
+++ b/tests/run-tests.py
@@ -22,6 +22,7 @@
 # executing the remaining tests in parallel. To run only some of
 # these, use --skip-serial, --skip-stress, or --skip-parallel.
 # All additional command line options are passed to py.test.
+from __future__ import print_function
 from tests.common.impala_cluster import ImpalaCluster
 from tests.common.impala_service import ImpaladService
 from tests.conftest import configure_logging
@@ -226,16 +227,16 @@ def build_ignore_dir_arg_list(valid_dirs):
 def print_metrics(substring):
   """Prints metrics with the given substring in the name"""
   for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
-    print ">" * 80
+    print(">" * 80)
     port = impalad.get_webserver_port()
     cert = impalad._get_webserver_certificate_file()
-    print "connections metrics for impalad at port {0}:".format(port)
+    print("connections metrics for impalad at port {0}:".format(port))
     debug_info = json.loads(ImpaladService(impalad.hostname, webserver_port=port,
         webserver_certificate_file=cert).read_debug_webpage('metrics?json'))
     for metric in debug_info['metric_group']['metrics']:
       if substring in metric['name']:
-        print json.dumps(metric, indent=1)
-    print "<" * 80
+        print(json.dumps(metric, indent=1))
+    print("<" * 80)
 
 
 def detect_and_remove_flag(flag):
@@ -265,7 +266,7 @@ if __name__ == "__main__":
 
   def run(args):
     """Helper to print out arguments of test_executor before invoking."""
-    print "Running TestExecutor with args: %s" % (args,)
+    print("Running TestExecutor with args: %s" % (args,))
     test_executor.run_tests(args)
 
   os.chdir(TEST_DIR)
diff --git a/tests/shell/test_shell_interactive.py b/tests/shell/test_shell_interactive.py
index d957db135..30ae99f16 100755
--- a/tests/shell/test_shell_interactive.py
+++ b/tests/shell/test_shell_interactive.py
@@ -18,6 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import httplib
 import logging
 import os
@@ -896,14 +897,14 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
       os.chdir("%s/tests/shell/" % os.environ['IMPALA_HOME'])
       result = run_impala_shell_interactive(vector,
         "sOuRcE shell_case_sensitive.cmds; SeLeCt 'second command'")
-      print result.stderr
+      print(result.stderr)
 
       assert "Query: uSe FUNCTIONAL" in result.stderr
       assert "Query: ShOw TABLES" in result.stderr
       assert "alltypes" in result.stdout
       # This is from shell_case_sensitive2.cmds, the result of sourcing a file
       # from a sourced file.
-      print result.stderr
+      print(result.stderr)
       assert "SeLeCt 'second command'" in result.stderr
     finally:
       os.chdir(cwd)
diff --git a/tests/shell/util.py b/tests/shell/util.py
index 98982d126..f369c646d 100755
--- a/tests/shell/util.py
+++ b/tests/shell/util.py
@@ -18,6 +18,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 import logging
 import os
 import socket
diff --git a/tests/statestore/test_statestore.py b/tests/statestore/test_statestore.py
index 9daedba30..5dfead822 100644
--- a/tests/statestore/test_statestore.py
+++ b/tests/statestore/test_statestore.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 from collections import defaultdict
 import json
 import logging
@@ -155,7 +156,7 @@ class KillableThreadedServer(TServer):
     except TTransport.TTransportException as tx:
       pass
     except Exception as x:
-      print x
+      print(x)
 
     itrans.close()
     otrans.close()
diff --git a/tests/stress/extract_min_mem.py b/tests/stress/extract_min_mem.py
index e84148ba3..5749d164f 100755
--- a/tests/stress/extract_min_mem.py
+++ b/tests/stress/extract_min_mem.py
@@ -33,6 +33,7 @@
 #
 #   ./tests/stress/extract_min_mem.py mem_usage_scaling_runtime_info.json
 #
+from __future__ import print_function
 import json
 import sys
 
@@ -46,4 +47,4 @@ with open(sys.argv[1]) as f:
                    runtime_info['required_mem_mb_with_spilling']))
 
 results.sort()
-print ', '.join(["'Q{0}': {1}".format(num, mem) for num, mem in results])
+print(', '.join(["'Q{0}': {1}".format(num, mem) for num, mem in results]))
diff --git a/tests/stress/runtime_info.py b/tests/stress/runtime_info.py
index 6cba38590..d9ac7c757 100644
--- a/tests/stress/runtime_info.py
+++ b/tests/stress/runtime_info.py
@@ -20,6 +20,7 @@
 # Utility functions used by the stress test to save and load runtime info
 # about queries to and from JSON files.
 
+from __future__ import print_function
 from collections import defaultdict
 import json
 import logging
diff --git a/tests/unittests/test_file_parser.py b/tests/unittests/test_file_parser.py
index 54a00f709..5e79eae1e 100644
--- a/tests/unittests/test_file_parser.py
+++ b/tests/unittests/test_file_parser.py
@@ -17,6 +17,7 @@
 
 # Unit tests for the test file parser
 
+from __future__ import print_function
 from tests.common.base_test_suite import BaseTestSuite
 from tests.util.test_file_parser import parse_test_file_text
 
@@ -66,7 +67,7 @@ class TestTestFileParser(BaseTestSuite):
   def test_valid_parse(self):
     results = parse_test_file_text(test_text, VALID_SECTIONS)
     assert len(results) == 3
-    print results[0]
+    print(results[0])
     expected_results = {'QUERY': '# comment\nSELECT blah from Foo\ns\n',
                         'TYPES': 'string\n', 'RESULTS': "'Hi'\n",
                         'LINEAGE': "test_lineage_str > 'foo' AND 'bar'\nmulti_line\n"}
@@ -106,6 +107,6 @@ class TestTestFileParser(BaseTestSuite):
                         "#---- QUERY: TEST_WORKLOAD_Q2\n"
                         "#SELECT int_col from Bar\n"
                         "#---- RESULTS\n#231\n#---- TYPES\n#int\n"}
-    print expected_results
-    print results[1]
+    print(expected_results)
+    print(results[1])
     assert results[1] == expected_results
diff --git a/tests/util/acid_txn.py b/tests/util/acid_txn.py
index c50f4572f..0bc82125b 100644
--- a/tests/util/acid_txn.py
+++ b/tests/util/acid_txn.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from __future__ import print_function
 from tests.util.thrift_util import create_transport
 from hive_metastore import ThriftHiveMetastore
 from hive_metastore.ttypes import (AbortTxnRequest, AllocateTableWriteIdsRequest,
@@ -148,4 +149,4 @@ class AcidTxn(object):
         try:
           self.commit_txn(txn)
         except Exception as e:
-          print str(e)
+          print(str(e))
diff --git a/tests/util/run_impyla_http_query.py b/tests/util/run_impyla_http_query.py
index 39b1ba379..df81f8793 100755
--- a/tests/util/run_impyla_http_query.py
+++ b/tests/util/run_impyla_http_query.py
@@ -20,6 +20,7 @@
 # It can be used by other tests (e.g. LdapImpylaHttpTest.java) that start a cluster with
 # an LDAP server to validate Impyla's functionality.
 
+from __future__ import print_function
 import argparse
 import logging
 
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index 37e42652d..e94e290da 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -17,6 +17,7 @@
 
 # This module is used for common utilities related to parsing test files
 
+from __future__ import print_function
 import codecs
 import collections
 import logging
@@ -210,8 +211,8 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True):
 
       if subsection_name not in valid_section_names:
         if skip_unknown_sections or not subsection_name:
-          print sub_section
-          print 'Unknown section \'%s\'' % subsection_name
+          print(sub_section)
+          print('Unknown section \'%s\'' % subsection_name)
           continue
         else:
           raise RuntimeError, 'Unknown subsection: %s' % subsection_name


[impala] 02/06: IMPALA-11951: Add tools for checking/fixing python 3 syntax

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ff62a4df3927ad054d5374e922c8399c2a227182
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Feb 26 13:04:52 2023 -0800

    IMPALA-11951: Add tools for checking/fixing python 3 syntax
    
    This adds the bin/check-python-syntax.sh script, which
    runs "python -m compileall" for all python files in
    Impala with both python2 and python3. This detects
    syntax errors in the python files. This will be
    incorporated into precommit once it is clean.
    
    This also adds future to the impala-python virtualenv.
    This provides the futurize script (exposed via
    impala-futurize), which can be used to automatically
    fix some py2/py3 issues. Future also provides the
    builtins library, which can provide python 3
    functionality on python 2.
    
    Testing:
     - Ran impala-futurize locally
     - Ran the script repeatedly while fixing syntax errors
    
    Change-Id: Iae2c51bc6ddc9b6a04469ee1b8284227fed3bd45
    Reviewed-on: http://gerrit.cloudera.org:8080/19550
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Michael Smith <mi...@cloudera.com>
---
 bin/check-python-syntax.sh         | 71 ++++++++++++++++++++++++++++++++++++++
 bin/impala-futurize                | 21 +++++++++++
 infra/python/deps/requirements.txt |  1 +
 3 files changed, 93 insertions(+)

diff --git a/bin/check-python-syntax.sh b/bin/check-python-syntax.sh
new file mode 100755
index 000000000..36a7ff87e
--- /dev/null
+++ b/bin/check-python-syntax.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euo pipefail
+
+pushd ${IMPALA_HOME} > /dev/null 2>&1
+
+RETCODE=0
+for file in $(git ls-files '**/*.py'); do
+    # Skip the shell's ext-py code
+    if [[ "${file}" =~ "shell/ext-py" ]]; then
+        continue
+    fi
+    # Skip the shell's pkg_resources.py
+    if [[ "${file}" == "shell/pkg_resources.py" ]]; then
+        continue
+    fi
+
+    # Python 2 checks
+    # -l = no recursion
+    # -q = only print errors
+    # -f = force recompile
+    if ! python2 -m compileall -l -q -f ${file} > /dev/null 2>&1; then
+        RETCODE=1
+        echo "Python 2 compilation failed for ${file}:"
+        set +e
+        python2 -m compileall -l -q -f ${file}
+        set -e
+    fi
+    # Clean up the .pyc files generated by compilation
+    if [[ -f "${file}c" ]]; then
+        rm "${file}c"
+    fi
+
+    # Python 3 checks
+    # -l = no recursion
+    # -q = only print errors
+    # -f = force recompile
+    if ! python3 -m compileall -l -q -f ${file} > /dev/null 2>&1  ; then
+        RETCODE=1
+        echo "Python 3 compilation failed for ${file}:"
+        set +e
+        python3 -m compileall -l -q -f ${file}
+        set -e
+    fi
+    # Clean up the __pycache__ directories generated by compilation
+    py_cache_dir="$(dirname ${file})/__pycache__"
+    if [[ -d "${py_cache_dir}" ]]; then
+        rm -rf ${py_cache_dir}
+    fi
+done
+
+popd > /dev/null 2>&1
+
+exit ${RETCODE}
diff --git a/bin/impala-futurize b/bin/impala-futurize
new file mode 100755
index 000000000..b761b5212
--- /dev/null
+++ b/bin/impala-futurize
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+source "$(dirname "$0")/impala-python-common.sh"
+exec "$PY_ENV_DIR/bin/futurize" "$@"
diff --git a/infra/python/deps/requirements.txt b/infra/python/deps/requirements.txt
index 8f8ada7a7..271ddeadc 100644
--- a/infra/python/deps/requirements.txt
+++ b/infra/python/deps/requirements.txt
@@ -37,6 +37,7 @@ flake8 == 3.9.2
     contextlib2 == 0.6.0
     pathlib2 == 2.3.7.post1
     zipp == 1.2.0
+future == 0.18.3
 gcovr == 4.2
   Jinja2 == 2.11.3
     MarkupSafe == 1.1.1


[impala] 06/06: IMPALA-11952 (part 4): Fix odds and ends: Octals, long, lambda, etc.

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ba3518366a503efceb859102c8e6d6450083ae38
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Feb 26 14:40:24 2023 -0800

    IMPALA-11952 (part 4): Fix odds and ends: Octals, long, lambda, etc.
    
    There are a variety of small python 3 syntax differences:
     - Octal constants need to start with 0o rather than just 0
     - Long constants are not supported (i.e. numbers ending with L)
     - Lambda syntax is slightly different
     - The 'ur' string mode is no longer supported
    
    Testing:
     - check-python-syntax.sh now passes
    
    Change-Id: Ie027a50ddf6a2a0db4b34ec9b49484ce86947f20
    Reviewed-on: http://gerrit.cloudera.org:8080/19554
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
---
 bin/check-rat-report.py                          |  2 +-
 docker/test-with-docker.py                       |  2 +-
 tests/comparison/data_generator_mapred_common.py |  2 +-
 tests/comparison/leopard/front_end.py            |  2 +-
 tests/comparison/leopard/impala_docker_env.py    |  3 +-
 tests/comparison/leopard/report.py               | 40 ++++++++++++------------
 tests/custom_cluster/test_breakpad.py            |  2 +-
 tests/custom_cluster/test_client_ssl.py          |  2 +-
 tests/custom_cluster/test_thrift_socket.py       |  2 +-
 tests/util/get_parquet_metadata.py               |  2 +-
 10 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/bin/check-rat-report.py b/bin/check-rat-report.py
index 3c6870bee..0346a16ee 100755
--- a/bin/check-rat-report.py
+++ b/bin/check-rat-report.py
@@ -66,7 +66,7 @@ for r in resources:
   if not excluded:
     typename = r.findall('type')[0].attrib['name']
     if not (clean_name[0:9] == 'testdata/' and typename in ['archive', 'binary']
-            and clean_name[-4:] <> '.jar'):
+            and clean_name[-4:] != '.jar'):
       sys.stderr.write(
           "%s: %s\n" %
           ('UNAPPROVED' if approvals else "NO APPROVALS; " + typename, clean_name))
diff --git a/docker/test-with-docker.py b/docker/test-with-docker.py
index 47e505a7e..55ae96372 100755
--- a/docker/test-with-docker.py
+++ b/docker/test-with-docker.py
@@ -790,7 +790,7 @@ class TestSuiteRunner(object):
     # io-file-mgr-test expects a real-ish file system at /tmp;
     # we mount a temporary directory into the container to appease it.
     tmpdir = tempfile.mkdtemp(prefix=test_with_docker.name + "-" + self.name)
-    os.chmod(tmpdir, 01777)
+    os.chmod(tmpdir, 0o1777)
     # Container names are sometimes used as hostnames, and DNS names shouldn't
     # have underscores.
     container_name = test_with_docker.name + "-" + self.name.replace("_", "-")
diff --git a/tests/comparison/data_generator_mapred_common.py b/tests/comparison/data_generator_mapred_common.py
index bcf162089..df6c087b5 100644
--- a/tests/comparison/data_generator_mapred_common.py
+++ b/tests/comparison/data_generator_mapred_common.py
@@ -62,7 +62,7 @@ class TextTableDataGenerator(object):
           val_generator = col_val_generators[col_idx]
           for idx in xrange(val_buffer_size):
             val = next(val_generator)
-            val_buffer[idx] = "\N" if val is None else val
+            val_buffer[idx] = r"\N" if val is None else val
       for col_idx, col in enumerate(cols):
         if col_idx > 0:
           # Postgres doesn't seem to have an option to specify that the last column value
diff --git a/tests/comparison/leopard/front_end.py b/tests/comparison/leopard/front_end.py
index 9752f7b98..ada5bd130 100755
--- a/tests/comparison/leopard/front_end.py
+++ b/tests/comparison/leopard/front_end.py
@@ -309,7 +309,7 @@ def front_page():
       'index.template',
       assets=ASSETS,
       reports=sorted(
-          app.reports.items(), key=lambda (k, report): report.run_date, reverse=True),
+          app.reports.items(), key=lambda k, report: report.run_date, reverse=True),
       schedule_items=schedule_items)
 
 if __name__ == '__main__':
diff --git a/tests/comparison/leopard/impala_docker_env.py b/tests/comparison/leopard/impala_docker_env.py
index 8cb60101b..63fc33199 100755
--- a/tests/comparison/leopard/impala_docker_env.py
+++ b/tests/comparison/leopard/impala_docker_env.py
@@ -302,6 +302,7 @@ class ImpalaDockerEnv(object):
     # data as a volume to bypass AUFS. See also the README for Leopard.
     LOG.info('Warming testdata cluster external volume')
     self.start_new_container()
+    volume_map = None
     with settings(
         warn_only=True,
         host_string=self.host,
@@ -325,8 +326,6 @@ class ImpalaDockerEnv(object):
       volume_map = {
           HOST_TESTDATA_EXTERNAL_VOLUME_PATH: DOCKER_TESTDATA_VOLUME_PATH,
       }
-    else:
-      volume_map = None
 
     self.start_new_container(volume_map=volume_map)
     LOG.info('Container Started')
diff --git a/tests/comparison/leopard/report.py b/tests/comparison/leopard/report.py
index d1bbf5cca..db463c993 100644
--- a/tests/comparison/leopard/report.py
+++ b/tests/comparison/leopard/report.py
@@ -49,23 +49,23 @@ class Report(object):
 
   def classify_error(self, error):
     d = {
-      ur'LINE \d+:': 'Postgres_error',
-      ur'Permission denied': 'permission_denied',
-      ur'^AnalysisException': 'AnalysisException',
-      ur'^Column \d+ in row \d+ does not match': 'mismatch',
-      ur'^Could not connect': 'could_not_connect',
-      ur'^IllegalStateException': 'IllegalStateException',
-      ur'^Invalid query handle: ': 'invalid_query_handle',
-      ur'^Invalid or unknown query handle: ': 'invalid_query_handle',
-      ur'^Known issue:': 'known_issue',
-      ur'^Operation is in ERROR_STATE': 'error_state',
-      ur'^Query timed out after \d+ seconds': 'timeout',
-      ur'^Row counts do not match': 'row_counts',
-      ur'^Too much data': 'too_much_data',
-      ur'^Unknown expr node type: \d+': 'unkown_node',
-      ur'^Year is out of valid range': 'year_range',
-      ur'^[A-Za-z]+ out of range': 'out_of_range',
-      ur'^division by zero': 'division_by_zero'}
+      r'LINE \d+:': 'Postgres_error',
+      r'Permission denied': 'permission_denied',
+      r'^AnalysisException': 'AnalysisException',
+      r'^Column \d+ in row \d+ does not match': 'mismatch',
+      r'^Could not connect': 'could_not_connect',
+      r'^IllegalStateException': 'IllegalStateException',
+      r'^Invalid query handle: ': 'invalid_query_handle',
+      r'^Invalid or unknown query handle: ': 'invalid_query_handle',
+      r'^Known issue:': 'known_issue',
+      r'^Operation is in ERROR_STATE': 'error_state',
+      r'^Query timed out after \d+ seconds': 'timeout',
+      r'^Row counts do not match': 'row_counts',
+      r'^Too much data': 'too_much_data',
+      r'^Unknown expr node type: \d+': 'unkown_node',
+      r'^Year is out of valid range': 'year_range',
+      r'^[A-Za-z]+ out of range': 'out_of_range',
+      r'^division by zero': 'division_by_zero'}
 
     for r in d:
       if re.search(r, error):
@@ -94,7 +94,7 @@ class Report(object):
     stack = query_result['formatted_stack']
     if stack:
       for line in stack.split('\n'):
-        match = re.search(ur'(impala::.*) \(', line)
+        match = re.search(r'(impala::.*) \(', line)
         if match:
           return match.group(1)
     else:
@@ -106,10 +106,10 @@ class Report(object):
 
     def clean_frame(frame):
       #remove memory address from each frame
-      reg = re.match(ur'#\d+ *0x[0123456789abcdef]* in (.*)', frame)
+      reg = re.match(r'#\d+ *0x[0123456789abcdef]* in (.*)', frame)
       if reg: return reg.group(1)
       # this is for matching lines like "#7  SLL_Next (this=0x9046780, src=0x90467c8...
-      reg = re.match(ur'#\d+ *(\S.*)', frame)
+      reg = re.match(r'#\d+ *(\S.*)', frame)
       if reg: return reg.group(1)
       return frame
 
diff --git a/tests/custom_cluster/test_breakpad.py b/tests/custom_cluster/test_breakpad.py
index aee47772c..1d17f8570 100644
--- a/tests/custom_cluster/test_breakpad.py
+++ b/tests/custom_cluster/test_breakpad.py
@@ -463,7 +463,7 @@ class TestLogging(TestBreakpadBase):
     test_error_msg = ('123456789abcde_' * 64)  # 1 KB error message
     test_debug_actions = 'LOG_MAINTENANCE_STDERR:FAIL@1.0@' + test_error_msg
     daemon = 'impalad'
-    os.chmod(self.tmp_dir, 0744)
+    os.chmod(self.tmp_dir, 0o744)
 
     expected_log_max_bytes = int(1.2 * 1024**2)  # 1.2 MB
     self.assert_logs(daemon, 0, expected_log_max_bytes)
diff --git a/tests/custom_cluster/test_client_ssl.py b/tests/custom_cluster/test_client_ssl.py
index b2a5267d3..b02a688aa 100644
--- a/tests/custom_cluster/test_client_ssl.py
+++ b/tests/custom_cluster/test_client_ssl.py
@@ -35,7 +35,7 @@ from tests.common.test_dimensions import create_client_protocol_dimension
 from tests.shell.util import run_impala_shell_cmd, run_impala_shell_cmd_no_expect, \
     ImpalaShell, create_impala_shell_executable_dimension
 
-REQUIRED_MIN_OPENSSL_VERSION = 0x10001000L
+REQUIRED_MIN_OPENSSL_VERSION = 0x10001000
 # Python supports TLSv1.2 from 2.7.9 officially but on Red Hat/CentOS Python2.7.5
 # with newer python-libs (eg python-libs-2.7.5-77) supports TLSv1.2 already
 if IS_REDHAT_DERIVATIVE:
diff --git a/tests/custom_cluster/test_thrift_socket.py b/tests/custom_cluster/test_thrift_socket.py
index 42d5fa142..c276384fd 100644
--- a/tests/custom_cluster/test_thrift_socket.py
+++ b/tests/custom_cluster/test_thrift_socket.py
@@ -27,7 +27,7 @@ from tests.common.test_vector import ImpalaTestVector
 from tests.common.test_dimensions import create_client_protocol_dimension
 from tests.shell.util import ImpalaShell
 
-REQUIRED_MIN_OPENSSL_VERSION = 0x10001000L
+REQUIRED_MIN_OPENSSL_VERSION = 0x10001000
 # Python supports TLSv1.2 from 2.7.9 officially but on Red Hat/CentOS Python2.7.5
 # with newer python-libs (eg python-libs-2.7.5-77) supports TLSv1.2 already
 if IS_REDHAT_DERIVATIVE:
diff --git a/tests/util/get_parquet_metadata.py b/tests/util/get_parquet_metadata.py
index 107b33934..db4ef3f41 100644
--- a/tests/util/get_parquet_metadata.py
+++ b/tests/util/get_parquet_metadata.py
@@ -41,7 +41,7 @@ def julian_day_to_date(julian_day):
   arbitrarily and can be validated with an online converter like
   http://aa.usno.navy.mil/jdconverter?ID=AA&jd=2457755
   """
-  return date(2017, 01, 01) + timedelta(julian_day - 2457755)
+  return date(2017, 1, 1) + timedelta(julian_day - 2457755)
 
 
 def nanos_to_time(nanos):


[impala] 05/06: IMPALA-11952 (part 3): Fix raise syntax

Posted by jo...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit c1794023bc35062b041b76012bfd1b1f6078d036
Author: Joe McDonnell <jo...@cloudera.com>
AuthorDate: Sun Feb 26 14:05:05 2023 -0800

    IMPALA-11952 (part 3): Fix raise syntax
    
    Python 3 does not support this old raise syntax:
    
    raise Exception, "message"
    
    Instead, it should be:
    
    raise Exception("message")
    
    This fixes all locations with the old raise syntax.
    
    Testing:
     - check-python-syntax.sh shows no errors from raise syntax
    
    Change-Id: I2722dcc2727fb65c7aedede12d73ca5b088326d7
    Reviewed-on: http://gerrit.cloudera.org:8080/19553
    Reviewed-by: Joe McDonnell <jo...@cloudera.com>
    Reviewed-by: Michael Smith <mi...@cloudera.com>
    Tested-by: Michael Smith <mi...@cloudera.com>
---
 tests/beeswax/impala_beeswax.py      |  2 +-
 tests/common/impala_test_suite.py    |  2 +-
 tests/common/test_dimensions.py      | 14 +++++++-------
 tests/common/test_result_verifier.py |  4 ++--
 tests/common/test_vector.py          |  2 +-
 tests/util/plugin_runner.py          |  2 +-
 tests/util/test_file_parser.py       | 16 ++++++++--------
 7 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py
index 3edfb2584..201532dc9 100644
--- a/tests/beeswax/impala_beeswax.py
+++ b/tests/beeswax/impala_beeswax.py
@@ -136,7 +136,7 @@ class ImpalaBeeswaxClient(object):
 
   def set_query_options(self, query_option_dict):
     if query_option_dict is None:
-      raise ValueError, 'Cannot pass None value for query options'
+      raise ValueError('Cannot pass None value for query options')
     self.clear_query_options()
     for name, value in query_option_dict.iteritems():
       self.set_query_option(name, value)
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index 033b0c4f4..0245b9330 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -1101,7 +1101,7 @@ class ImpalaTestSuite(BaseTestSuite):
       for workload_strategy in workload_strategies:
         workload_strategy = workload_strategy.split(':')
         if len(workload_strategy) != 2:
-          raise ValueError, 'Invalid workload:strategy format: %s' % workload_strategy
+          raise ValueError('Invalid workload:strategy format: %s' % workload_strategy)
         if cls.get_workload() == workload_strategy[0]:
           return workload_strategy[1]
     return default_strategy
diff --git a/tests/common/test_dimensions.py b/tests/common/test_dimensions.py
index 16a8c504c..869179402 100644
--- a/tests/common/test_dimensions.py
+++ b/tests/common/test_dimensions.py
@@ -60,14 +60,14 @@ class TableFormatInfo(object):
 
   def __validate(self):
     if self.file_format not in TableFormatInfo.KNOWN_FILE_FORMATS:
-      raise ValueError, 'Unknown file format: %s' % self.file_format
+      raise ValueError('Unknown file format: %s' % self.file_format)
     if self.compression_codec not in TableFormatInfo.KNOWN_COMPRESSION_CODECS:
-      raise ValueError, 'Unknown compression codec: %s' % self.compression_codec
+      raise ValueError('Unknown compression codec: %s' % self.compression_codec)
     if self.compression_type not in TableFormatInfo.KNOWN_COMPRESSION_TYPES:
-      raise ValueError, 'Unknown compression type: %s' % self.compression_type
+      raise ValueError('Unknown compression type: %s' % self.compression_type)
     if (self.compression_codec == 'none' or self.compression_type == 'none') and\
         self.compression_codec != self.compression_type:
-      raise ValueError, 'Invalid combination of compression codec/type: %s' % str(self)
+      raise ValueError('Invalid combination of compression codec/type: %s' % str(self))
 
   @staticmethod
   def create_from_string(dataset, table_format_string):
@@ -79,11 +79,11 @@ class TableFormatInfo(object):
     or 'none' if the table is uncompressed.
     """
     if table_format_string is None:
-      raise ValueError, 'Table format string cannot be None'
+      raise ValueError('Table format string cannot be None')
 
     format_parts = table_format_string.strip().split('/')
     if len(format_parts) not in range(2, 4):
-      raise ValueError, 'Invalid table format %s' % table_format_string
+      raise ValueError('Invalid table format %s' % table_format_string)
 
     file_format, compression_codec = format_parts[:2]
     if len(format_parts) == 3:
@@ -288,7 +288,7 @@ def load_table_info_dimension(workload_name, exploration_strategy, file_formats=
       WORKLOAD_DIR, workload_name, '%s_%s.csv' % (workload_name, exploration_strategy))
 
   if not os.path.isfile(test_vector_file):
-    raise RuntimeError, 'Vector file not found: ' + test_vector_file
+    raise RuntimeError('Vector file not found: ' + test_vector_file)
 
   vector_values = []
 
diff --git a/tests/common/test_result_verifier.py b/tests/common/test_result_verifier.py
index 790b895fc..1532712c4 100644
--- a/tests/common/test_result_verifier.py
+++ b/tests/common/test_result_verifier.py
@@ -139,12 +139,12 @@ class ResultRow(object):
     if isinstance(key, basestring):
       for col in self.columns:
         if col.column_label == key.lower(): return col.value
-      raise IndexError, 'No column with label: ' + key
+      raise IndexError('No column with label: ' + key)
     elif isinstance(key, int):
       # If the key (column position) does not exist this will throw an IndexError when
       # indexing into the self.columns
       return str(self.columns[key])
-    raise TypeError, 'Unsupported indexing key type: ' + type(key)
+    raise TypeError('Unsupported indexing key type: ' + type(key))
 
   def __eq__(self, other):
     if not isinstance(other, self.__class__):
diff --git a/tests/common/test_vector.py b/tests/common/test_vector.py
index 05b49b936..005c35adb 100644
--- a/tests/common/test_vector.py
+++ b/tests/common/test_vector.py
@@ -129,7 +129,7 @@ class ImpalaTestMatrix(object):
     elif exploration_strategy in ['core', 'pairwise']:
       return self.__generate_pairwise_combinations()
     else:
-      raise ValueError, 'Unknown exploration strategy: %s' % exploration_strategy
+      raise ValueError('Unknown exploration strategy: %s' % exploration_strategy)
 
   def __generate_exhaustive_combinations(self):
     return [ImpalaTestVector(vec) for vec in product(*self.__extract_vector_values())
diff --git a/tests/util/plugin_runner.py b/tests/util/plugin_runner.py
index a0f0d9014..2f2849f15 100644
--- a/tests/util/plugin_runner.py
+++ b/tests/util/plugin_runner.py
@@ -72,7 +72,7 @@ class PluginRunner(object):
     # If the user's entered a plugin that does not exist, raise an error.
     if len(plugins_not_found):
       msg = "Plugin(s) not found: %s" % (','.join(list(plugins_not_found)))
-      raise RuntimeError, msg
+      raise RuntimeError(msg)
 
   def __get_plugin_info(self, plugin_info):
     info = plugin_info.split(':')
diff --git a/tests/util/test_file_parser.py b/tests/util/test_file_parser.py
index e94e290da..26516600c 100644
--- a/tests/util/test_file_parser.py
+++ b/tests/util/test_file_parser.py
@@ -130,7 +130,7 @@ def parse_table_constraints(constraints_file):
           schema_exclude[table_name.lower()] +=\
               map(parse_table_format_constraint, table_formats.split(','))
         else:
-          raise ValueError, 'Unknown constraint type: %s' % constraint_type
+          raise ValueError('Unknown constraint type: %s' % constraint_type)
   return schema_include, schema_exclude, schema_only
 
 def parse_table_format_constraint(table_format_constraint):
@@ -178,11 +178,11 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True):
     # with what looks like a subsection.
     header = text[:match.start()]
     if re.match(r'^%s' % SUBSECTION_DELIMITER, header):
-      raise RuntimeError, dedent("""
+      raise RuntimeError(dedent("""
           Header must not start with '%s'. Everything before the first line matching '%s'
           is considered header information and will be ignored. However a header must not
           start with '%s' to prevent test cases from accidentally being ignored.""" %
-          (SUBSECTION_DELIMITER, SECTION_DELIMITER, SUBSECTION_DELIMITER))
+          (SUBSECTION_DELIMITER, SECTION_DELIMITER, SUBSECTION_DELIMITER)))
     text = text[match.start():]
 
   # Split the test file up into sections. For each section, parse all subsections.
@@ -215,7 +215,7 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True):
           print('Unknown section \'%s\'' % subsection_name)
           continue
         else:
-          raise RuntimeError, 'Unknown subsection: %s' % subsection_name
+          raise RuntimeError('Unknown subsection: %s' % subsection_name)
 
       if subsection_name == 'QUERY' and subsection_comment:
         parsed_sections['QUERY_NAME'] = subsection_comment
@@ -229,7 +229,7 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True):
           elif comment.startswith('VERIFY'):
             parsed_sections['VERIFIER'] = comment
           else:
-            raise RuntimeError, 'Unknown subsection comment: %s' % comment
+            raise RuntimeError('Unknown subsection comment: %s' % comment)
 
       if subsection_name == 'CATCH':
         parsed_sections['CATCH'] = list()
@@ -238,7 +238,7 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True):
         elif subsection_comment == 'ANY_OF':
           parsed_sections['CATCH'].extend(lines_content)
         else:
-          raise RuntimeError, 'Unknown subsection comment: %s' % subsection_comment
+          raise RuntimeError('Unknown subsection comment: %s' % subsection_comment)
         for exception_str in parsed_sections['CATCH']:
           assert exception_str.strip(), "Empty exception string."
         continue
@@ -251,8 +251,8 @@ def parse_test_file_text(text, valid_section_names, skip_unknown_sections=True):
       # not supported.
       if subsection_name == 'DML_RESULTS':
         if subsection_comment is None or subsection_comment == '':
-          raise RuntimeError, 'DML_RESULTS requires that the table is specified ' \
-              'in the comment.'
+          raise RuntimeError('DML_RESULTS requires that the table is specified ' \
+              'in the comment.')
         parsed_sections['DML_RESULTS_TABLE'] = subsection_comment
         parsed_sections['VERIFIER'] = 'VERIFY_IS_EQUAL_SORTED'