You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/05/15 22:24:10 UTC

impala git commit: IMPALA-6645: Enable disk spill encryption by default

Repository: impala
Updated Branches:
  refs/heads/master 97ecc154b -> 3661100fa


IMPALA-6645: Enable disk spill encryption by default

Perf:
Targeted benchmarks with a heavily spilling query on a machine
with PCLMULQDQ support show less than 5% of CPU time spent in
encryption and decryption. The PCLMULQDQ instruction was introduced
in AMD Bulldozer (c. 2011) and Intel Westmere (c. 2010).

Testing:
Ran core tests with the change.

Updated the custom cluster test to exercise the non-default
configuration (--disk_spill_encryption=false).

Change-Id: Iee4be2a95d689f66c3663d99e4df0fb3968893a9
Reviewed-on: http://gerrit.cloudera.org:8080/10345
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Tim Armstrong <ta...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/3661100f
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/3661100f
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/3661100f

Branch: refs/heads/master
Commit: 3661100fa3323b062adb0802323898b92e80212b
Parents: 97ecc15
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Wed Apr 11 11:00:35 2018 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Tue May 15 22:23:14 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/tmp-file-mgr.cc                  |  2 +-
 .../queries/QueryTest/basic-spilling.test       | 16 +++++++++
 .../QueryTest/disk-spill-encryption.test        | 15 ---------
 .../test_disk_spill_configurations.py           | 34 ++++++++++++++++++++
 .../test_disk_spill_encryption.py               | 32 ------------------
 5 files changed, 51 insertions(+), 48 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/3661100f/be/src/runtime/tmp-file-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc
index 04e15d4..b995518 100644
--- a/be/src/runtime/tmp-file-mgr.cc
+++ b/be/src/runtime/tmp-file-mgr.cc
@@ -38,7 +38,7 @@
 
 #include "common/names.h"
 
-DEFINE_bool(disk_spill_encryption, false,
+DEFINE_bool(disk_spill_encryption, true,
     "Set this to encrypt and perform an integrity "
     "check on all data spilled to disk during a query");
 DEFINE_string(scratch_dirs, "/tmp", "Writable scratch directories");

http://git-wip-us.apache.org/repos/asf/impala/blob/3661100f/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test b/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test
new file mode 100644
index 0000000..513ba2c
--- /dev/null
+++ b/testdata/workloads/functional-query/queries/QueryTest/basic-spilling.test
@@ -0,0 +1,16 @@
+====
+---- QUERY
+# A basic spilling query to exercise spill-to-disk end-to-end.
+set buffer_pool_limit=90m;
+set default_spillable_buffer_size=64k;
+select count(*)
+from (select distinct o_orderdate, o_custkey, o_comment
+      from tpch_parquet.orders) v;
+---- RESULTS
+1500000
+---- TYPES
+BIGINT
+---- RUNTIME_PROFILE
+# Verify that spilling was activated.
+row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/3661100f/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test b/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test
deleted file mode 100644
index 48649e5..0000000
--- a/testdata/workloads/functional-query/queries/QueryTest/disk-spill-encryption.test
+++ /dev/null
@@ -1,15 +0,0 @@
-====
----- QUERY
-set buffer_pool_limit=90m;
-set default_spillable_buffer_size=64k;
-select count(*)
-from (select distinct o_orderdate, o_custkey, o_comment
-      from tpch_parquet.orders) v;
----- RESULTS
-1500000
----- TYPES
-BIGINT
----- RUNTIME_PROFILE
-# Verify that spilling was activated.
-row_regex: .*SpilledPartitions: .* \([1-9][0-9]*\)
-====

http://git-wip-us.apache.org/repos/asf/impala/blob/3661100f/tests/custom_cluster/test_disk_spill_configurations.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_disk_spill_configurations.py b/tests/custom_cluster/test_disk_spill_configurations.py
new file mode 100644
index 0000000..efddd23
--- /dev/null
+++ b/tests/custom_cluster/test_disk_spill_configurations.py
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+
+class TestDiskSpillConfigurations(CustomClusterTestSuite):
+  """Tests to exercise non-default disk spill configurations end-to-end."""
+
+  @classmethod
+  def get_workload(self):
+    return 'functional-query'
+
+  @pytest.mark.execute_serially
+  @CustomClusterTestSuite.with_args("--disk_spill_encryption=false")
+  def test_disk_spill_encryption_disabled(self, vector):
+    """Disk spill encryption is enabled by default. We only need a custom cluster to test
+    the non-default configuration."""
+    self.run_test_case('QueryTest/basic-spilling', vector)

http://git-wip-us.apache.org/repos/asf/impala/blob/3661100f/tests/custom_cluster/test_disk_spill_encryption.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_disk_spill_encryption.py b/tests/custom_cluster/test_disk_spill_encryption.py
deleted file mode 100644
index c9a5aeb..0000000
--- a/tests/custom_cluster/test_disk_spill_encryption.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import pytest
-
-from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
-
-class TestDiskSpillEncryption(CustomClusterTestSuite):
-  """ Tests to exercise disk spill encryption end-to-end. """
-
-  @classmethod
-  def get_workload(self):
-    return 'functional-query'
-
-  @pytest.mark.execute_serially
-  @CustomClusterTestSuite.with_args("--disk_spill_encryption=true")
-  def test_spilling_query(self, vector):
-    self.run_test_case('QueryTest/disk-spill-encryption', vector)