You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ab...@apache.org on 2018/02/06 23:50:33 UTC

[1/2] impala git commit: IMPALA-5037: Default PARQUET_ARRAY_RESOLUTION=THREE_LEVEL

Repository: impala
Updated Branches:
  refs/heads/master 32bf54dfd -> 1acddfc86


IMPALA-5037: Default PARQUET_ARRAY_RESOLUTION=THREE_LEVEL

Changes the default value for the PARQUET_ARRAY_RESOLUTION
query option to conform to the Parquet standard.
Before: TWO_LEVEL_THEN_THREE_LEVEL
After:  THREE_LEVEL

For more information see:
* IMPALA-4725
* https://github.com/apache/parquet-format/blob/master/LogicalTypes.md

Testing:
- expands and cleans up the existing tests for more coverage
  over the different resolution policies
- private core/hdfs run passed

Cherry-picks: not for 2.x.

Change-Id: Ib8f7e9010c4354d667305d9df7b78862efb23fe1
Reviewed-on: http://gerrit.cloudera.org:8080/9210
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/dedff5e2
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/dedff5e2
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/dedff5e2

Branch: refs/heads/master
Commit: dedff5e25c19da724ae032a52354cd4fd61ec40a
Parents: 32bf54d
Author: Alex Behm <al...@cloudera.com>
Authored: Thu Feb 1 16:57:53 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Feb 6 22:58:25 2018 +0000

----------------------------------------------------------------------
 common/thrift/ImpalaInternalService.thrift |   2 +-
 common/thrift/ImpalaService.thrift         |   4 -
 tests/query_test/test_nested_types.py      | 346 +++++++++++-------------
 3 files changed, 156 insertions(+), 196 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/dedff5e2/common/thrift/ImpalaInternalService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift
index 37bf638..ae2eee6 100644
--- a/common/thrift/ImpalaInternalService.thrift
+++ b/common/thrift/ImpalaInternalService.thrift
@@ -225,7 +225,7 @@ struct TQueryOptions {
 
   // Policy for resolving nested array fields in Parquet files.
   54: optional TParquetArrayResolution parquet_array_resolution =
-    TParquetArrayResolution.TWO_LEVEL_THEN_THREE_LEVEL
+    TParquetArrayResolution.THREE_LEVEL
 
   // Indicates whether to read statistics from Parquet files and use them during query
   // processing. This includes skipping data based on the statistics and computing query

http://git-wip-us.apache.org/repos/asf/impala/blob/dedff5e2/common/thrift/ImpalaService.thrift
----------------------------------------------------------------------
diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift
index 0360f6c..1911fc6 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -247,10 +247,6 @@ enum TImpalaQueryOptions {
   // between the two and three level encodings with index-based field resolution.
   // The ambiguity can manually be resolved using this query option, or by using
   // PARQUET_FALLBACK_SCHEMA_RESOLUTION=name.
-  // The value TWO_LEVEL_THEN_THREE_LEVEL was the default mode since Impala 2.3.
-  // It is preserved as the default for compatibility.
-  // TODO: Remove the TWO_LEVEL_THEN_THREE_LEVEL mode completely or at least make
-  // it non-default in a compatibility breaking release.
   PARQUET_ARRAY_RESOLUTION,
 
   // Indicates whether to read statistics from Parquet files and use them during query

http://git-wip-us.apache.org/repos/asf/impala/blob/dedff5e2/tests/query_test/test_nested_types.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_nested_types.py b/tests/query_test/test_nested_types.py
index 85ed3a4..d078cdf 100644
--- a/tests/query_test/test_nested_types.py
+++ b/tests/query_test/test_nested_types.py
@@ -19,6 +19,7 @@
 
 import os
 from subprocess import check_call
+from pytest import skip
 
 from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
 from tests.common.impala_test_suite import ImpalaTestSuite
@@ -27,6 +28,7 @@ from tests.common.skip import (
     SkipIfS3,
     SkipIfADLS,
     SkipIfLocal)
+from tests.common.test_vector import ImpalaTestDimension
 from tests.util.filesystem_utils import WAREHOUSE, get_fs_path
 
 class TestNestedTypes(ImpalaTestSuite):
@@ -130,6 +132,8 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   TESTFILE_DIR = os.path.join(os.environ['IMPALA_HOME'],
                               "testdata/parquet_nested_types_encodings")
 
+  ARRAY_RESOLUTION_POLICIES = ["three_level", "two_level", "two_level_then_three_level"]
+
   @classmethod
   def get_workload(self):
     return 'functional-query'
@@ -137,9 +141,17 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestParquetArrayEncodings, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension(
+      'parquet_array_resolution', *TestParquetArrayEncodings.ARRAY_RESOLUTION_POLICIES))
     cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('table_format').file_format == 'parquet')
 
+  def __init_arr_res(self, vector):
+    arr_res = vector.get_value('parquet_array_resolution')
+    qopts = vector.get_value('exec_option')
+    qopts['parquet_array_resolution'] = arr_res
+    return (arr_res, qopts)
+
   # $ parquet-tools schema SingleFieldGroupInList.parquet
   # message SingleFieldGroupInList {
   #   optional group single_element_groups (LIST) {
@@ -156,22 +168,66 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   # .single_element_group:
   # ..count = 2345
   def test_single_field_group_in_list(self, vector, unique_database):
-    tablename = "SingleFieldGroupInList"
-    full_name = "%s.%s" % (unique_database, tablename)
-    self._create_test_table(unique_database, tablename, "SingleFieldGroupInList.parquet",
-        "col1 array<struct<count: bigint>>")
+    self.__test_single_field_group_in_list(unique_database, "SingleFieldGroupInList",
+      "SingleFieldGroupInList.parquet", vector)
 
-    result = self.client.execute("select item.count from %s.col1" % full_name)
-    assert len(result.data) == 2
-    assert result.data == ['1234', '2345']
-
-    result = self.client.execute("select item.count from %s t, t.col1" % full_name)
-    assert len(result.data) == 2
-    assert result.data == ['1234', '2345']
+  # $ parquet-tools schema AvroSingleFieldGroupInList.parquet
+  # message AvroSingleFieldGroupInList {
+  #   optional group single_element_groups (LIST) {
+  #     repeated group array {
+  #       required int64 count;
+  #     }
+  #   }
+  # }
+  #
+  # $ parquet-tools cat AvroSingleFieldGroupInList.parquet
+  # single_element_groups:
+  # .array:
+  # ..count = 1234
+  # .array:
+  # ..count = 2345
+  def test_avro_single_field_group_in_list(self, vector, unique_database):
+    self.__test_single_field_group_in_list(unique_database, "AvroSingleFieldGroupInList",
+      "AvroSingleFieldGroupInList.parquet", vector)
 
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
+  # $ parquet-tools schema ThriftSingleFieldGroupInList.parquet
+  # message ThriftSingleFieldGroupInList {
+  #   optional group single_element_groups (LIST) {
+  #     repeated group single_element_groups_tuple {
+  #       required int64 count;
+  #     }
+  #   }
+  # }
+  #
+  # $ parquet-tools cat ThriftSingleFieldGroupInList.parquet
+  # single_element_groups:
+  # .single_element_groups_tuple:
+  # ..count = 1234
+  # .single_element_groups_tuple:
+  # ..count = 2345
+  def test_thrift_single_field_group_in_list(self, vector, unique_database):
+    self.__test_single_field_group_in_list(unique_database,
+      "ThriftSingleFieldGroupInList", "ThriftSingleFieldGroupInList.parquet", vector)
+
+  def __test_single_field_group_in_list(self, db, tbl, parq_file, vector):
+    arr_res, qopts = self.__init_arr_res(vector)
+    full_name = "%s.%s" % (db, tbl)
+    if arr_res == "two_level" or arr_res == "two_level_then_three_level":
+      self._create_test_table(db, tbl, parq_file, "col1 array<struct<f1: bigint>>")
+      result = self.execute_query("select item.f1 from %s.col1" % full_name, qopts)
+      assert result.data == ['1234', '2345']
+      result = self.execute_query("select item.f1 from %s t, t.col1" % full_name, qopts)
+      assert result.data == ['1234', '2345']
+
+    if arr_res == "three_level":
+      self._create_test_table(db, tbl, parq_file, "col1 array<bigint>")
+      result = self.execute_query("select item from %s.col1" % full_name, qopts)
+      assert result.data == ['1234', '2345']
+      result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
+      assert result.data == ['1234', '2345']
+
+    result = self.execute_query(
+      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
     assert result.data == ['2']
 
   # $ parquet-tools schema AvroPrimitiveInList.parquet
@@ -187,58 +243,45 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   # .array = 35
   # .array = 36
   def test_avro_primitive_in_list(self, vector, unique_database):
-    tablename = "AvroPrimitiveInList"
-    full_name = "%s.%s" % (unique_database, tablename)
-    self._create_test_table(unique_database, tablename, "AvroPrimitiveInList.parquet",
-        "col1 array<int>")
-
-    result = self.client.execute("select item from %s.col1" % full_name)
-    assert len(result.data) == 3
-    assert result.data == ['34', '35', '36']
-
-    result = self.client.execute("select item from %s t, t.col1" % full_name)
-    assert len(result.data) == 3
-    assert result.data == ['34', '35', '36']
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
-    assert result.data == ['3']
+    self.__test_primitive_in_list(unique_database, "AvroPrimitiveInList",
+      "AvroPrimitiveInList.parquet", vector)
 
-  # $ parquet-tools schema AvroSingleFieldGroupInList.parquet
-  # message AvroSingleFieldGroupInList {
-  #   optional group single_element_groups (LIST) {
-  #     repeated group array {
-  #       required int64 count;
-  #     }
+  # $ parquet-tools schema ThriftPrimitiveInList.parquet
+  # message ThriftPrimitiveInList {
+  #   required group list_of_ints (LIST) {
+  #     repeated int32 list_of_ints_tuple;
   #   }
   # }
   #
-  # $ parquet-tools cat AvroSingleFieldGroupInList.parquet
-  # single_element_groups:
-  # .array:
-  # ..count = 1234
-  # .array:
-  # ..count = 2345
-  def test_avro_single_field_group_in_list(self, vector, unique_database):
-    tablename = "AvroSingleFieldGroupInList"
-    full_name = "%s.%s" % (unique_database, tablename)
-    # Note that the field name does not match the field name in the file schema.
-    self._create_test_table(unique_database, tablename,
-        "AvroSingleFieldGroupInList.parquet", "col1 array<struct<f1: bigint>>")
-
-    result = self.client.execute("select item.f1 from %s.col1" % full_name)
-    assert len(result.data) == 2
-    assert result.data == ['1234', '2345']
-
-    result = self.client.execute("select item.f1 from %s t, t.col1" % full_name)
-    assert len(result.data) == 2
-    assert result.data == ['1234', '2345']
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
-    assert result.data == ['2']
+  # $ parquet-tools cat ThriftPrimitiveInList.parquet
+  # list_of_ints:
+  # .list_of_ints_tuple = 34
+  # .list_of_ints_tuple = 35
+  # .list_of_ints_tuple = 36
+  def test_thrift_primitive_in_list(self, vector, unique_database):
+    self.__test_primitive_in_list(unique_database, "ThriftPrimitiveInList",
+      "ThriftPrimitiveInList.parquet", vector)
+
+  def __test_primitive_in_list(self, db, tbl, parq_file, vector):
+    arr_res, qopts = self.__init_arr_res(vector)
+    full_name = "%s.%s" % (db, tbl)
+    self._create_test_table(db, tbl, parq_file, "col1 array<int>")
+
+    if arr_res == "two_level" or arr_res == "two_level_then_three_level":
+      result = self.execute_query("select item from %s.col1" % full_name, qopts)
+      assert result.data == ['34', '35', '36']
+      result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
+      assert result.data == ['34', '35', '36']
+
+    if arr_res == "three_level":
+      result = self.execute_query("select item from %s.col1" % full_name, qopts)
+      assert result.data == ['NULL', 'NULL', 'NULL']
+      result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
+      assert result.data == ['NULL', 'NULL', 'NULL']
+
+    result = self.execute_query(
+      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
+    assert result.data == ['3']
 
   # $ parquet-tools schema bad-avro.parquet
   # message org.apache.spark.sql.execution.datasources.parquet.test.avro.AvroArrayOfArray {
@@ -280,94 +323,8 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   #
   # [Same int_arrays_column repeated 8x more]
   def test_avro_array_of_arrays(self, vector, unique_database):
-    tablename = "AvroArrayOfArrays"
-    full_name = "%s.%s" % (unique_database, tablename)
-    self._create_test_table(unique_database, tablename, "bad-avro.parquet",
-        "col1 array<array<int>>")
-
-    result = self.client.execute("select item from %s.col1.item" % full_name)
-    assert len(result.data) == 9 * 10
-    assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * 10
-
-    result = self.client.execute(
-      "select a2.item from %s t, t.col1 a1, a1.item a2" % full_name)
-    assert len(result.data) == 9 * 10
-    assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * 10
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 10
-    assert result.data == ['3'] * 10
-
-    result = self.client.execute(
-      "select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) v" % full_name)
-    assert len(result.data) == 3 * 10
-    assert result.data == ['3', '3', '3'] * 10
-
-  # $ parquet-tools schema ThriftPrimitiveInList.parquet
-  # message ThriftPrimitiveInList {
-  #   required group list_of_ints (LIST) {
-  #     repeated int32 list_of_ints_tuple;
-  #   }
-  # }
-  #
-  # $ parquet-tools cat ThriftPrimitiveInList.parquet
-  # list_of_ints:
-  # .list_of_ints_tuple = 34
-  # .list_of_ints_tuple = 35
-  # .list_of_ints_tuple = 36
-  def test_thrift_primitive_in_list(self, vector, unique_database):
-    tablename = "ThriftPrimitiveInList"
-    full_name = "%s.%s" % (unique_database, tablename)
-    self._create_test_table(unique_database, tablename,
-        "ThriftPrimitiveInList.parquet", "col1 array<int>")
-
-    result = self.client.execute("select item from %s.col1" % full_name)
-    assert len(result.data) == 3
-    assert result.data == ['34', '35', '36']
-
-    result = self.client.execute("select item from %s t, t.col1" % full_name)
-    assert len(result.data) == 3
-    assert result.data == ['34', '35', '36']
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
-    assert result.data == ['3']
-
-  # $ parquet-tools schema ThriftSingleFieldGroupInList.parquet
-  # message ThriftSingleFieldGroupInList {
-  #   optional group single_element_groups (LIST) {
-  #     repeated group single_element_groups_tuple {
-  #       required int64 count;
-  #     }
-  #   }
-  # }
-  #
-  # $ parquet-tools cat ThriftSingleFieldGroupInList.parquet
-  # single_element_groups:
-  # .single_element_groups_tuple:
-  # ..count = 1234
-  # .single_element_groups_tuple:
-  # ..count = 2345
-  def test_thrift_single_field_group_in_list(self, vector, unique_database):
-    tablename = "ThriftSingleFieldGroupInList"
-    full_name = "%s.%s" % (unique_database, tablename)
-    self._create_test_table(unique_database, tablename,
-        "ThriftSingleFieldGroupInList.parquet", "col1 array<struct<f1: bigint>>")
-
-    result = self.client.execute("select item.f1 from %s.col1" % full_name)
-    assert len(result.data) == 2
-    assert result.data == ['1234', '2345']
-
-    result = self.client.execute("select item.f1 from %s t, t.col1" % full_name)
-    assert len(result.data) == 2
-    assert result.data == ['1234', '2345']
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
-    assert result.data == ['2']
+    self.__test_array_of_arrays(unique_database, "AvroArrayOfArrays",
+      "bad-avro.parquet", vector, 10)
 
   # $ parquet-tools schema bad-thrift.parquet
   # message ParquetSchema {
@@ -393,29 +350,39 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   # ..intListsColumn_tuple_tuple = 7
   # ..intListsColumn_tuple_tuple = 8
   def test_thrift_array_of_arrays(self, vector, unique_database):
-    tablename = "ThriftArrayOfArrays"
-    full_name = "%s.%s" % (unique_database, tablename)
-    self._create_test_table(unique_database, tablename, "bad-thrift.parquet",
-        "col1 array<array<int>>")
-
-    result = self.client.execute("select item from %s.col1.item" % full_name)
-    assert len(result.data) == 9
-    assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8']
-
-    result = self.client.execute(
-      "select a2.item from %s t, t.col1 a1, a1.item a2" % full_name)
-    assert len(result.data) == 9
-    assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8']
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
-    assert result.data == ['3']
-
-    result = self.client.execute(
-      "select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) v" % full_name)
-    assert len(result.data) == 3
-    assert result.data == ['3', '3', '3']
+    self.__test_array_of_arrays(unique_database, "ThriftArrayOfArrays",
+      "bad-thrift.parquet", vector, 1)
+
+  def __test_array_of_arrays(self, db, tbl, parq_file, vector, mult):
+    arr_res, qopts = self.__init_arr_res(vector)
+    full_name = "%s.%s" % (db, tbl)
+    self._create_test_table(db, tbl, parq_file, "col1 array<array<int>>")
+
+    if arr_res == "two_level" or arr_res == "two_level_then_three_level":
+      result = self.execute_query("select item from %s.col1.item" % full_name, qopts)
+      assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * mult
+      result = self.execute_query(
+        "select a2.item from %s t, t.col1 a1, a1.item a2" % full_name)
+      assert result.data == ['0', '1', '2', '3', '4', '5', '6', '7', '8'] * mult
+      result = self.execute_query(
+        "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
+      assert result.data == ['3'] * mult
+      result = self.execute_query(
+        "select cnt from %s t, t.col1 a1, (select count(*) cnt from a1.item) v"\
+        % full_name, qopts)
+      assert result.data == ['3', '3', '3'] * mult
+
+    if arr_res == "three_level":
+      expected_err = "has an incompatible Parquet schema"
+      try:
+        self.execute_query("select item from %s.col1.item" % full_name, qopts)
+      except Exception, e:
+        assert expected_err in str(e)
+      try:
+        self.execute_query("select cnt from %s t, (select count(*) cnt from t.col1) v"\
+          % full_name, qopts)
+      except Exception, e:
+        assert expected_err in str(e)
 
   # $ parquet-tools schema UnannotatedListOfPrimitives.parquet
   # message UnannotatedListOfPrimitives {
@@ -427,22 +394,18 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   # list_of_ints = 35
   # list_of_ints = 36
   def test_unannotated_list_of_primitives(self, vector, unique_database):
+    arr_res, qopts = self.__init_arr_res(vector)
     tablename = "UnannotatedListOfPrimitives"
     full_name = "%s.%s" % (unique_database, tablename)
     self._create_test_table(unique_database, tablename,
         "UnannotatedListOfPrimitives.parquet", "col1 array<int>")
 
-    result = self.client.execute("select item from %s.col1" % full_name)
-    assert len(result.data) == 3
+    result = self.execute_query("select item from %s.col1" % full_name, qopts)
     assert result.data == ['34', '35', '36']
-
-    result = self.client.execute("select item from %s t, t.col1" % full_name)
-    assert len(result.data) == 3
+    result = self.execute_query("select item from %s t, t.col1" % full_name, qopts)
     assert result.data == ['34', '35', '36']
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
+    result = self.execute_query(
+      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
     assert result.data == ['3']
 
   # $ parquet-tools schema UnannotatedListOfGroups.parquet
@@ -461,22 +424,18 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
   # .x = 2.0
   # .y = 2.0
   def test_unannotated_list_of_groups(self, vector, unique_database):
+    arr_res, qopts = self.__init_arr_res(vector)
     tablename = "UnannotatedListOfGroups"
     full_name = "%s.%s" % (unique_database, tablename)
     self._create_test_table(unique_database, tablename,
         "UnannotatedListOfGroups.parquet", "col1 array<struct<f1: float, f2: float>>")
 
-    result = self.client.execute("select f1, f2 from %s.col1" % full_name)
-    assert len(result.data) == 2
+    result = self.execute_query("select f1, f2 from %s.col1" % full_name, qopts)
     assert result.data == ['1\t1', '2\t2']
-
-    result = self.client.execute("select f1, f2 from %s t, t.col1" % full_name)
-    assert len(result.data) == 2
+    result = self.execute_query("select f1, f2 from %s t, t.col1" % full_name, qopts)
     assert result.data == ['1\t1', '2\t2']
-
-    result = self.client.execute(
-      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name)
-    assert len(result.data) == 1
+    result = self.execute_query(
+      "select cnt from %s t, (select count(*) cnt from t.col1) v" % full_name, qopts)
     assert result.data == ['2']
 
   # $ parquet-tools schema AmbiguousList_Modern.parquet
@@ -548,6 +507,11 @@ class TestParquetArrayEncodings(ImpalaTestSuite):
     not exactly match the data files'. The name-based policy generally does not have
     this problem because it avoids traversing incorrect schema paths.
     """
+
+    # The Parquet resolution policy is manually set in the .test files.
+    if vector.get_value('parquet_array_resolution') != "three_level":
+      skip("Test only run with three_level")
+
     ambig_modern_tbl = "ambig_modern"
     self._create_test_table(unique_database, ambig_modern_tbl,
         "AmbiguousList_Modern.parquet",


[2/2] impala git commit: Revert "IMPALA-6219: Use AES-GCM for spill-to-disk encryption"

Posted by ab...@apache.org.
Revert "IMPALA-6219: Use AES-GCM for spill-to-disk encryption"

This reverts commit 9b68645f9eb9e08899fda860e0946cc05f205479.

Change-Id: Ia06f061a4ecedd1df0d359fe06fe84618b5e07fb
Reviewed-on: http://gerrit.cloudera.org:8080/9226
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/1acddfc8
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/1acddfc8
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/1acddfc8

Branch: refs/heads/master
Commit: 1acddfc86fbacf0eb498c65d00c1eb0e9b486875
Parents: dedff5e
Author: Alex Behm <al...@cloudera.com>
Authored: Tue Feb 6 11:50:51 2018 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Tue Feb 6 23:40:43 2018 +0000

----------------------------------------------------------------------
 be/src/runtime/tmp-file-mgr.cc   | 15 ++----
 be/src/util/cpu-info.cc          | 13 +++--
 be/src/util/cpu-info.h           | 13 +++--
 be/src/util/openssl-util-test.cc | 95 +++++++++++-----------------------
 be/src/util/openssl-util.cc      | 96 +++--------------------------------
 be/src/util/openssl-util.h       | 70 ++++++++-----------------
 6 files changed, 74 insertions(+), 228 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/1acddfc8/be/src/runtime/tmp-file-mgr.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/tmp-file-mgr.cc b/be/src/runtime/tmp-file-mgr.cc
index 3807670..d35d302 100644
--- a/be/src/runtime/tmp-file-mgr.cc
+++ b/be/src/runtime/tmp-file-mgr.cc
@@ -612,26 +612,19 @@ void TmpFileMgr::WriteHandle::WaitForWrite() {
 Status TmpFileMgr::WriteHandle::EncryptAndHash(MemRange buffer) {
   DCHECK(FLAGS_disk_spill_encryption);
   SCOPED_TIMER(encryption_timer_);
-  // Since we're using GCM/CTR/CFB mode, we must take care not to reuse a
+  // Since we're using AES-CTR/AES-CFB mode, we must take care not to reuse a
   // key/IV pair. Regenerate a new key and IV for every data buffer we write.
   key_.InitializeRandom();
   RETURN_IF_ERROR(key_.Encrypt(buffer.data(), buffer.len(), buffer.data()));
-
-  if (!key_.IsGcmMode()) {
-    hash_.Compute(buffer.data(), buffer.len());
-  }
+  hash_.Compute(buffer.data(), buffer.len());
   return Status::OK();
 }
 
 Status TmpFileMgr::WriteHandle::CheckHashAndDecrypt(MemRange buffer) {
   DCHECK(FLAGS_disk_spill_encryption);
   SCOPED_TIMER(encryption_timer_);
-
-  // GCM mode will verify the integrity by itself
-  if (!key_.IsGcmMode()) {
-    if (!hash_.Verify(buffer.data(), buffer.len())) {
-      return Status("Block verification failure");
-    }
+  if (!hash_.Verify(buffer.data(), buffer.len())) {
+    return Status("Block verification failure");
   }
   return key_.Decrypt(buffer.data(), buffer.len(), buffer.data());
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/1acddfc8/be/src/util/cpu-info.cc
----------------------------------------------------------------------
diff --git a/be/src/util/cpu-info.cc b/be/src/util/cpu-info.cc
index 1e3fcde..a32571e 100644
--- a/be/src/util/cpu-info.cc
+++ b/be/src/util/cpu-info.cc
@@ -85,13 +85,12 @@ static struct {
   int64_t flag;
 } flag_mappings[] =
 {
-  { "ssse3",     CpuInfo::SSSE3 },
-  { "sse4_1",    CpuInfo::SSE4_1 },
-  { "sse4_2",    CpuInfo::SSE4_2 },
-  { "popcnt",    CpuInfo::POPCNT },
-  { "avx",       CpuInfo::AVX },
-  { "avx2",      CpuInfo::AVX2 },
-  { "pclmuldqd", CpuInfo::PCLMULQDQ }
+  { "ssse3",  CpuInfo::SSSE3 },
+  { "sse4_1", CpuInfo::SSE4_1 },
+  { "sse4_2", CpuInfo::SSE4_2 },
+  { "popcnt", CpuInfo::POPCNT },
+  { "avx",    CpuInfo::AVX },
+  { "avx2",   CpuInfo::AVX2 },
 };
 static const long num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
 

http://git-wip-us.apache.org/repos/asf/impala/blob/1acddfc8/be/src/util/cpu-info.h
----------------------------------------------------------------------
diff --git a/be/src/util/cpu-info.h b/be/src/util/cpu-info.h
index e60babc..38d6782 100644
--- a/be/src/util/cpu-info.h
+++ b/be/src/util/cpu-info.h
@@ -34,13 +34,12 @@ namespace impala {
 /// /sys/devices)
 class CpuInfo {
  public:
-  static const int64_t SSSE3     = (1 << 1);
-  static const int64_t SSE4_1    = (1 << 2);
-  static const int64_t SSE4_2    = (1 << 3);
-  static const int64_t POPCNT    = (1 << 4);
-  static const int64_t AVX       = (1 << 5);
-  static const int64_t AVX2      = (1 << 6);
-  static const int64_t PCLMULQDQ = (1 << 7);
+  static const int64_t SSSE3   = (1 << 1);
+  static const int64_t SSE4_1  = (1 << 2);
+  static const int64_t SSE4_2  = (1 << 3);
+  static const int64_t POPCNT  = (1 << 4);
+  static const int64_t AVX     = (1 << 5);
+  static const int64_t AVX2    = (1 << 6);
 
   /// Cache enums for L1 (data), L2 and L3
   enum CacheLevel {

http://git-wip-us.apache.org/repos/asf/impala/blob/1acddfc8/be/src/util/openssl-util-test.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util-test.cc b/be/src/util/openssl-util-test.cc
index 76f65a5..8d98b0d 100644
--- a/be/src/util/openssl-util-test.cc
+++ b/be/src/util/openssl-util-test.cc
@@ -44,41 +44,6 @@ class OpenSSLUtilTest : public ::testing::Test {
     }
   }
 
-  /// Fill arbitrary-length buffer with random bytes
-  void GenerateRandomBytes(uint8_t* data, int64_t len) {
-    DCHECK_GE(len, 0);
-    for (int64_t i = 0; i < len; i++) {
-      data[i] = uniform_int_distribution<uint8_t>(0, UINT8_MAX)(rng_);
-    }
-  }
-
-  void TestEncryptionDecryption(const int64_t buffer_size) {
-    vector<uint8_t> original(buffer_size);
-    vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption.
-    if (buffer_size % 8 == 0) {
-      GenerateRandomData(original.data(), buffer_size);
-    } else {
-      GenerateRandomBytes(original.data(), buffer_size);
-    }
-
-    // Check all the modes
-    AES_CIPHER_MODE modes[] = {AES_256_GCM, AES_256_CTR, AES_256_CFB};
-    for (auto m : modes) {
-      memcpy(scratch.data(), original.data(), buffer_size);
-
-      EncryptionKey key;
-      key.InitializeRandom();
-      key.SetCipherMode(m);
-
-      ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
-      // Check that encryption did something
-      ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
-      ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
-      // Check that we get the original data back.
-      ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
-    }
-  }
-
   mt19937_64 rng_;
 };
 
@@ -92,7 +57,7 @@ TEST_F(OpenSSLUtilTest, Encryption) {
   GenerateRandomData(original.data(), buffer_size);
 
   // Check both CTR & CFB
-  AES_CIPHER_MODE modes[] = {AES_256_GCM, AES_256_CTR, AES_256_CFB};
+  AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
   for (auto m : modes) {
     // Iterate multiple times to ensure that key regeneration works correctly.
     EncryptionKey key;
@@ -120,42 +85,44 @@ TEST_F(OpenSSLUtilTest, Encryption) {
 /// Test that encryption and decryption work in-place.
 TEST_F(OpenSSLUtilTest, EncryptInPlace) {
   const int buffer_size = 1024 * 1024;
-  TestEncryptionDecryption(buffer_size);
+  vector<uint8_t> original(buffer_size);
+  vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption.
+
+  EncryptionKey key;
+  // Check both CTR & CFB
+  AES_CIPHER_MODE modes[] = {AES_256_CTR, AES_256_CFB};
+  for (auto m : modes) {
+    GenerateRandomData(original.data(), buffer_size);
+    memcpy(scratch.data(), original.data(), buffer_size);
+
+    key.InitializeRandom();
+    key.SetCipherMode(m);
+
+    ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
+    // Check that encryption did something
+    ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
+    ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
+    // Check that we get the original data back.
+    ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
+  }
 }
 
 /// Test that encryption works with buffer lengths that don't fit in a 32-bit integer.
 TEST_F(OpenSSLUtilTest, EncryptInPlaceHugeBuffer) {
   const int64_t buffer_size = 3 * 1024L * 1024L * 1024L;
-  TestEncryptionDecryption(buffer_size);
-}
-
-/// Test that encryption works with arbitrary-length buffer
-TEST_F(OpenSSLUtilTest, EncryptArbitraryLength) {
-  std::uniform_int_distribution<uint64_t> dis(0, 1024 * 1024);
-  const int buffer_size = dis(rng_);
-  TestEncryptionDecryption(buffer_size);
-}
-
-/// Test integrity in GCM mode
-TEST_F(OpenSSLUtilTest, GcmIntegrity) {
-  const int buffer_size = 1024 * 1024;
-  vector<uint8_t> buffer(buffer_size);
+  vector<uint8_t> original(buffer_size);
+  vector<uint8_t> scratch(buffer_size); // Scratch buffer for in-place encryption.
+  GenerateRandomData(original.data(), buffer_size);
+  memcpy(scratch.data(), original.data(), buffer_size);
 
   EncryptionKey key;
   key.InitializeRandom();
-  key.SetCipherMode(AES_256_GCM);
-
-  // Even it has been set as GCM mode, it may fall back to other modes.
-  // Check if GCM mode is supported at runtime.
-  if (key.IsGcmMode()) {
-    GenerateRandomData(buffer.data(), buffer_size);
-    ASSERT_OK(key.Encrypt(buffer.data(), buffer_size, buffer.data()));
-
-    // tamper the data
-    ++buffer[0];
-    Status s = key.Decrypt(buffer.data(), buffer_size, buffer.data());
-    EXPECT_STR_CONTAINS(s.GetDetail(), "EVP_DecryptFinal");
-  }
+  ASSERT_OK(key.Encrypt(scratch.data(), buffer_size, scratch.data()));
+  // Check that encryption did something
+  ASSERT_NE(0, memcmp(original.data(), scratch.data(), buffer_size));
+  ASSERT_OK(key.Decrypt(scratch.data(), buffer_size, scratch.data()));
+  // Check that we get the original data back.
+  ASSERT_EQ(0, memcmp(original.data(), scratch.data(), buffer_size));
 }
 
 /// Test basic integrity hash functionality.

http://git-wip-us.apache.org/repos/asf/impala/blob/1acddfc8/be/src/util/openssl-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util.cc b/be/src/util/openssl-util.cc
index ffb47eb..69dc676 100644
--- a/be/src/util/openssl-util.cc
+++ b/be/src/util/openssl-util.cc
@@ -20,7 +20,6 @@
 #include <limits.h>
 #include <sstream>
 
-#include <glog/logging.h>
 #include <openssl/err.h>
 #include <openssl/evp.h>
 #include <openssl/rand.h>
@@ -31,7 +30,6 @@
 #include "gutil/strings/substitute.h"
 
 #include "common/names.h"
-#include "cpu-info.h"
 
 DECLARE_string(ssl_client_ca_certificate);
 DECLARE_string(ssl_server_certificate);
@@ -109,20 +107,19 @@ void EncryptionKey::InitializeRandom() {
   }
   RAND_bytes(key_, sizeof(key_));
   RAND_bytes(iv_, sizeof(iv_));
-  memset(gcm_tag_, 0, sizeof(gcm_tag_));
   initialized_ = true;
 }
 
-Status EncryptionKey::Encrypt(const uint8_t* data, int64_t len, uint8_t* out) {
+Status EncryptionKey::Encrypt(const uint8_t* data, int64_t len, uint8_t* out) const {
   return EncryptInternal(true, data, len, out);
 }
 
-Status EncryptionKey::Decrypt(const uint8_t* data, int64_t len, uint8_t* out) {
+Status EncryptionKey::Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const {
   return EncryptInternal(false, data, len, out);
 }
 
 Status EncryptionKey::EncryptInternal(
-    bool encrypt, const uint8_t* data, int64_t len, uint8_t* out) {
+    bool encrypt, const uint8_t* data, int64_t len, uint8_t* out) const {
   DCHECK(initialized_);
   DCHECK_GE(len, 0);
   // Create and initialize the context for encryption
@@ -130,10 +127,6 @@ Status EncryptionKey::EncryptInternal(
   EVP_CIPHER_CTX_init(&ctx);
   EVP_CIPHER_CTX_set_padding(&ctx, 0);
 
-  if (IsGcmMode()) {
-    EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_IVLEN, AES_BLOCK_SIZE, NULL);
-  }
-
   // Start encryption/decryption.  We use a 256-bit AES key, and the cipher block mode
   // is either CTR or CFB(stream cipher), both of which support arbitrary length
   // ciphertexts - it doesn't have to be a multiple of 16 bytes. Additionally, CTR
@@ -164,11 +157,6 @@ Status EncryptionKey::EncryptInternal(
     offset += in_len;
   }
 
-  if (IsGcmMode() && !encrypt) {
-    // Set expected tag value
-    EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_SET_TAG, AES_BLOCK_SIZE, gcm_tag_);
-  }
-
   // Finalize encryption or decryption.
   int final_out_len;
   success = encrypt ? EVP_EncryptFinal_ex(&ctx, out + offset, &final_out_len) :
@@ -176,93 +164,21 @@ Status EncryptionKey::EncryptInternal(
   if (success != 1) {
     return OpenSSLErr(encrypt ? "EVP_EncryptFinal" : "EVP_DecryptFinal");
   }
-
-  if (IsGcmMode() && encrypt) {
-    EVP_CIPHER_CTX_ctrl(&ctx, EVP_CTRL_GCM_GET_TAG, AES_BLOCK_SIZE, gcm_tag_);
-  }
-
-  // Again safe due to GCM/CTR/CFB with no padding
+  // Again safe due to CTR/CFB with no padding
   DCHECK_EQ(final_out_len, 0);
   return Status::OK();
 }
 
-/// OpenSSL 1.0.1d
-#define OPENSSL_VERSION_1_0_1D 0x1000104fL
-
-/// If not defined at compile time, define them manually
-/// see: openssl/evp.h
-#ifndef EVP_CIPH_GCM_MODE
-#define EVP_CTRL_GCM_SET_IVLEN 0x9
-#define EVP_CTRL_GCM_GET_TAG 0x10
-#define EVP_CTRL_GCM_SET_TAG 0x11
-#endif
-
 extern "C" {
 ATTRIBUTE_WEAK
 const EVP_CIPHER* EVP_aes_256_ctr();
-
-ATTRIBUTE_WEAK
-const EVP_CIPHER* EVP_aes_256_gcm();
 }
 
 const EVP_CIPHER* EncryptionKey::GetCipher() const {
   // use weak symbol to avoid compiling error on OpenSSL 1.0.0 environment
-  if (mode_ == AES_256_CTR) return EVP_aes_256_ctr();
-  if (mode_ == AES_256_GCM) return EVP_aes_256_gcm();
+  if (mode_ == AES_256_CTR && EVP_aes_256_ctr) return EVP_aes_256_ctr();
 
+  // otherwise, fallback to CFB mode
   return EVP_aes_256_cfb();
 }
-
-void EncryptionKey::SetCipherMode(AES_CIPHER_MODE m) {
-  mode_ = m;
-
-  if (!IsModeSupported(m)) {
-    mode_ = GetSupportedDefaultMode();
-    LOG(WARNING) << Substitute("$0 is not supported, fall back to $1.",
-        ModeToString(m), ModeToString(mode_));
-  }
-}
-
-bool EncryptionKey::IsModeSupported(AES_CIPHER_MODE m) const {
-  switch (m) {
-    case AES_256_GCM:
-      // It becomes a bit tricky for GCM mode, because GCM mode is enabled since
-      // OpenSSL 1.0.1, but the tag validation only works since 1.0.1d. We have
-      // to make sure that OpenSSL version >= 1.0.1d for GCM. So we need
-      // SSLeay(). Note that SSLeay() may return the compiling version on
-      // certain platforms if it was built against an older version(see:
-      // IMPALA-6418). In this case, it will return false, and EncryptionKey
-      // will try to fall back to CTR mode, so it is not ideal but is OK to use
-      // SSLeay() for GCM mode here since in the worst case, we will be using
-      // AES_256_CTR in a system that supports AES_256_GCM.
-      return (CpuInfo::IsSupported(CpuInfo::PCLMULQDQ)
-          && SSLeay() >= OPENSSL_VERSION_1_0_1D && EVP_aes_256_gcm);
-
-    case AES_256_CTR:
-      // If TLS1.2 is supported, then we're on a verison of OpenSSL that
-      // supports AES-256-CTR.
-      return (MaxSupportedTlsVersion() >= TLS1_2_VERSION && EVP_aes_256_ctr);
-
-    case AES_256_CFB:
-      return true;
-
-    default:
-      return false;
-  }
-}
-
-AES_CIPHER_MODE EncryptionKey::GetSupportedDefaultMode() const {
-  if (IsModeSupported(AES_256_GCM)) return AES_256_GCM;
-  if (IsModeSupported(AES_256_CTR)) return AES_256_CTR;
-  return AES_256_CFB;
-}
-
-const string EncryptionKey::ModeToString(AES_CIPHER_MODE m) const {
-  switch(m) {
-    case AES_256_GCM: return "AES-GCM";
-    case AES_256_CTR: return "AES-CTR";
-    case AES_256_CFB: return "AES-CFB";
-  }
-  return "Unknown mode";
-}
 }

http://git-wip-us.apache.org/repos/asf/impala/blob/1acddfc8/be/src/util/openssl-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/openssl-util.h b/be/src/util/openssl-util.h
index ef53425..7b1b28e 100644
--- a/be/src/util/openssl-util.h
+++ b/be/src/util/openssl-util.h
@@ -60,9 +60,9 @@ bool IsExternalTlsConfigured();
 void SeedOpenSSLRNG();
 
 enum AES_CIPHER_MODE {
-  AES_256_CFB,
   AES_256_CTR,
-  AES_256_GCM
+  AES_256_CFB,
+  AES_256_GCM // not supported now.
 };
 
 /// The hash of a data buffer used for checking integrity. A SHA256 hash is used
@@ -83,56 +83,43 @@ class IntegrityHash {
 /// The key and initialization vector (IV) required to encrypt and decrypt a buffer of
 /// data. This should be regenerated for each buffer of data.
 ///
-/// We use AES with a 256-bit key and GCM/CTR/CFB cipher block mode, which gives us a
-/// stream cipher that can support arbitrary-length ciphertexts. The mode is chosen
-/// depends on the OpenSSL version & the hardware support at runtime. The IV is used as
+/// We use AES with a 256-bit key and CTR/CFB cipher block mode, which gives us a stream
+/// cipher that can support arbitrary-length ciphertexts. If OpenSSL version at runtime
+/// is 1.0.1 or above, CTR mode is used, otherwise CFB mode is used. The IV is used as
 /// an input to the cipher as the "block to supply before the first block of plaintext".
 /// This is required because all ciphers (except the weak ECB) are built such that each
 /// block depends on the output from the previous block. Since the first block doesn't
 /// have a previous block, we supply this IV. Think of it  as starting off the chain of
 /// encryption.
-///
-/// Notes for GCM:
-/// (1) GCM mode was supported since OpenSSL 1.0.1, however the tag verification
-/// in decryption was only supported since OpenSSL 1.0.1d.
-/// (2) The plaintext and the Additional Authenticated Data(AAD) are the two
-/// categories of data that GCM protects. GCM protects the authenticity of the
-/// plaintext and the AAD, and GCM also protects the confidentiality of the
-/// plaintext. The AAD itself is not required or won't change the security.
-/// In our case(Spill to Disk), we just ignore the AAD.
-
 class EncryptionKey {
  public:
-  EncryptionKey() : initialized_(false) { mode_ = GetSupportedDefaultMode(); }
-
-  /// Initializes a key for temporary use with randomly generated data, and clears the
-  /// tag for GCM mode. Reinitializes with new random values if the key was already
-  /// initialized. We use AES-GCM/AES-CTR/AES-CFB mode so key/IV pairs should not be
-  /// reused. This function automatically reseeds the RNG periodically, so callers do
-  /// not need to do it.
+  EncryptionKey() : initialized_(false) {
+    // If TLS1.2 is supported, then we're on a verison of OpenSSL that supports
+    // AES-256-CTR.
+    mode_ = MaxSupportedTlsVersion() < TLS1_2_VERSION ? AES_256_CFB : AES_256_CTR;
+  }
+
+  /// Initialize a key for temporary use with randomly generated data. Reinitializes with
+  /// new random values if the key was already initialized. We use AES-CTR/AES-CFB mode
+  /// so key/IV pairs should not be reused. This function automatically reseeds the RNG
+  /// periodically, so callers do not need to do it.
   void InitializeRandom();
 
   /// Encrypts a buffer of input data 'data' of length 'len' into an output buffer 'out'.
   /// Exactly 'len' bytes will be written to 'out'. This key must be initialized before
   /// calling. Operates in-place if 'in' == 'out', otherwise the buffers must not overlap.
-  /// For GCM mode, the hash tag will be kept inside(gcm_tag_ variable).
-  Status Encrypt(const uint8_t* data, int64_t len, uint8_t* out) WARN_UNUSED_RESULT;
+  Status Encrypt(const uint8_t* data, int64_t len, uint8_t* out) const WARN_UNUSED_RESULT;
 
   /// Decrypts a buffer of input data 'data' of length 'len' that was encrypted with this
   /// key into an output buffer 'out'. Exactly 'len' bytes will be written to 'out'.
   /// This key must be initialized before calling. Operates in-place if 'in' == 'out',
-  /// otherwise the buffers must not overlap. For GCM mode, the hash tag, which is
-  /// computed during encryption, will be used for intgerity verification.
-  Status Decrypt(const uint8_t* data, int64_t len, uint8_t* out) WARN_UNUSED_RESULT;
+  /// otherwise the buffers must not overlap.
+  Status Decrypt(const uint8_t* data, int64_t len, uint8_t* out) const WARN_UNUSED_RESULT;
 
   /// Specify a cipher mode. Currently used only for testing but maybe in future we
   /// can provide a configuration option for the end user who can choose a preferred
   /// mode(GCM, CTR, CFB...) based on their software/hardware environment.
-  /// If not supported, fall back to the supported mode at runtime.
-  void SetCipherMode(AES_CIPHER_MODE m);
-
-  /// If is GCM mode at runtime
-  bool IsGcmMode() const { return mode_ == AES_256_GCM; }
+  void SetCipherMode(AES_CIPHER_MODE m) { mode_ = m; }
 
  private:
   /// Helper method that encrypts/decrypts if 'encrypt' is true/false respectively.
@@ -141,25 +128,13 @@ class EncryptionKey {
   /// This key must be initialized before calling. Operates in-place if 'in' == 'out',
   /// otherwise the buffers must not overlap.
   Status EncryptInternal(bool encrypt, const uint8_t* data, int64_t len,
-      uint8_t* out) WARN_UNUSED_RESULT;
-
-  /// Check if mode m is supported at runtime
-  bool IsModeSupported(AES_CIPHER_MODE m) const;
-
-  /// Returns the a default mode which is supported at runtime. If GCM mode
-  /// is supported, return AES_256_GCM as the default. If GCM is not supported,
-  /// but CTR is still supported, return AES_256_CTR. When both GCM and
-  /// CTR modes are not supported, return AES_256_CFB.
-  AES_CIPHER_MODE GetSupportedDefaultMode() const;
-
-  /// Converts mode type to string.
-  const string ModeToString(AES_CIPHER_MODE m) const;
+      uint8_t* out) const WARN_UNUSED_RESULT;
 
   /// Track whether this key has been initialized, to avoid accidentally using
   /// uninitialized keys.
   bool initialized_;
 
-  /// Returns a EVP_CIPHER according to cipher mode at runtime
+  /// return a EVP_CIPHER according to cipher mode at runtime
   const EVP_CIPHER* GetCipher() const;
 
   /// An AES 256-bit key.
@@ -168,9 +143,6 @@ class EncryptionKey {
   /// An initialization vector to feed as the first block to AES.
   uint8_t iv_[AES_BLOCK_SIZE];
 
-  /// Tag for GCM mode
-  uint8_t gcm_tag_[AES_BLOCK_SIZE];
-
   /// Cipher Mode
   AES_CIPHER_MODE mode_;
 };