You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by sh...@apache.org on 2021/11/24 16:57:52 UTC

[parquet-mr] branch master updated: PARQUET-2040: Uniform encryption (#935)

This is an automated email from the ASF dual-hosted git repository.

shangxinli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 23a217a  PARQUET-2040: Uniform encryption (#935)
23a217a is described below

commit 23a217a21ce4aaac8a92dca057e9352c51719f8f
Author: ggershinsky <gg...@users.noreply.github.com>
AuthorDate: Wed Nov 24 18:57:42 2021 +0200

    PARQUET-2040: Uniform encryption (#935)
    
    * Initial commit
    
    * Uniform encryption - count and limit operations with same key
    
    * fix the limit value
    
    Co-authored-by: Gidon Gershinsky <gg...@apple.com>
---
 .../java/org/apache/parquet/crypto/AesCipher.java  | 24 ++++++----
 .../org/apache/parquet/crypto/AesCtrEncryptor.java |  8 +++-
 .../org/apache/parquet/crypto/AesGcmEncryptor.java |  9 +++-
 .../keytools/PropertiesDrivenCryptoFactory.java    | 54 ++++++++++++++++------
 .../crypto/TestPropertiesDrivenEncryption.java     | 40 ++++++++++++----
 5 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java
index 6b9f24c..26175a4 100755
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java
@@ -37,6 +37,12 @@ public class AesCipher {
   protected static final int GCM_TAG_LENGTH_BITS = 8 * GCM_TAG_LENGTH;
   protected static final int CHUNK_LENGTH = 4 * 1024;
   protected static final int SIZE_LENGTH = ModuleCipherFactory.SIZE_LENGTH;
+  // NIST SP 800-38D section 8.3 specifies limit on AES GCM encryption operations with same key and random IV/nonce
+  protected static final long GCM_RANDOM_IV_SAME_KEY_MAX_OPS = 1L << 32;
+  // NIST SP 800-38A doesn't specify limit on AES CTR operations.
+  // However, Parquet uses a random IV (with 12-byte random nonce). To avoid repetition due to "birthday problem",
+  // setting a conservative limit equal to GCM's value for random IVs
+  protected static final long CTR_RANDOM_IV_SAME_KEY_MAX_OPS = GCM_RANDOM_IV_SAME_KEY_MAX_OPS;
 
   protected SecretKeySpec aesKey;
   protected final SecureRandom randomGenerator;
@@ -65,14 +71,14 @@ public class AesCipher {
     localNonce = new byte[NONCE_LENGTH];
   }
 
-  public static byte[] createModuleAAD(byte[] fileAAD, ModuleType moduleType, 
+  public static byte[] createModuleAAD(byte[] fileAAD, ModuleType moduleType,
       int rowGroupOrdinal, int columnOrdinal, int pageOrdinal) {
-    
+
     byte[] typeOrdinalBytes = new byte[1];
     typeOrdinalBytes[0] = moduleType.getValue();
-    
+
     if (ModuleType.Footer == moduleType) {
-      return concatByteArrays(fileAAD, typeOrdinalBytes);      
+      return concatByteArrays(fileAAD, typeOrdinalBytes);
     }
 
     if (rowGroupOrdinal < 0) {
@@ -84,7 +90,7 @@ public class AesCipher {
           + "more than " + Short.MAX_VALUE + " row groups: " + rowGroupOrdinal);
     }
     byte[] rowGroupOrdinalBytes = shortToBytesLE(shortRGOrdinal);
-    
+
     if (columnOrdinal < 0) {
       throw new IllegalArgumentException("Wrong column ordinal: " + columnOrdinal);
     }
@@ -94,9 +100,9 @@ public class AesCipher {
           + "more than " + Short.MAX_VALUE + " columns: " + columnOrdinal);
     }
     byte[] columnOrdinalBytes = shortToBytesLE(shortColumOrdinal);
-    
+
     if (ModuleType.DataPage != moduleType && ModuleType.DataPageHeader != moduleType) {
-      return concatByteArrays(fileAAD, typeOrdinalBytes, rowGroupOrdinalBytes, columnOrdinalBytes); 
+      return concatByteArrays(fileAAD, typeOrdinalBytes, rowGroupOrdinalBytes, columnOrdinalBytes);
     }
 
     if (pageOrdinal < 0) {
@@ -108,7 +114,7 @@ public class AesCipher {
           + "more than " + Short.MAX_VALUE + " pages per chunk: " + pageOrdinal);
     }
     byte[] pageOrdinalBytes = shortToBytesLE(shortPageOrdinal);
-    
+
     return concatByteArrays(fileAAD, typeOrdinalBytes, rowGroupOrdinalBytes, columnOrdinalBytes, pageOrdinalBytes);
   }
 
@@ -127,7 +133,7 @@ public class AesCipher {
       throw new ParquetCryptoRuntimeException("Encrypted parquet files can't have "
           + "more than " + Short.MAX_VALUE + " pages per chunk: " + newPageOrdinal);
     }
-    
+
     byte[] pageOrdinalBytes = shortToBytesLE(shortPageOrdinal);
     System.arraycopy(pageOrdinalBytes, 0, pageAAD, pageAAD.length - 2, 2);
   }
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java
index 537789c..6df6a57 100755
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java
@@ -30,9 +30,11 @@ import java.security.GeneralSecurityException;
 public class AesCtrEncryptor extends AesCipher implements BlockCipher.Encryptor{
 
   private final byte[] ctrIV;
+  private long operationCounter;
 
   AesCtrEncryptor(byte[] keyBytes) {
     super(AesMode.CTR, keyBytes);
+    operationCounter = 0;
 
     try {
       cipher = Cipher.getInstance(AesMode.CTR.getCipherName());
@@ -55,7 +57,11 @@ public class AesCtrEncryptor extends AesCipher implements BlockCipher.Encryptor{
     return encrypt(writeLength, plainText, localNonce, AAD);
   }
 
-  public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) { 
+  public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) {
+    if (operationCounter > CTR_RANDOM_IV_SAME_KEY_MAX_OPS) {
+      throw new ParquetCryptoRuntimeException("Exceeded limit of AES CTR encryption operations with same key and random IV");
+    }
+    operationCounter++;
 
     if (nonce.length != NONCE_LENGTH) {
       throw new ParquetCryptoRuntimeException("Wrong nonce length " + nonce.length);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java
index d456447..e4d51cd 100755
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java
@@ -29,8 +29,11 @@ import java.security.GeneralSecurityException;
 
 public class AesGcmEncryptor extends AesCipher implements BlockCipher.Encryptor{
 
+  private long operationCounter;
+
   AesGcmEncryptor(byte[] keyBytes) {
     super(AesMode.GCM, keyBytes);
+    operationCounter = 0;
 
     try {
       cipher = Cipher.getInstance(AesMode.GCM.getCipherName());
@@ -49,7 +52,11 @@ public class AesGcmEncryptor extends AesCipher implements BlockCipher.Encryptor{
     return encrypt(writeLength, plainText, localNonce, AAD);
   }
 
-  public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) { 
+  public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) {
+    if (operationCounter > GCM_RANDOM_IV_SAME_KEY_MAX_OPS) {
+      throw new ParquetCryptoRuntimeException("Exceeded limit of AES GCM encryption operations with same key and random IV");
+    }
+    operationCounter++;
 
     if (nonce.length != NONCE_LENGTH) {
       throw new ParquetCryptoRuntimeException("Wrong nonce length " + nonce.length);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
index f35d9ec..817ab4d 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
@@ -57,6 +57,10 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
    */
   public static final String FOOTER_KEY_PROPERTY_NAME = "parquet.encryption.footer.key";
   /**
+   * Master key ID for uniform encryption (same key for all columns and footer).
+   */
+  public static final String UNIFORM_KEY_PROPERTY_NAME = "parquet.encryption.uniform.key";
+  /**
    * Parquet encryption algorithm. Can be "AES_GCM_V1" (default), or "AES_GCM_CTR_V1".
    */
   public static final String ENCRYPTION_ALGORITHM_PROPERTY_NAME = "parquet.encryption.algorithm";
@@ -77,15 +81,39 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
 
     String footerKeyId = fileHadoopConfig.getTrimmed(FOOTER_KEY_PROPERTY_NAME);
     String columnKeysStr = fileHadoopConfig.getTrimmed(COLUMN_KEYS_PROPERTY_NAME);
+    String uniformKeyId = fileHadoopConfig.getTrimmed(UNIFORM_KEY_PROPERTY_NAME);
+
+    boolean emptyFooterKeyId = stringIsEmpty(footerKeyId);
+    boolean emptyColumnKeyIds = stringIsEmpty(columnKeysStr);
+    boolean emptyUniformKeyId = stringIsEmpty(uniformKeyId);
 
     // File shouldn't be encrypted
-    if (stringIsEmpty(footerKeyId) && stringIsEmpty(columnKeysStr)) {
+    if (emptyFooterKeyId && emptyColumnKeyIds && emptyUniformKeyId) {
       LOG.debug("Unencrypted file: {}", tempFilePath);
       return null;
     }
 
-    if (stringIsEmpty(footerKeyId)) {
-      throw new ParquetCryptoRuntimeException("Undefined footer key");
+    if (emptyUniformKeyId) {
+      // Non-uniform encryption. Must have both footer and column key ids
+      if (emptyFooterKeyId) {
+        throw new ParquetCryptoRuntimeException("No footer key configured in " + FOOTER_KEY_PROPERTY_NAME);
+      }
+      if (emptyColumnKeyIds) {
+        throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
+      }
+    } else {
+      // Uniform encryption. Can't have configuration of footer and column key ids
+      if (!emptyFooterKeyId) {
+        throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have footer key configured in " +
+          FOOTER_KEY_PROPERTY_NAME);
+      }
+      if (!emptyColumnKeyIds) {
+        throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have column keys configured in " +
+          COLUMN_KEYS_PROPERTY_NAME);
+      }
+
+      // Now assign footer key id to uniform key id
+      footerKeyId = uniformKeyId;
     }
 
     FileKeyMaterialStore keyMaterialStore = null;
@@ -126,14 +154,17 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
     RANDOM.nextBytes(footerKeyBytes);
     byte[] footerKeyMetadata = keyWrapper.getEncryptionKeyMetadata(footerKeyBytes, footerKeyId, true);
 
-    Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
-
     boolean plaintextFooter = fileHadoopConfig.getBoolean(PLAINTEXT_FOOTER_PROPERTY_NAME, PLAINTEXT_FOOTER_DEFAULT);
 
     FileEncryptionProperties.Builder propertiesBuilder = FileEncryptionProperties.builder(footerKeyBytes)
         .withFooterKeyMetadata(footerKeyMetadata)
-        .withAlgorithm(cipher)
-        .withEncryptedColumns(encryptedColumns);
+        .withAlgorithm(cipher);
+
+    if (emptyUniformKeyId) {
+      Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns =
+        getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
+      propertiesBuilder = propertiesBuilder.withEncryptedColumns(encryptedColumns);
+    }
 
     if (plaintextFooter) {
       propertiesBuilder = propertiesBuilder.withPlaintextFooter();
@@ -144,9 +175,9 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
     }
 
     if (LOG.isDebugEnabled()) {
-      LOG.debug("File encryption properties for {} - algo: {}; footer key id: {}; plaintext footer: {}; "
-          + "internal key material: {}; encrypted columns: {}",
-          tempFilePath, cipher, footerKeyId, plaintextFooter, keyMaterialInternalStorage, columnKeysStr);
+      LOG.debug("File encryption properties for {} - algo: {}; footer key id: {}; uniform key id: {}; " + "" +
+          "plaintext footer: {}; internal key material: {}; encrypted columns: {}",
+          tempFilePath, cipher, footerKeyId, uniformKeyId, plaintextFooter, keyMaterialInternalStorage, columnKeysStr);
     }
 
     return propertiesBuilder.build();
@@ -154,9 +185,6 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
 
   private Map<ColumnPath, ColumnEncryptionProperties> getColumnEncryptionProperties(int dekLength, String columnKeys,
       FileKeyWrapper keyWrapper) throws ParquetCryptoRuntimeException {
-    if (stringIsEmpty(columnKeys)) {
-      throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
-    }
     Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = new HashMap<ColumnPath, ColumnEncryptionProperties>();
     String keyToColumns[] = columnKeys.split(";");
     for (int i = 0; i < keyToColumns.length; ++i) {
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
index dc9b005..4f9eb78 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
@@ -149,8 +149,11 @@ public class TestPropertiesDrivenEncryption {
     encoder.encodeToString("1234567890123453".getBytes(StandardCharsets.UTF_8)),
     encoder.encodeToString("1234567890123454".getBytes(StandardCharsets.UTF_8)),
     encoder.encodeToString("1234567890123455".getBytes(StandardCharsets.UTF_8))};
+  private static final String UNIFORM_MASTER_KEY =
+    encoder.encodeToString("0123456789012346".getBytes(StandardCharsets.UTF_8));
   private static final String[] COLUMN_MASTER_KEY_IDS = { "kc1", "kc2", "kc3", "kc4", "kc5", "kc6"};
   private static final String FOOTER_MASTER_KEY_ID = "kf";
+  private static final String UNIFORM_MASTER_KEY_ID = "ku";
 
   private static final String KEY_LIST =  new StringBuilder()
     .append(COLUMN_MASTER_KEY_IDS[0]).append(": ").append(COLUMN_MASTER_KEYS[0]).append(", ")
@@ -159,6 +162,7 @@ public class TestPropertiesDrivenEncryption {
     .append(COLUMN_MASTER_KEY_IDS[3]).append(": ").append(COLUMN_MASTER_KEYS[3]).append(", ")
     .append(COLUMN_MASTER_KEY_IDS[4]).append(": ").append(COLUMN_MASTER_KEYS[4]).append(", ")
     .append(COLUMN_MASTER_KEY_IDS[5]).append(": ").append(COLUMN_MASTER_KEYS[5]).append(", ")
+    .append(UNIFORM_MASTER_KEY_ID).append(": ").append(UNIFORM_MASTER_KEY).append(", ")
     .append(FOOTER_MASTER_KEY_ID).append(": ").append(FOOTER_MASTER_KEY).toString();
 
   private static final String NEW_FOOTER_MASTER_KEY =
@@ -170,6 +174,8 @@ public class TestPropertiesDrivenEncryption {
     encoder.encodeToString("9234567890123453".getBytes(StandardCharsets.UTF_8)),
     encoder.encodeToString("9234567890123454".getBytes(StandardCharsets.UTF_8)),
     encoder.encodeToString("9234567890123455".getBytes(StandardCharsets.UTF_8))};
+  private static final String NEW_UNIFORM_MASTER_KEY =
+    encoder.encodeToString("9123456789012346".getBytes(StandardCharsets.UTF_8));
 
   private static final String NEW_KEY_LIST =  new StringBuilder()
     .append(COLUMN_MASTER_KEY_IDS[0]).append(": ").append(NEW_COLUMN_MASTER_KEYS[0]).append(", ")
@@ -178,6 +184,7 @@ public class TestPropertiesDrivenEncryption {
     .append(COLUMN_MASTER_KEY_IDS[3]).append(": ").append(NEW_COLUMN_MASTER_KEYS[3]).append(", ")
     .append(COLUMN_MASTER_KEY_IDS[4]).append(": ").append(NEW_COLUMN_MASTER_KEYS[4]).append(", ")
     .append(COLUMN_MASTER_KEY_IDS[5]).append(": ").append(NEW_COLUMN_MASTER_KEYS[5]).append(", ")
+    .append(UNIFORM_MASTER_KEY_ID).append(": ").append(NEW_UNIFORM_MASTER_KEY).append(", ")
     .append(FOOTER_MASTER_KEY_ID).append(": ").append(NEW_FOOTER_MASTER_KEY).toString();
 
   private static final String COLUMN_KEY_MAPPING = new StringBuilder()
@@ -201,35 +208,45 @@ public class TestPropertiesDrivenEncryption {
   public enum EncryptionConfiguration {
     ENCRYPT_COLUMNS_AND_FOOTER {
       /**
-       * Encrypt two columns and the footer, with different keys.
+       * Encrypt two columns and the footer, with different master keys.
        */
       public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
         Configuration conf = getCryptoProperties(test);
-        setEncryptionKeys(conf);
+        setColumnAndFooterKeys(conf);
+        return conf;
+      }
+    },
+    UNIFORM_ENCRYPTION {
+      /**
+       * Encrypt all columns and the footer, with same master key.
+       */
+      public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
+        Configuration conf = getCryptoProperties(test);
+        setUniformKey(conf);
         return conf;
       }
     },
     ENCRYPT_COLUMNS_PLAINTEXT_FOOTER {
       /**
-       * Encrypt two columns, with different keys.
+       * Encrypt two columns, with different master keys.
        * Don't encrypt footer.
        * (plaintext footer mode, readable by legacy readers)
        */
       public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
         Configuration conf = getCryptoProperties(test);
-        setEncryptionKeys(conf);
+        setColumnAndFooterKeys(conf);
         conf.setBoolean(PropertiesDrivenCryptoFactory.PLAINTEXT_FOOTER_PROPERTY_NAME, true);
         return conf;
       }
     },
     ENCRYPT_COLUMNS_AND_FOOTER_CTR {
       /**
-       * Encrypt two columns and the footer, with different keys.
+       * Encrypt two columns and the footer, with different master keys.
        * Use AES_GCM_CTR_V1 algorithm.
        */
       public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
         Configuration conf = getCryptoProperties(test);
-        setEncryptionKeys(conf);
+        setColumnAndFooterKeys(conf);
         conf.set(PropertiesDrivenCryptoFactory.ENCRYPTION_ALGORITHM_PROPERTY_NAME,
           ParquetCipher.AES_GCM_CTR_V1.toString());
         return conf;
@@ -292,13 +309,20 @@ public class TestPropertiesDrivenEncryption {
   }
 
   /**
-   * Set configuration properties to encrypt columns and the footer with different keys
+   * Set configuration properties to encrypt columns and the footer with different master keys
    */
-  private static void setEncryptionKeys(Configuration conf) {
+  private static void setColumnAndFooterKeys(Configuration conf) {
     conf.set(PropertiesDrivenCryptoFactory.COLUMN_KEYS_PROPERTY_NAME, COLUMN_KEY_MAPPING);
     conf.set(PropertiesDrivenCryptoFactory.FOOTER_KEY_PROPERTY_NAME, FOOTER_MASTER_KEY_ID);
   }
 
+  /**
+   * Set uniform encryption configuration property
+   */
+  private static void setUniformKey(Configuration conf) {
+    conf.set(PropertiesDrivenCryptoFactory.UNIFORM_KEY_PROPERTY_NAME, UNIFORM_MASTER_KEY_ID);
+  }
+
 
   @Test
   public void testWriteReadEncryptedParquetFiles() throws IOException {