You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by sh...@apache.org on 2021/11/24 16:57:52 UTC
[parquet-mr] branch master updated: PARQUET-2040: Uniform encryption (#935)
This is an automated email from the ASF dual-hosted git repository.
shangxinli pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 23a217a PARQUET-2040: Uniform encryption (#935)
23a217a is described below
commit 23a217a21ce4aaac8a92dca057e9352c51719f8f
Author: ggershinsky <gg...@users.noreply.github.com>
AuthorDate: Wed Nov 24 18:57:42 2021 +0200
PARQUET-2040: Uniform encryption (#935)
* Initial commit
* Uniform encryption - count and limit operations with same key
* fix the limit value
Co-authored-by: Gidon Gershinsky <gg...@apple.com>
---
.../java/org/apache/parquet/crypto/AesCipher.java | 24 ++++++----
.../org/apache/parquet/crypto/AesCtrEncryptor.java | 8 +++-
.../org/apache/parquet/crypto/AesGcmEncryptor.java | 9 +++-
.../keytools/PropertiesDrivenCryptoFactory.java | 54 ++++++++++++++++------
.../crypto/TestPropertiesDrivenEncryption.java | 40 ++++++++++++----
5 files changed, 103 insertions(+), 32 deletions(-)
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java
index 6b9f24c..26175a4 100755
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCipher.java
@@ -37,6 +37,12 @@ public class AesCipher {
protected static final int GCM_TAG_LENGTH_BITS = 8 * GCM_TAG_LENGTH;
protected static final int CHUNK_LENGTH = 4 * 1024;
protected static final int SIZE_LENGTH = ModuleCipherFactory.SIZE_LENGTH;
+ // NIST SP 800-38D section 8.3 specifies limit on AES GCM encryption operations with same key and random IV/nonce
+ protected static final long GCM_RANDOM_IV_SAME_KEY_MAX_OPS = 1L << 32;
+ // NIST SP 800-38A doesn't specify limit on AES CTR operations.
+ // However, Parquet uses a random IV (with 12-byte random nonce). To avoid repetition due to "birthday problem",
+ // setting a conservative limit equal to GCM's value for random IVs
+ protected static final long CTR_RANDOM_IV_SAME_KEY_MAX_OPS = GCM_RANDOM_IV_SAME_KEY_MAX_OPS;
protected SecretKeySpec aesKey;
protected final SecureRandom randomGenerator;
@@ -65,14 +71,14 @@ public class AesCipher {
localNonce = new byte[NONCE_LENGTH];
}
- public static byte[] createModuleAAD(byte[] fileAAD, ModuleType moduleType,
+ public static byte[] createModuleAAD(byte[] fileAAD, ModuleType moduleType,
int rowGroupOrdinal, int columnOrdinal, int pageOrdinal) {
-
+
byte[] typeOrdinalBytes = new byte[1];
typeOrdinalBytes[0] = moduleType.getValue();
-
+
if (ModuleType.Footer == moduleType) {
- return concatByteArrays(fileAAD, typeOrdinalBytes);
+ return concatByteArrays(fileAAD, typeOrdinalBytes);
}
if (rowGroupOrdinal < 0) {
@@ -84,7 +90,7 @@ public class AesCipher {
+ "more than " + Short.MAX_VALUE + " row groups: " + rowGroupOrdinal);
}
byte[] rowGroupOrdinalBytes = shortToBytesLE(shortRGOrdinal);
-
+
if (columnOrdinal < 0) {
throw new IllegalArgumentException("Wrong column ordinal: " + columnOrdinal);
}
@@ -94,9 +100,9 @@ public class AesCipher {
+ "more than " + Short.MAX_VALUE + " columns: " + columnOrdinal);
}
byte[] columnOrdinalBytes = shortToBytesLE(shortColumOrdinal);
-
+
if (ModuleType.DataPage != moduleType && ModuleType.DataPageHeader != moduleType) {
- return concatByteArrays(fileAAD, typeOrdinalBytes, rowGroupOrdinalBytes, columnOrdinalBytes);
+ return concatByteArrays(fileAAD, typeOrdinalBytes, rowGroupOrdinalBytes, columnOrdinalBytes);
}
if (pageOrdinal < 0) {
@@ -108,7 +114,7 @@ public class AesCipher {
+ "more than " + Short.MAX_VALUE + " pages per chunk: " + pageOrdinal);
}
byte[] pageOrdinalBytes = shortToBytesLE(shortPageOrdinal);
-
+
return concatByteArrays(fileAAD, typeOrdinalBytes, rowGroupOrdinalBytes, columnOrdinalBytes, pageOrdinalBytes);
}
@@ -127,7 +133,7 @@ public class AesCipher {
throw new ParquetCryptoRuntimeException("Encrypted parquet files can't have "
+ "more than " + Short.MAX_VALUE + " pages per chunk: " + newPageOrdinal);
}
-
+
byte[] pageOrdinalBytes = shortToBytesLE(shortPageOrdinal);
System.arraycopy(pageOrdinalBytes, 0, pageAAD, pageAAD.length - 2, 2);
}
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java
index 537789c..6df6a57 100755
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesCtrEncryptor.java
@@ -30,9 +30,11 @@ import java.security.GeneralSecurityException;
public class AesCtrEncryptor extends AesCipher implements BlockCipher.Encryptor{
private final byte[] ctrIV;
+ private long operationCounter;
AesCtrEncryptor(byte[] keyBytes) {
super(AesMode.CTR, keyBytes);
+ operationCounter = 0;
try {
cipher = Cipher.getInstance(AesMode.CTR.getCipherName());
@@ -55,7 +57,11 @@ public class AesCtrEncryptor extends AesCipher implements BlockCipher.Encryptor{
return encrypt(writeLength, plainText, localNonce, AAD);
}
- public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) {
+ public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) {
+ if (operationCounter > CTR_RANDOM_IV_SAME_KEY_MAX_OPS) {
+ throw new ParquetCryptoRuntimeException("Exceeded limit of AES CTR encryption operations with same key and random IV");
+ }
+ operationCounter++;
if (nonce.length != NONCE_LENGTH) {
throw new ParquetCryptoRuntimeException("Wrong nonce length " + nonce.length);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java
index d456447..e4d51cd 100755
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/AesGcmEncryptor.java
@@ -29,8 +29,11 @@ import java.security.GeneralSecurityException;
public class AesGcmEncryptor extends AesCipher implements BlockCipher.Encryptor{
+ private long operationCounter;
+
AesGcmEncryptor(byte[] keyBytes) {
super(AesMode.GCM, keyBytes);
+ operationCounter = 0;
try {
cipher = Cipher.getInstance(AesMode.GCM.getCipherName());
@@ -49,7 +52,11 @@ public class AesGcmEncryptor extends AesCipher implements BlockCipher.Encryptor{
return encrypt(writeLength, plainText, localNonce, AAD);
}
- public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) {
+ public byte[] encrypt(boolean writeLength, byte[] plainText, byte[] nonce, byte[] AAD) {
+ if (operationCounter > GCM_RANDOM_IV_SAME_KEY_MAX_OPS) {
+ throw new ParquetCryptoRuntimeException("Exceeded limit of AES GCM encryption operations with same key and random IV");
+ }
+ operationCounter++;
if (nonce.length != NONCE_LENGTH) {
throw new ParquetCryptoRuntimeException("Wrong nonce length " + nonce.length);
diff --git a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
index f35d9ec..817ab4d 100644
--- a/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
+++ b/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/PropertiesDrivenCryptoFactory.java
@@ -57,6 +57,10 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
*/
public static final String FOOTER_KEY_PROPERTY_NAME = "parquet.encryption.footer.key";
/**
+ * Master key ID for uniform encryption (same key for all columns and footer).
+ */
+ public static final String UNIFORM_KEY_PROPERTY_NAME = "parquet.encryption.uniform.key";
+ /**
* Parquet encryption algorithm. Can be "AES_GCM_V1" (default), or "AES_GCM_CTR_V1".
*/
public static final String ENCRYPTION_ALGORITHM_PROPERTY_NAME = "parquet.encryption.algorithm";
@@ -77,15 +81,39 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
String footerKeyId = fileHadoopConfig.getTrimmed(FOOTER_KEY_PROPERTY_NAME);
String columnKeysStr = fileHadoopConfig.getTrimmed(COLUMN_KEYS_PROPERTY_NAME);
+ String uniformKeyId = fileHadoopConfig.getTrimmed(UNIFORM_KEY_PROPERTY_NAME);
+
+ boolean emptyFooterKeyId = stringIsEmpty(footerKeyId);
+ boolean emptyColumnKeyIds = stringIsEmpty(columnKeysStr);
+ boolean emptyUniformKeyId = stringIsEmpty(uniformKeyId);
// File shouldn't be encrypted
- if (stringIsEmpty(footerKeyId) && stringIsEmpty(columnKeysStr)) {
+ if (emptyFooterKeyId && emptyColumnKeyIds && emptyUniformKeyId) {
LOG.debug("Unencrypted file: {}", tempFilePath);
return null;
}
- if (stringIsEmpty(footerKeyId)) {
- throw new ParquetCryptoRuntimeException("Undefined footer key");
+ if (emptyUniformKeyId) {
+ // Non-uniform encryption. Must have both footer and column key ids
+ if (emptyFooterKeyId) {
+ throw new ParquetCryptoRuntimeException("No footer key configured in " + FOOTER_KEY_PROPERTY_NAME);
+ }
+ if (emptyColumnKeyIds) {
+ throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
+ }
+ } else {
+ // Uniform encryption. Can't have configuration of footer and column key ids
+ if (!emptyFooterKeyId) {
+ throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have footer key configured in " +
+ FOOTER_KEY_PROPERTY_NAME);
+ }
+ if (!emptyColumnKeyIds) {
+ throw new ParquetCryptoRuntimeException("Uniform encryption. Cant have column keys configured in " +
+ COLUMN_KEYS_PROPERTY_NAME);
+ }
+
+ // Now assign footer key id to uniform key id
+ footerKeyId = uniformKeyId;
}
FileKeyMaterialStore keyMaterialStore = null;
@@ -126,14 +154,17 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
RANDOM.nextBytes(footerKeyBytes);
byte[] footerKeyMetadata = keyWrapper.getEncryptionKeyMetadata(footerKeyBytes, footerKeyId, true);
- Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
-
boolean plaintextFooter = fileHadoopConfig.getBoolean(PLAINTEXT_FOOTER_PROPERTY_NAME, PLAINTEXT_FOOTER_DEFAULT);
FileEncryptionProperties.Builder propertiesBuilder = FileEncryptionProperties.builder(footerKeyBytes)
.withFooterKeyMetadata(footerKeyMetadata)
- .withAlgorithm(cipher)
- .withEncryptedColumns(encryptedColumns);
+ .withAlgorithm(cipher);
+
+ if (emptyUniformKeyId) {
+ Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns =
+ getColumnEncryptionProperties(dekLength, columnKeysStr, keyWrapper);
+ propertiesBuilder = propertiesBuilder.withEncryptedColumns(encryptedColumns);
+ }
if (plaintextFooter) {
propertiesBuilder = propertiesBuilder.withPlaintextFooter();
@@ -144,9 +175,9 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
}
if (LOG.isDebugEnabled()) {
- LOG.debug("File encryption properties for {} - algo: {}; footer key id: {}; plaintext footer: {}; "
- + "internal key material: {}; encrypted columns: {}",
- tempFilePath, cipher, footerKeyId, plaintextFooter, keyMaterialInternalStorage, columnKeysStr);
+ LOG.debug("File encryption properties for {} - algo: {}; footer key id: {}; uniform key id: {}; " + "" +
+ "plaintext footer: {}; internal key material: {}; encrypted columns: {}",
+ tempFilePath, cipher, footerKeyId, uniformKeyId, plaintextFooter, keyMaterialInternalStorage, columnKeysStr);
}
return propertiesBuilder.build();
@@ -154,9 +185,6 @@ public class PropertiesDrivenCryptoFactory implements EncryptionPropertiesFactor
private Map<ColumnPath, ColumnEncryptionProperties> getColumnEncryptionProperties(int dekLength, String columnKeys,
FileKeyWrapper keyWrapper) throws ParquetCryptoRuntimeException {
- if (stringIsEmpty(columnKeys)) {
- throw new ParquetCryptoRuntimeException("No column keys configured in " + COLUMN_KEYS_PROPERTY_NAME);
- }
Map<ColumnPath, ColumnEncryptionProperties> encryptedColumns = new HashMap<ColumnPath, ColumnEncryptionProperties>();
String keyToColumns[] = columnKeys.split(";");
for (int i = 0; i < keyToColumns.length; ++i) {
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
index dc9b005..4f9eb78 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/TestPropertiesDrivenEncryption.java
@@ -149,8 +149,11 @@ public class TestPropertiesDrivenEncryption {
encoder.encodeToString("1234567890123453".getBytes(StandardCharsets.UTF_8)),
encoder.encodeToString("1234567890123454".getBytes(StandardCharsets.UTF_8)),
encoder.encodeToString("1234567890123455".getBytes(StandardCharsets.UTF_8))};
+ private static final String UNIFORM_MASTER_KEY =
+ encoder.encodeToString("0123456789012346".getBytes(StandardCharsets.UTF_8));
private static final String[] COLUMN_MASTER_KEY_IDS = { "kc1", "kc2", "kc3", "kc4", "kc5", "kc6"};
private static final String FOOTER_MASTER_KEY_ID = "kf";
+ private static final String UNIFORM_MASTER_KEY_ID = "ku";
private static final String KEY_LIST = new StringBuilder()
.append(COLUMN_MASTER_KEY_IDS[0]).append(": ").append(COLUMN_MASTER_KEYS[0]).append(", ")
@@ -159,6 +162,7 @@ public class TestPropertiesDrivenEncryption {
.append(COLUMN_MASTER_KEY_IDS[3]).append(": ").append(COLUMN_MASTER_KEYS[3]).append(", ")
.append(COLUMN_MASTER_KEY_IDS[4]).append(": ").append(COLUMN_MASTER_KEYS[4]).append(", ")
.append(COLUMN_MASTER_KEY_IDS[5]).append(": ").append(COLUMN_MASTER_KEYS[5]).append(", ")
+ .append(UNIFORM_MASTER_KEY_ID).append(": ").append(UNIFORM_MASTER_KEY).append(", ")
.append(FOOTER_MASTER_KEY_ID).append(": ").append(FOOTER_MASTER_KEY).toString();
private static final String NEW_FOOTER_MASTER_KEY =
@@ -170,6 +174,8 @@ public class TestPropertiesDrivenEncryption {
encoder.encodeToString("9234567890123453".getBytes(StandardCharsets.UTF_8)),
encoder.encodeToString("9234567890123454".getBytes(StandardCharsets.UTF_8)),
encoder.encodeToString("9234567890123455".getBytes(StandardCharsets.UTF_8))};
+ private static final String NEW_UNIFORM_MASTER_KEY =
+ encoder.encodeToString("9123456789012346".getBytes(StandardCharsets.UTF_8));
private static final String NEW_KEY_LIST = new StringBuilder()
.append(COLUMN_MASTER_KEY_IDS[0]).append(": ").append(NEW_COLUMN_MASTER_KEYS[0]).append(", ")
@@ -178,6 +184,7 @@ public class TestPropertiesDrivenEncryption {
.append(COLUMN_MASTER_KEY_IDS[3]).append(": ").append(NEW_COLUMN_MASTER_KEYS[3]).append(", ")
.append(COLUMN_MASTER_KEY_IDS[4]).append(": ").append(NEW_COLUMN_MASTER_KEYS[4]).append(", ")
.append(COLUMN_MASTER_KEY_IDS[5]).append(": ").append(NEW_COLUMN_MASTER_KEYS[5]).append(", ")
+ .append(UNIFORM_MASTER_KEY_ID).append(": ").append(NEW_UNIFORM_MASTER_KEY).append(", ")
.append(FOOTER_MASTER_KEY_ID).append(": ").append(NEW_FOOTER_MASTER_KEY).toString();
private static final String COLUMN_KEY_MAPPING = new StringBuilder()
@@ -201,35 +208,45 @@ public class TestPropertiesDrivenEncryption {
public enum EncryptionConfiguration {
ENCRYPT_COLUMNS_AND_FOOTER {
/**
- * Encrypt two columns and the footer, with different keys.
+ * Encrypt two columns and the footer, with different master keys.
*/
public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
Configuration conf = getCryptoProperties(test);
- setEncryptionKeys(conf);
+ setColumnAndFooterKeys(conf);
+ return conf;
+ }
+ },
+ UNIFORM_ENCRYPTION {
+ /**
+ * Encrypt all columns and the footer, with same master key.
+ */
+ public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
+ Configuration conf = getCryptoProperties(test);
+ setUniformKey(conf);
return conf;
}
},
ENCRYPT_COLUMNS_PLAINTEXT_FOOTER {
/**
- * Encrypt two columns, with different keys.
+ * Encrypt two columns, with different master keys.
* Don't encrypt footer.
* (plaintext footer mode, readable by legacy readers)
*/
public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
Configuration conf = getCryptoProperties(test);
- setEncryptionKeys(conf);
+ setColumnAndFooterKeys(conf);
conf.setBoolean(PropertiesDrivenCryptoFactory.PLAINTEXT_FOOTER_PROPERTY_NAME, true);
return conf;
}
},
ENCRYPT_COLUMNS_AND_FOOTER_CTR {
/**
- * Encrypt two columns and the footer, with different keys.
+ * Encrypt two columns and the footer, with different master keys.
* Use AES_GCM_CTR_V1 algorithm.
*/
public Configuration getHadoopConfiguration(TestPropertiesDrivenEncryption test) {
Configuration conf = getCryptoProperties(test);
- setEncryptionKeys(conf);
+ setColumnAndFooterKeys(conf);
conf.set(PropertiesDrivenCryptoFactory.ENCRYPTION_ALGORITHM_PROPERTY_NAME,
ParquetCipher.AES_GCM_CTR_V1.toString());
return conf;
@@ -292,13 +309,20 @@ public class TestPropertiesDrivenEncryption {
}
/**
- * Set configuration properties to encrypt columns and the footer with different keys
+ * Set configuration properties to encrypt columns and the footer with different master keys
*/
- private static void setEncryptionKeys(Configuration conf) {
+ private static void setColumnAndFooterKeys(Configuration conf) {
conf.set(PropertiesDrivenCryptoFactory.COLUMN_KEYS_PROPERTY_NAME, COLUMN_KEY_MAPPING);
conf.set(PropertiesDrivenCryptoFactory.FOOTER_KEY_PROPERTY_NAME, FOOTER_MASTER_KEY_ID);
}
+ /**
+ * Set uniform encryption configuration property
+ */
+ private static void setUniformKey(Configuration conf) {
+ conf.set(PropertiesDrivenCryptoFactory.UNIFORM_KEY_PROPERTY_NAME, UNIFORM_MASTER_KEY_ID);
+ }
+
@Test
public void testWriteReadEncryptedParquetFiles() throws IOException {