You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@parquet.apache.org by GitBox <gi...@apache.org> on 2020/05/10 02:50:07 UTC

[GitHub] [parquet-mr] shangxinli commented on a change in pull request #776: PARQUET-1229: Parquet MR encryption

shangxinli commented on a change in pull request #776:
URL: https://github.com/apache/parquet-mr/pull/776#discussion_r422575326



##########
File path: parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java
##########
@@ -1185,70 +1275,189 @@ static long getOffset(ColumnChunk columnChunk) {
     return offset;
   }
 
+  private static void verifyFooterIntegrity(InputStream from, InternalFileDecryptor fileDecryptor, 
+      int combinedFooterLength) throws IOException {
+    
+    byte[] nonce = new byte[AesCipher.NONCE_LENGTH];
+    from.read(nonce);
+    byte[] gcmTag = new byte[AesCipher.GCM_TAG_LENGTH];
+    from.read(gcmTag);
+    
+    AesGcmEncryptor footerSigner =  fileDecryptor.createSignedFooterEncryptor();
+    
+    byte[] footerAndSignature = ((ByteBufferInputStream) from).slice(0).array();
+    int footerSignatureLength = AesCipher.NONCE_LENGTH + AesCipher.GCM_TAG_LENGTH;
+    byte[] serializedFooter = new byte[combinedFooterLength - footerSignatureLength];
+    System.arraycopy(footerAndSignature, 0, serializedFooter, 0, serializedFooter.length);
+
+    byte[] signedFooterAAD = AesCipher.createFooterAAD(fileDecryptor.getFileAAD());
+    byte[] encryptedFooterBytes = footerSigner.encrypt(false, serializedFooter, nonce, signedFooterAAD);
+    byte[] calculatedTag = new byte[AesCipher.GCM_TAG_LENGTH];
+    System.arraycopy(encryptedFooterBytes, encryptedFooterBytes.length - AesCipher.GCM_TAG_LENGTH, 
+        calculatedTag, 0, AesCipher.GCM_TAG_LENGTH);
+    if (!Arrays.equals(gcmTag, calculatedTag)) {
+      throw new TagVerificationException("Signature mismatch in plaintext footer");
+    }
+  }
+
   public ParquetMetadata readParquetMetadata(final InputStream from, MetadataFilter filter) throws IOException {
+    return readParquetMetadata(from, filter, null, false, 0);
+  }
+
+  public ParquetMetadata readParquetMetadata(final InputStream from, MetadataFilter filter,
+      final InternalFileDecryptor fileDecryptor, final boolean encryptedFooter, 
+      final int combinedFooterLength) throws IOException {
+    
+    final BlockCipher.Decryptor footerDecryptor = (encryptedFooter? fileDecryptor.fetchFooterDecryptor() : null);
+    final byte[] encryptedFooterAAD = (encryptedFooter? AesCipher.createFooterAAD(fileDecryptor.getFileAAD()) : null);
+    
     FileMetaData fileMetaData = filter.accept(new MetadataFilterVisitor<FileMetaData, IOException>() {
       @Override
       public FileMetaData visit(NoFilter filter) throws IOException {
-        return readFileMetaData(from);
+        return readFileMetaData(from, footerDecryptor, encryptedFooterAAD);
       }
 
       @Override
       public FileMetaData visit(SkipMetadataFilter filter) throws IOException {
-        return readFileMetaData(from, true);
+        return readFileMetaData(from, true, footerDecryptor, encryptedFooterAAD);
       }
 
       @Override
       public FileMetaData visit(OffsetMetadataFilter filter) throws IOException {
-        return filterFileMetaDataByStart(readFileMetaData(from), filter);
+        return filterFileMetaDataByStart(readFileMetaData(from, footerDecryptor, encryptedFooterAAD), filter);
       }
 
       @Override
       public FileMetaData visit(RangeMetadataFilter filter) throws IOException {
-        return filterFileMetaDataByMidpoint(readFileMetaData(from), filter);
+        return filterFileMetaDataByMidpoint(readFileMetaData(from, footerDecryptor, encryptedFooterAAD), filter);
       }
     });
     LOG.debug("{}", fileMetaData);
-    ParquetMetadata parquetMetadata = fromParquetMetadata(fileMetaData);
+    
+    if (!encryptedFooter && null != fileDecryptor) {
+      if (!fileMetaData.isSetEncryption_algorithm()) { // Plaintext file

Review comment:
       No. I see most of them on the line above, but a few are on the same line. It is not a must.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org