You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2021/03/11 15:37:53 UTC

[parquet-mr] branch master updated: PARQUET-1992: Manually download interop files inside the test and move encryption interop test to maven integration-test phase (#878)

This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new d81b815  PARQUET-1992: Manually download interop files inside the test and move encryption interop test to maven integration-test phase (#878)
d81b815 is described below

commit d81b815fadc252bddb583d400e6097e56aa6708b
Author: andersonm-ibm <63...@users.noreply.github.com>
AuthorDate: Thu Mar 11 17:37:45 2021 +0200

    PARQUET-1992: Manually download interop files inside the test and move encryption interop test to maven integration-test phase (#878)
---
 .gitmodules                                        |  3 -
 .../parquet/hadoop/ITTestEncryptionOptions.java    | 50 ++++++++++++++++
 .../parquet/hadoop/TestEncryptionOptions.java      | 69 +++++++++++++++++++---
 pom.xml                                            | 25 --------
 submodules/parquet-testing                         |  1 -
 5 files changed, 111 insertions(+), 37 deletions(-)

diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 2708799..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "submodules/parquet-testing"]
-	path = submodules/parquet-testing
-	url = https://github.com/apache/parquet-testing.git
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/ITTestEncryptionOptions.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/ITTestEncryptionOptions.java
new file mode 100644
index 0000000..06bfb3a
--- /dev/null
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/ITTestEncryptionOptions.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import org.junit.Test;
+import org.junit.Rule;
+import org.junit.rules.ErrorCollector;
+
+import okhttp3.OkHttpClient;
+
+
+import java.io.IOException;
+
+/*
+ * This file continues the testing in TestEncryptionOptions. The goal of this test:
+ *  Perform interoperability tests with other (eg parquet-cpp) writers, by reading
+ *    encrypted files produced by these writers.
+ *
+ * For a full description and the actual implementation see TestEncryptionOptions.
+ */
+public class ITTestEncryptionOptions {
+
+  // HTTP client handed to the interop test, which uses it to fetch the parquet files it reads.
+  OkHttpClient httpClient = new OkHttpClient();
+
+  // Instance of the unit test class that holds the actual interop test implementation.
+  TestEncryptionOptions test = new TestEncryptionOptions();
+
+  // Error collector of this integration test suite; passed on to the delegated test.
+  @Rule
+  public ErrorCollector errorCollector = new ErrorCollector();
+
+  /**
+   * Runs the interop read test implemented in TestEncryptionOptions, supplying this
+   * suite's error collector and HTTP client.
+   *
+   * @throws IOException propagated from the delegated test
+   */
+  @Test
+  public void testInteropReadEncryptedParquetFiles() throws IOException {
+    final TestEncryptionOptions delegate = test;
+    delegate.testInteropReadEncryptedParquetFiles(errorCollector, httpClient);
+  }
+
+}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java
index 12f7ff5..982224c 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java
@@ -20,6 +20,9 @@ package org.apache.parquet.hadoop;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.parquet.crypto.ColumnDecryptionProperties;
 import org.apache.parquet.crypto.ColumnEncryptionProperties;
 import org.apache.parquet.crypto.DecryptionKeyRetrieverMock;
@@ -42,6 +45,12 @@ import org.junit.rules.ErrorCollector;
 import org.junit.rules.TemporaryFolder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import okhttp3.ConnectionSpec;
+import okhttp3.MediaType;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.RequestBody;
+import okhttp3.Response;
 
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
@@ -50,7 +59,6 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Random;
 
 import static org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE;
 import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
@@ -64,8 +72,6 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
  *    readers that support encryption.
  * 3) Produce encrypted files with plaintext footer, for testing the ability of legacy
  *    readers to parse the footer and read unencrypted columns.
- * 4) Perform interoperability tests with other (eg parquet-cpp) writers, by reading
- *    encrypted files produced by these writers.
  *
  * The write sample produces number of parquet files, each encrypted with a different
  * encryption configuration as described below.
@@ -118,14 +124,17 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
  */
 public class TestEncryptionOptions {
   private static final Logger LOG = LoggerFactory.getLogger(TestEncryptionOptions.class);
+  // The link includes a reference to a specific commit. To take a newer version - update this link.
+  private static final String PARQUET_TESTING_REPO = "https://github.com/apache/parquet-testing/raw/40379b3/data/";
 
   @Rule
   public TemporaryFolder temporaryFolder = new TemporaryFolder();
 
   @Rule
-  public ErrorCollector errorCollector = new ErrorCollector();
+  public ErrorCollector localErrorCollector = new ErrorCollector();
+  private ErrorCollector errorCollector;
 
-  private static String PARQUET_TESTING_PATH = "../submodules/parquet-testing/data";
+  private static String PARQUET_TESTING_PATH = "target/parquet-testing/data";
 
   private static final byte[] FOOTER_ENCRYPTION_KEY = "0123456789012345".getBytes();
   private static final byte[][] COLUMN_ENCRYPTION_KEYS = { "1234567890123450".getBytes(),
@@ -290,6 +299,7 @@ public class TestEncryptionOptions {
 
   @Test
   public void testWriteReadEncryptedParquetFiles() throws IOException {
+    this.errorCollector = localErrorCollector;
     Path rootPath = new Path(temporaryFolder.getRoot().getPath());
     LOG.info("======== testWriteReadEncryptedParquetFiles {} ========", rootPath.toString());
     byte[] AADPrefix = AAD_PREFIX_STRING.getBytes(StandardCharsets.UTF_8);
@@ -299,13 +309,22 @@ public class TestEncryptionOptions {
     testReadEncryptedParquetFiles(rootPath, DATA);
   }
 
-  @Test
-  public void testInteropReadEncryptedParquetFiles() throws IOException {
+  /**
+   * This interop test should be run from a separate integration tests suite, so it's not marked with @Test.
+   * It's not moved into a separate file since it shares many utilities with the unit tests in this file.
+   * @param errorCollector - the error collector of the integration tests suite
+   * @param httpClient - HTTP client to be used for fetching parquet files for interop tests
+   * @throws IOException if an interop file cannot be downloaded or read
+   */
+  public void testInteropReadEncryptedParquetFiles(ErrorCollector errorCollector, OkHttpClient httpClient) throws IOException {
+    this.errorCollector = errorCollector;
     Path rootPath = new Path(PARQUET_TESTING_PATH);
     LOG.info("======== testInteropReadEncryptedParquetFiles {} ========", rootPath.toString());
+    boolean readOnlyEncrypted = true;
+    downloadInteropFiles(rootPath, readOnlyEncrypted, httpClient);
     byte[] AADPrefix = AAD_PREFIX_STRING.getBytes(StandardCharsets.UTF_8);
     // Read using various decryption configurations.
-    testInteropReadEncryptedParquetFiles(rootPath, true/*readOnlyEncrypted*/, LINEAR_DATA);
+    testInteropReadEncryptedParquetFiles(rootPath, readOnlyEncrypted, LINEAR_DATA);
   }
 
   private void testWriteEncryptedParquetFiles(Path root, List<SingleRow> data) throws IOException {
@@ -419,6 +438,40 @@ public class TestEncryptionOptions {
     }
   }
 
+  /**
+   * Downloads the interop files that are not yet present under {@code rootPath}.
+   * The folder is created when missing. For every encryption configuration the file
+   * named by {@code getFileName} is fetched from {@code PARQUET_TESTING_REPO}, unless
+   * it already exists locally.
+   *
+   * @param rootPath - local folder the interop files are stored in
+   * @param readOnlyEncrypted - when true, the NO_ENCRYPTION configuration is skipped
+   * @param httpClient - HTTP client used to fetch the files
+   * @throws IOException if the folder cannot be created, a download does not succeed,
+   *                     or a file cannot be read or written
+   */
+  private void downloadInteropFiles(Path rootPath, boolean readOnlyEncrypted, OkHttpClient httpClient) throws IOException {
+    LOG.info("Download interop files if needed");
+    Configuration conf = new Configuration();
+    FileSystem fs = rootPath.getFileSystem(conf);
+    LOG.info(rootPath + " exists?: " + fs.exists(rootPath));
+    if (!fs.exists(rootPath)) {
+      LOG.info("Create folder for interop files: " + rootPath);
+      if (!fs.mkdirs(rootPath)) {
+        throw new IOException("Cannot create path " + rootPath);
+      }
+    }
+
+    for (EncryptionConfiguration encryptionConfiguration : EncryptionConfiguration.values()) {
+      if (readOnlyEncrypted && (EncryptionConfiguration.NO_ENCRYPTION == encryptionConfiguration)) {
+        continue;
+      }
+      String fileName = getFileName(encryptionConfiguration);
+      Path file = new Path(rootPath, fileName);
+      if (fs.exists(file)) {
+        // Already fetched by an earlier run; reuse it.
+        continue;
+      }
+      String downloadUrl = PARQUET_TESTING_REPO + fileName;
+      LOG.info("Download interop file: " + downloadUrl);
+      Request request = new Request.Builder().url(downloadUrl).build();
+      // try-with-resources closes the response (and its connection) on the failure path too.
+      try (Response response = httpClient.newCall(request).execute()) {
+        if (!response.isSuccessful()) {
+          throw new IOException("Failed to download file: " + response);
+        }
+        // Read the whole body before creating the local file: if the read fails, no empty
+        // file is left behind that a later run would mistake for a completed download.
+        byte[] content = response.body().bytes();
+        try (FSDataOutputStream fdos = fs.create(file)) {
+          fdos.write(content);
+        }
+      }
+    }
+  }
+
   private void testInteropReadEncryptedParquetFiles(Path root, boolean readOnlyEncrypted, List<SingleRow> data) throws IOException {
     Configuration conf = new Configuration();
     DecryptionConfiguration[] decryptionConfigurations = DecryptionConfiguration.values();
diff --git a/pom.xml b/pom.xml
index 20a30f4..dd80da3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,30 +214,6 @@
     <pluginManagement>
       <plugins>
         <plugin>
-          <groupId>org.codehaus.mojo</groupId>
-          <artifactId>exec-maven-plugin</artifactId>
-          <version>${exec-maven-plugin.version}</version>
-          <executions>
-            <execution>
-              <id>git submodule update</id>
-              <phase>initialize</phase>
-              <configuration>
-                <executable>git</executable>
-                <arguments>
-                  <argument>submodule</argument>
-                  <argument>update</argument>
-                  <argument>--init</argument>
-                  <argument>--recursive</argument>
-                </arguments>
-              </configuration>
-              <goals>
-                <goal>exec</goal>
-              </goals>
-            </execution>
-          </executions>
-        </plugin>
-
-        <plugin>
           <!-- Disable the source artifact from ASF parent -->
           <artifactId>maven-assembly-plugin</artifactId>
           <executions>
@@ -643,6 +619,5 @@
         </plugins>
       </build>
     </profile>
-
   </profiles>
 </project>
diff --git a/submodules/parquet-testing b/submodules/parquet-testing
deleted file mode 160000
index 40379b3..0000000
--- a/submodules/parquet-testing
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 40379b3c58298fd22589dec7e41748375b5a8e82