You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2021/03/11 15:37:53 UTC
[parquet-mr] branch master updated: PARQUET-1992: Manually download
interop files inside the test and move encryption interop test to maven
integration-test phase (#878)
This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new d81b815 PARQUET-1992: Manually download interop files inside the test and move encryption interop test to maven integration-test phase (#878)
d81b815 is described below
commit d81b815fadc252bddb583d400e6097e56aa6708b
Author: andersonm-ibm <63...@users.noreply.github.com>
AuthorDate: Thu Mar 11 17:37:45 2021 +0200
PARQUET-1992: Manually download interop files inside the test and move encryption interop test to maven integration-test phase (#878)
---
.gitmodules | 3 -
.../parquet/hadoop/ITTestEncryptionOptions.java | 50 ++++++++++++++++
.../parquet/hadoop/TestEncryptionOptions.java | 69 +++++++++++++++++++---
pom.xml | 25 --------
submodules/parquet-testing | 1 -
5 files changed, 111 insertions(+), 37 deletions(-)
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 2708799..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "submodules/parquet-testing"]
- path = submodules/parquet-testing
- url = https://github.com/apache/parquet-testing.git
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/ITTestEncryptionOptions.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/ITTestEncryptionOptions.java
new file mode 100644
index 0000000..06bfb3a
--- /dev/null
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/ITTestEncryptionOptions.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.hadoop;
+
+import org.junit.Test;
+import org.junit.Rule;
+import org.junit.rules.ErrorCollector;
+
+import okhttp3.OkHttpClient;
+
+
+import java.io.IOException;
+
+/*
+ * This class continues the testing started in TestEncryptionOptions. Its goal is to
+ * perform interoperability tests with other (e.g. parquet-cpp) writers, by reading
+ * encrypted files produced by these writers. It only supplies the ErrorCollector rule
+ * and the OkHttpClient, then delegates to TestEncryptionOptions.
+ * For a full description and the actual implementation see TestEncryptionOptions.
+ */
+public class ITTestEncryptionOptions {
+ @Rule
+ public ErrorCollector errorCollector = new ErrorCollector();
+
+ TestEncryptionOptions test = new TestEncryptionOptions();
+
+ OkHttpClient httpClient = new OkHttpClient();
+
+ @Test
+ public void testInteropReadEncryptedParquetFiles() throws IOException {
+ test.testInteropReadEncryptedParquetFiles(errorCollector, httpClient);
+ }
+
+}
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java
index 12f7ff5..982224c 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestEncryptionOptions.java
@@ -20,6 +20,9 @@ package org.apache.parquet.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.parquet.crypto.ColumnDecryptionProperties;
import org.apache.parquet.crypto.ColumnEncryptionProperties;
import org.apache.parquet.crypto.DecryptionKeyRetrieverMock;
@@ -42,6 +45,12 @@ import org.junit.rules.ErrorCollector;
import org.junit.rules.TemporaryFolder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import okhttp3.ConnectionSpec;
+import okhttp3.MediaType;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.RequestBody;
+import okhttp3.Response;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@@ -50,7 +59,6 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import static org.apache.parquet.hadoop.ParquetFileWriter.Mode.OVERWRITE;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
@@ -64,8 +72,6 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
* readers that support encryption.
* 3) Produce encrypted files with plaintext footer, for testing the ability of legacy
* readers to parse the footer and read unencrypted columns.
- * 4) Perform interoperability tests with other (eg parquet-cpp) writers, by reading
- * encrypted files produced by these writers.
*
* The write sample produces number of parquet files, each encrypted with a different
* encryption configuration as described below.
@@ -118,14 +124,17 @@ import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
*/
public class TestEncryptionOptions {
private static final Logger LOG = LoggerFactory.getLogger(TestEncryptionOptions.class);
+ // The link includes a reference to a specific commit. To take a newer version - update this link.
+ private static final String PARQUET_TESTING_REPO = "https://github.com/apache/parquet-testing/raw/40379b3/data/";
@Rule
public TemporaryFolder temporaryFolder = new TemporaryFolder();
@Rule
- public ErrorCollector errorCollector = new ErrorCollector();
+ public ErrorCollector localErrorCollector = new ErrorCollector();
+ private ErrorCollector errorCollector;
- private static String PARQUET_TESTING_PATH = "../submodules/parquet-testing/data";
+ private static String PARQUET_TESTING_PATH = "target/parquet-testing/data";
private static final byte[] FOOTER_ENCRYPTION_KEY = "0123456789012345".getBytes();
private static final byte[][] COLUMN_ENCRYPTION_KEYS = { "1234567890123450".getBytes(),
@@ -290,6 +299,7 @@ public class TestEncryptionOptions {
@Test
public void testWriteReadEncryptedParquetFiles() throws IOException {
+ this.errorCollector = localErrorCollector;
Path rootPath = new Path(temporaryFolder.getRoot().getPath());
LOG.info("======== testWriteReadEncryptedParquetFiles {} ========", rootPath.toString());
byte[] AADPrefix = AAD_PREFIX_STRING.getBytes(StandardCharsets.UTF_8);
@@ -299,13 +309,22 @@ public class TestEncryptionOptions {
testReadEncryptedParquetFiles(rootPath, DATA);
}
- @Test
- public void testInteropReadEncryptedParquetFiles() throws IOException {
+ /**
+ * This interop test should be run from a separate integration tests suite, so it's not marked with @Test.
+ * It's not moved into a separate file since it shares many utilities with the unit tests in this file.
+ * @param errorCollector - the error collector of the integration tests suite; stored in this.errorCollector
+ * @param httpClient - HTTP client to be used for fetching parquet files for interop tests
+ * @throws IOException if downloading the interop files fails, or if the read test itself throws it
+ */
+ public void testInteropReadEncryptedParquetFiles(ErrorCollector errorCollector, OkHttpClient httpClient) throws IOException {
+ this.errorCollector = errorCollector;
Path rootPath = new Path(PARQUET_TESTING_PATH);
LOG.info("======== testInteropReadEncryptedParquetFiles {} ========", rootPath.toString());
+ boolean readOnlyEncrypted = true;
+ downloadInteropFiles(rootPath, readOnlyEncrypted, httpClient);
byte[] AADPrefix = AAD_PREFIX_STRING.getBytes(StandardCharsets.UTF_8);
// Read using various decryption configurations.
- testInteropReadEncryptedParquetFiles(rootPath, true/*readOnlyEncrypted*/, LINEAR_DATA);
+ testInteropReadEncryptedParquetFiles(rootPath, readOnlyEncrypted, LINEAR_DATA);
}
private void testWriteEncryptedParquetFiles(Path root, List<SingleRow> data) throws IOException {
@@ -419,6 +438,40 @@ public class TestEncryptionOptions {
}
}
+  /**
+   * Downloads the interop files that are not yet present under {@code rootPath}.
+   * One file per {@link EncryptionConfiguration} is fetched from PARQUET_TESTING_REPO;
+   * files that already exist are left untouched, so repeated runs reuse them.
+   *
+   * @param rootPath folder that holds the interop files; created if it does not exist
+   * @param readOnlyEncrypted when true, the NO_ENCRYPTION file is skipped
+   * @param httpClient HTTP client used to fetch the files
+   * @throws IOException if the folder cannot be created, a download fails or a file cannot be written
+   */
+  private void downloadInteropFiles(Path rootPath, boolean readOnlyEncrypted, OkHttpClient httpClient) throws IOException {
+    LOG.info("Download interop files if needed");
+    Configuration conf = new Configuration();
+    FileSystem fs = rootPath.getFileSystem(conf);
+    boolean rootExists = fs.exists(rootPath);
+    LOG.info("{} exists?: {}", rootPath, rootExists);
+    if (!rootExists) {
+      LOG.info("Create folder for interop files: {}", rootPath);
+      if (!fs.mkdirs(rootPath)) {
+        throw new IOException("Cannot create path " + rootPath);
+      }
+    }
+
+    for (EncryptionConfiguration encryptionConfiguration : EncryptionConfiguration.values()) {
+      if (readOnlyEncrypted && (EncryptionConfiguration.NO_ENCRYPTION == encryptionConfiguration)) {
+        continue;
+      }
+      String fileName = getFileName(encryptionConfiguration);
+      Path file = new Path(rootPath, fileName);
+      if (fs.exists(file)) {
+        continue;
+      }
+      String downloadUrl = PARQUET_TESTING_REPO + fileName;
+      LOG.info("Download interop file: {}", downloadUrl);
+      Request request = new Request.Builder().url(downloadUrl).build();
+      byte[] content;
+      // try-with-resources closes the Response (and its connection) on the failure path too.
+      try (Response response = httpClient.newCall(request).execute()) {
+        if (!response.isSuccessful()) {
+          throw new IOException("Failed to download file: " + response);
+        }
+        content = response.body().bytes();
+      }
+      // Create the local file only after the whole body was read, so an interrupted
+      // download does not leave an empty file that later runs would treat as present.
+      try (FSDataOutputStream fdos = fs.create(file)) {
+        fdos.write(content);
+      }
+    }
+  }
+
private void testInteropReadEncryptedParquetFiles(Path root, boolean readOnlyEncrypted, List<SingleRow> data) throws IOException {
Configuration conf = new Configuration();
DecryptionConfiguration[] decryptionConfigurations = DecryptionConfiguration.values();
diff --git a/pom.xml b/pom.xml
index 20a30f4..dd80da3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -214,30 +214,6 @@
<pluginManagement>
<plugins>
<plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>exec-maven-plugin</artifactId>
- <version>${exec-maven-plugin.version}</version>
- <executions>
- <execution>
- <id>git submodule update</id>
- <phase>initialize</phase>
- <configuration>
- <executable>git</executable>
- <arguments>
- <argument>submodule</argument>
- <argument>update</argument>
- <argument>--init</argument>
- <argument>--recursive</argument>
- </arguments>
- </configuration>
- <goals>
- <goal>exec</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
-
- <plugin>
<!-- Disable the source artifact from ASF parent -->
<artifactId>maven-assembly-plugin</artifactId>
<executions>
@@ -643,6 +619,5 @@
</plugins>
</build>
</profile>
-
</profiles>
</project>
diff --git a/submodules/parquet-testing b/submodules/parquet-testing
deleted file mode 160000
index 40379b3..0000000
--- a/submodules/parquet-testing
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 40379b3c58298fd22589dec7e41748375b5a8e82