You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2021/09/30 08:08:03 UTC

[parquet-mr] branch master updated: PARQUET-2094: Handle negative values in page headers (#933)

This is an automated email from the ASF dual-hosted git repository.

gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/master by this push:
     new 1695d92  PARQUET-2094: Handle negative values in page headers (#933)
1695d92 is described below

commit 1695d92cc07288713a9f2230f3aac61e2dc6a8e4
Author: Gabor Szadovszky <ga...@apache.org>
AuthorDate: Thu Sep 30 10:07:06 2021 +0200

    PARQUET-2094: Handle negative values in page headers (#933)
---
 .../format/InvalidParquetMetadataException.java    | 30 +++++++++++++++
 .../apache/parquet/format/MetadataValidator.java   | 44 ++++++++++++++++++++++
 .../main/java/org/apache/parquet/format/Util.java  |  2 +-
 .../java/org/apache/parquet/format/TestUtil.java   | 20 +++++++++-
 4 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java
new file mode 100644
index 0000000..c0852bb
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/InvalidParquetMetadataException.java
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.format;
+
+/**
+ * An unchecked exception thrown when invalid values are found in the Parquet file metadata (including the footer,
+ * page headers etc.). Its only constructor is package-private and takes the detail message passed to the superclass.
+ */
+public class InvalidParquetMetadataException extends RuntimeException {
+  InvalidParquetMetadataException(String message) {
+    super(message);
+  }
+}
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java
new file mode 100644
index 0000000..b3738ec
--- /dev/null
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/MetadataValidator.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.parquet.format;
+
+/**
+ * Utility class to validate different types of Parquet metadata (e.g. footer, page headers etc.).
+ */
+public class MetadataValidator {
+
+  /**
+   * Checks that the compressed page size of {@code pageHeader} is not negative and returns the same header;
+   * throws {@link InvalidParquetMetadataException} otherwise.
+   */
+  static PageHeader validate(PageHeader pageHeader) {
+    int compressedPageSize = pageHeader.getCompressed_page_size();
+    if (compressedPageSize < 0) {
+      // Format the message only on failure, so a valid header costs nothing more than this comparison.
+      throw new InvalidParquetMetadataException(
+          String.format("Compressed page size must not be negative but was: %s", compressedPageSize));
+    }
+    return pageHeader;
+  }
+
+  private MetadataValidator() {
+    // Private constructor to prevent instantiation
+  }
+}
diff --git a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
index 32c1986..4d4c893 100644
--- a/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
+++ b/parquet-format-structures/src/main/java/org/apache/parquet/format/Util.java
@@ -130,7 +130,7 @@ public class Util {
 
   public static PageHeader readPageHeader(InputStream from, 
       BlockCipher.Decryptor decryptor, byte[] AAD) throws IOException {
-    return read(from, new PageHeader(), decryptor, AAD);
+    return MetadataValidator.validate(read(from, new PageHeader(), decryptor, AAD));
   }
 
   public static void writeFileMetaData(org.apache.parquet.format.FileMetaData fileMetadata, 
diff --git a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
index 1adf099..685e251 100644
--- a/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
+++ b/parquet-format-structures/src/test/java/org/apache/parquet/format/TestUtil.java
@@ -23,13 +23,16 @@ import static junit.framework.Assert.assertEquals;
 import static junit.framework.Assert.assertNull;
 import static org.apache.parquet.format.Util.readFileMetaData;
 import static org.apache.parquet.format.Util.writeFileMetaData;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.IOException;
 
 import org.junit.Test;
-
 import org.apache.parquet.format.Util.DefaultFileMetaDataConsumer;
+
 public class TestUtil {
 
   @Test
@@ -77,6 +80,21 @@ public class TestUtil {
     assertEquals(md, md6);
   }
 
+  @Test
+  public void testInvalidPageHeader() throws IOException {
+    PageHeader ph = new PageHeader(PageType.DATA_PAGE, 100, -50);
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+    Util.writePageHeader(ph, out);
+    // The header holds a negative size (-50, presumably the compressed page size), so reading it back must fail.
+    try {
+      Util.readPageHeader(in(out));
+      fail("Expected InvalidParquetMetadataException but none was thrown");
+    } catch (InvalidParquetMetadataException e) {
+      assertTrue("Exception message does not contain the expected parts",
+          e.getMessage().contains("Compressed page size"));
+    }
+  }
+
   private ByteArrayInputStream in(ByteArrayOutputStream baos) {
     return new ByteArrayInputStream(baos.toByteArray());
   }