You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2019/08/13 15:49:17 UTC

[parquet-format] branch master updated: PARQUET-1630: add empty compression union for Bloom filter (#149)

This is an automated email from the ASF dual-hosted git repository.

blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-format.git


The following commit(s) were added to refs/heads/master by this push:
     new 556ebee  PARQUET-1630: add empty compression union for Bloom filter (#149)
556ebee is described below

commit 556ebee2107e4223aad40573e27e9f62075dddd7
Author: Jim Apple <jb...@apache.org>
AuthorDate: Tue Aug 13 08:49:12 2019 -0700

    PARQUET-1630: add empty compression union for Bloom filter (#149)
    
    Right now no compression methods are supported. For more on Bloom
    filter compression, see Michael Mitzenmacher's "Compressed Bloom
    Filters",
    https://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/cbf2.pdf
---
 BloomFilter.md                 | 10 ++++++++++
 src/main/thrift/parquet.thrift | 11 +++++++++++
 2 files changed, 21 insertions(+)

diff --git a/BloomFilter.md b/BloomFilter.md
index 8ce22ae..e5ec30c 100644
--- a/BloomFilter.md
+++ b/BloomFilter.md
@@ -264,6 +264,14 @@ union BloomFilterHash {
 }
 
 /**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+  1: Uncompressed UNCOMPRESSED;
+}
+
+/**
   * Bloom filter header is stored at beginning of Bloom filter data of each column
   * and followed by its bitset.
   **/
@@ -274,6 +282,8 @@ struct BloomFilterPageHeader {
   2: required BloomFilterAlgorithm algorithm;
   /** The hash function used for Bloom filter. **/
   3: required BloomFilterHash hash;
+  /** The compression used in the Bloom filter **/
+  4: required BloomFilterCompression compression;
 }
 
 struct ColumnMetaData {
diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift
index da90acd..a062b4f 100644
--- a/src/main/thrift/parquet.thrift
+++ b/src/main/thrift/parquet.thrift
@@ -582,6 +582,15 @@ union BloomFilterHash {
   /** xxHash Strategy. **/
   1: XxHash XXHASH;
 }
+
+/**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+  1: Uncompressed UNCOMPRESSED;
+}
+
 /**
   * Bloom filter header is stored at beginning of Bloom filter data of each column
   * and followed by its bitset.
@@ -593,6 +602,8 @@ struct BloomFilterHeader {
   2: required BloomFilterAlgorithm algorithm;
   /** The hash function used for Bloom filter. **/
   3: required BloomFilterHash hash;
+  /** The compression used in the Bloom filter **/
+  4: required BloomFilterCompression compression;
 }
 
 struct PageHeader {