You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by bl...@apache.org on 2019/08/13 15:49:17 UTC
[parquet-format] branch master updated: PARQUET-1630: add empty compression union for Bloom filter (#149)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-format.git
The following commit(s) were added to refs/heads/master by this push:
new 556ebee PARQUET-1630: add empty compression union for Bloom filter (#149)
556ebee is described below
commit 556ebee2107e4223aad40573e27e9f62075dddd7
Author: Jim Apple <jb...@apache.org>
AuthorDate: Tue Aug 13 08:49:12 2019 -0700
PARQUET-1630: add empty compression union for Bloom filter (#149)
No compression methods are supported yet, so the new union's only
member is UNCOMPRESSED. For more on Bloom filter compression, see
Michael Mitzenmacher's "Compressed Bloom Filters":
https://www.eecs.harvard.edu/~michaelm/NEWWORK/postscripts/cbf2.pdf
---
BloomFilter.md | 10 ++++++++++
src/main/thrift/parquet.thrift | 11 +++++++++++
2 files changed, 21 insertions(+)
diff --git a/BloomFilter.md b/BloomFilter.md
index 8ce22ae..e5ec30c 100644
--- a/BloomFilter.md
+++ b/BloomFilter.md
@@ -264,6 +264,14 @@ union BloomFilterHash {
}
/**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+ 1: Uncompressed UNCOMPRESSED;
+}
+
+/**
* Bloom filter header is stored at beginning of Bloom filter data of each column
* and followed by its bitset.
**/
@@ -274,6 +282,8 @@ struct BloomFilterPageHeader {
2: required BloomFilterAlgorithm algorithm;
/** The hash function used for Bloom filter. **/
3: required BloomFilterHash hash;
+ /** The compression used in the Bloom filter **/
+ 4: required BloomFilterCompression compression;
}
struct ColumnMetaData {
diff --git a/src/main/thrift/parquet.thrift b/src/main/thrift/parquet.thrift
index da90acd..a062b4f 100644
--- a/src/main/thrift/parquet.thrift
+++ b/src/main/thrift/parquet.thrift
@@ -582,6 +582,15 @@ union BloomFilterHash {
/** xxHash Strategy. **/
1: XxHash XXHASH;
}
+
+/**
+ * The compression used in the Bloom filter.
+ **/
+struct Uncompressed {}
+union BloomFilterCompression {
+ 1: Uncompressed UNCOMPRESSED;
+}
+
/**
* Bloom filter header is stored at beginning of Bloom filter data of each column
* and followed by its bitset.
@@ -593,6 +602,8 @@ struct BloomFilterHeader {
2: required BloomFilterAlgorithm algorithm;
/** The hash function used for Bloom filter. **/
3: required BloomFilterHash hash;
+ /** The compression used in the Bloom filter **/
+ 4: required BloomFilterCompression compression;
}
struct PageHeader {