You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by we...@apache.org on 2020/05/22 01:15:03 UTC

[arrow] branch master updated: PARQUET-1861: [Parquet][Documentation] Clarify buffered stream option

This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5602c45  PARQUET-1861: [Parquet][Documentation] Clarify buffered stream option
5602c45 is described below

commit 5602c459eb8773b6be8059b1b118175e9f16b7a3
Author: François Saint-Jacques <fs...@gmail.com>
AuthorDate: Thu May 21 20:12:45 2020 -0500

    PARQUET-1861: [Parquet][Documentation] Clarify buffered stream option
    
    Closes #7221 from fsaintjacques/PARQUET-1861-buffered-stream
    
    Authored-by: François Saint-Jacques <fs...@gmail.com>
    Signed-off-by: Wes McKinney <we...@apache.org>
---
 cpp/src/parquet/properties.h       | 25 +++++++++++++------------
 cpp/src/parquet/properties_test.cc |  4 ++--
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h
index 2b07569..2d9725c 100644
--- a/cpp/src/parquet/properties.h
+++ b/cpp/src/parquet/properties.h
@@ -59,31 +59,32 @@ struct ParquetVersion {
 /// DataPageV2 at all.
 enum class ParquetDataPageVersion { V1, V2 };
 
-static int64_t DEFAULT_BUFFER_SIZE = 1024;
-static bool DEFAULT_USE_BUFFERED_STREAM = false;
+/// Align the default buffer size to a small multiple of a page size.
+constexpr int64_t kDefaultBufferSize = 4096 * 4;
 
 class PARQUET_EXPORT ReaderProperties {
  public:
   explicit ReaderProperties(MemoryPool* pool = ::arrow::default_memory_pool())
-      : pool_(pool) {
-    buffered_stream_enabled_ = DEFAULT_USE_BUFFERED_STREAM;
-    buffer_size_ = DEFAULT_BUFFER_SIZE;
-  }
+      : pool_(pool) {}
 
   MemoryPool* memory_pool() const { return pool_; }
 
   std::shared_ptr<ArrowInputStream> GetStream(std::shared_ptr<ArrowInputFile> source,
                                               int64_t start, int64_t num_bytes);
 
+  /// Buffered stream reading allows the user to control the memory usage of
+  /// parquet readers. This ensures that all `RandomAccessFile::ReadAt` calls
+  /// are wrapped in a buffered reader that uses a fixed-size buffer (of size
+  /// `buffer_size()`) instead of the full size of the ReadAt.
+  ///
+  /// The primary reason for this control knob is resource control, not
+  /// performance.
   bool is_buffered_stream_enabled() const { return buffered_stream_enabled_; }
-
   void enable_buffered_stream() { buffered_stream_enabled_ = true; }
-
   void disable_buffered_stream() { buffered_stream_enabled_ = false; }
 
-  void set_buffer_size(int64_t buf_size) { buffer_size_ = buf_size; }
-
   int64_t buffer_size() const { return buffer_size_; }
+  void set_buffer_size(int64_t size) { buffer_size_ = size; }
 
   void file_decryption_properties(std::shared_ptr<FileDecryptionProperties> decryption) {
     file_decryption_properties_ = std::move(decryption);
@@ -95,8 +96,8 @@ class PARQUET_EXPORT ReaderProperties {
 
  private:
   MemoryPool* pool_;
-  int64_t buffer_size_;
-  bool buffered_stream_enabled_;
+  int64_t buffer_size_ = kDefaultBufferSize;
+  bool buffered_stream_enabled_ = false;
   std::shared_ptr<FileDecryptionProperties> file_decryption_properties_;
 };
 
diff --git a/cpp/src/parquet/properties_test.cc b/cpp/src/parquet/properties_test.cc
index aef563b..39dbb7f 100644
--- a/cpp/src/parquet/properties_test.cc
+++ b/cpp/src/parquet/properties_test.cc
@@ -34,8 +34,8 @@ namespace test {
 TEST(TestReaderProperties, Basics) {
   ReaderProperties props;
 
-  ASSERT_EQ(DEFAULT_BUFFER_SIZE, props.buffer_size());
-  ASSERT_EQ(DEFAULT_USE_BUFFERED_STREAM, props.is_buffered_stream_enabled());
+  ASSERT_EQ(props.buffer_size(), kDefaultBufferSize);
+  ASSERT_FALSE(props.is_buffered_stream_enabled());
 }
 
 TEST(TestWriterProperties, Basics) {