You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/06/08 02:56:06 UTC

parquet-cpp git commit: PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type

Repository: parquet-cpp
Updated Branches:
  refs/heads/master a7eada4ca -> ce5e1e7dd


PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type

Author: Max Risuhin <ri...@gmail.com>

Closes #349 from MaxRis/PARQUET-1008 and squashes the following commits:

9e0db07 [Max Risuhin] PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type


Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/ce5e1e7d
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/ce5e1e7d
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/ce5e1e7d

Branch: refs/heads/master
Commit: ce5e1e7dd6799df5411148479f0eb2626e3fff72
Parents: a7eada4
Author: Max Risuhin <ri...@gmail.com>
Authored: Wed Jun 7 22:56:01 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Jun 7 22:56:01 2017 -0400

----------------------------------------------------------------------
 src/parquet/column/reader.cc | 14 +++++++-------
 src/parquet/column/reader.h  | 20 ++++++++++----------
 2 files changed, 17 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ce5e1e7d/src/parquet/column/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc
index fe2de57..bc4e4a0 100644
--- a/src/parquet/column/reader.cc
+++ b/src/parquet/column/reader.cc
@@ -119,9 +119,9 @@ bool TypedColumnReader<DType>::ReadNewPage() {
       // Levels are encoded as rle or bit-packed.
       // Init repetition levels
       if (descr_->max_repetition_level() > 0) {
-        int64_t rep_levels_bytes =
-            repetition_level_decoder_.SetData(page->repetition_level_encoding(),
-                descr_->max_repetition_level(), num_buffered_values_, buffer);
+        int64_t rep_levels_bytes = repetition_level_decoder_.SetData(
+            page->repetition_level_encoding(), descr_->max_repetition_level(),
+            static_cast<int>(num_buffered_values_), buffer);
         buffer += rep_levels_bytes;
         data_size -= rep_levels_bytes;
       }
@@ -130,9 +130,9 @@ bool TypedColumnReader<DType>::ReadNewPage() {
 
       // Init definition levels
       if (descr_->max_definition_level() > 0) {
-        int64_t def_levels_bytes =
-            definition_level_decoder_.SetData(page->definition_level_encoding(),
-                descr_->max_definition_level(), num_buffered_values_, buffer);
+        int64_t def_levels_bytes = definition_level_decoder_.SetData(
+            page->definition_level_encoding(), descr_->max_definition_level(),
+            static_cast<int>(num_buffered_values_), buffer);
         buffer += def_levels_bytes;
         data_size -= def_levels_bytes;
       }
@@ -170,7 +170,7 @@ bool TypedColumnReader<DType>::ReadNewPage() {
         }
       }
       current_decoder_->SetData(
-          num_buffered_values_, buffer, static_cast<int>(data_size));
+          static_cast<int>(num_buffered_values_), buffer, static_cast<int>(data_size));
       return true;
     } else {
       // We don't know what this page type is. We're allowed to skip non-data

http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ce5e1e7d/src/parquet/column/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h
index 80084b2..f36db5e 100644
--- a/src/parquet/column/reader.h
+++ b/src/parquet/column/reader.h
@@ -91,11 +91,11 @@ class PARQUET_EXPORT ColumnReader {
   // values. For repeated or optional values, there may be fewer data values
   // than levels, and this tells you how many encoded levels there are in that
   // case.
-  int num_buffered_values_;
+  int64_t num_buffered_values_;
 
   // The number of values from the current data page that have been decoded
   // into memory
-  int num_decoded_values_;
+  int64_t num_decoded_values_;
 
   ::arrow::MemoryPool* pool_;
 };
@@ -128,8 +128,8 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
   // This API is the same for both V1 and V2 of the DataPage
   //
   // @returns: actual number of levels read (see values_read for number of values read)
-  int64_t ReadBatch(int batch_size, int16_t* def_levels, int16_t* rep_levels, T* values,
-      int64_t* values_read);
+  int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+      T* values, int64_t* values_read);
 
   /// Read a batch of repetition levels, definition levels, and values from the
   /// column and leave spaces for null entries on the lowest level in the values
@@ -165,7 +165,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
   ///   (i.e. definition_level == max_definition_level - 1)
   /// @param[out] null_count The number of nulls on the lowest levels.
   ///   (i.e. (values_read - null_count) is total number of non-null entries)
-  int64_t ReadBatchSpaced(int batch_size, int16_t* def_levels, int16_t* rep_levels,
+  int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
       T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
       int64_t* values_read, int64_t* null_count);
 
@@ -217,8 +217,8 @@ inline int64_t TypedColumnReader<DType>::ReadValuesSpaced(int64_t batch_size, T*
 }
 
 template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_levels,
-    int16_t* rep_levels, T* values, int64_t* values_read) {
+inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size,
+    int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) {
   // HasNext invokes ReadNewPage
   if (!HasNext()) {
     *values_read = 0;
@@ -257,7 +257,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_
 
   *values_read = ReadValues(values_to_read, values);
   int64_t total_values = std::max(num_def_levels, *values_read);
-  num_decoded_values_ += static_cast<int>(total_values);
+  num_decoded_values_ += total_values;
 
   return total_values;
 }
@@ -293,7 +293,7 @@ inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_
 }
 
 template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
+inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size,
     int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits,
     int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read,
     int64_t* null_count_out) {
@@ -354,7 +354,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
     *levels_read = total_values;
   }
 
-  num_decoded_values_ += static_cast<int>(*levels_read);
+  num_decoded_values_ += *levels_read;
   return total_values;
 }