You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/06/08 02:56:06 UTC
parquet-cpp git commit: PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type
Repository: parquet-cpp
Updated Branches:
refs/heads/master a7eada4ca -> ce5e1e7dd
PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type
Author: Max Risuhin <ri...@gmail.com>
Closes #349 from MaxRis/PARQUET-1008 and squashes the following commits:
9e0db07 [Max Risuhin] PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/ce5e1e7d
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/ce5e1e7d
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/ce5e1e7d
Branch: refs/heads/master
Commit: ce5e1e7dd6799df5411148479f0eb2626e3fff72
Parents: a7eada4
Author: Max Risuhin <ri...@gmail.com>
Authored: Wed Jun 7 22:56:01 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Wed Jun 7 22:56:01 2017 -0400
----------------------------------------------------------------------
src/parquet/column/reader.cc | 14 +++++++-------
src/parquet/column/reader.h | 20 ++++++++++----------
2 files changed, 17 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ce5e1e7d/src/parquet/column/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc
index fe2de57..bc4e4a0 100644
--- a/src/parquet/column/reader.cc
+++ b/src/parquet/column/reader.cc
@@ -119,9 +119,9 @@ bool TypedColumnReader<DType>::ReadNewPage() {
// Levels are encoded as rle or bit-packed.
// Init repetition levels
if (descr_->max_repetition_level() > 0) {
- int64_t rep_levels_bytes =
- repetition_level_decoder_.SetData(page->repetition_level_encoding(),
- descr_->max_repetition_level(), num_buffered_values_, buffer);
+ int64_t rep_levels_bytes = repetition_level_decoder_.SetData(
+ page->repetition_level_encoding(), descr_->max_repetition_level(),
+ static_cast<int>(num_buffered_values_), buffer);
buffer += rep_levels_bytes;
data_size -= rep_levels_bytes;
}
@@ -130,9 +130,9 @@ bool TypedColumnReader<DType>::ReadNewPage() {
// Init definition levels
if (descr_->max_definition_level() > 0) {
- int64_t def_levels_bytes =
- definition_level_decoder_.SetData(page->definition_level_encoding(),
- descr_->max_definition_level(), num_buffered_values_, buffer);
+ int64_t def_levels_bytes = definition_level_decoder_.SetData(
+ page->definition_level_encoding(), descr_->max_definition_level(),
+ static_cast<int>(num_buffered_values_), buffer);
buffer += def_levels_bytes;
data_size -= def_levels_bytes;
}
@@ -170,7 +170,7 @@ bool TypedColumnReader<DType>::ReadNewPage() {
}
}
current_decoder_->SetData(
- num_buffered_values_, buffer, static_cast<int>(data_size));
+ static_cast<int>(num_buffered_values_), buffer, static_cast<int>(data_size));
return true;
} else {
// We don't know what this page type is. We're allowed to skip non-data
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/ce5e1e7d/src/parquet/column/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h
index 80084b2..f36db5e 100644
--- a/src/parquet/column/reader.h
+++ b/src/parquet/column/reader.h
@@ -91,11 +91,11 @@ class PARQUET_EXPORT ColumnReader {
// values. For repeated or optional values, there may be fewer data values
// than levels, and this tells you how many encoded levels there are in that
// case.
- int num_buffered_values_;
+ int64_t num_buffered_values_;
// The number of values from the current data page that have been decoded
// into memory
- int num_decoded_values_;
+ int64_t num_decoded_values_;
::arrow::MemoryPool* pool_;
};
@@ -128,8 +128,8 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
// This API is the same for both V1 and V2 of the DataPage
//
// @returns: actual number of levels read (see values_read for number of values read)
- int64_t ReadBatch(int batch_size, int16_t* def_levels, int16_t* rep_levels, T* values,
- int64_t* values_read);
+ int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ T* values, int64_t* values_read);
/// Read a batch of repetition levels, definition levels, and values from the
/// column and leave spaces for null entries on the lowest level in the values
@@ -165,7 +165,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
/// (i.e. definition_level == max_definition_level - 1)
/// @param[out] null_count The number of nulls on the lowest levels.
/// (i.e. (values_read - null_count) is total number of non-null entries)
- int64_t ReadBatchSpaced(int batch_size, int16_t* def_levels, int16_t* rep_levels,
+ int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
int64_t* values_read, int64_t* null_count);
@@ -217,8 +217,8 @@ inline int64_t TypedColumnReader<DType>::ReadValuesSpaced(int64_t batch_size, T*
}
template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_levels,
- int16_t* rep_levels, T* values, int64_t* values_read) {
+inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size,
+ int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) {
// HasNext invokes ReadNewPage
if (!HasNext()) {
*values_read = 0;
@@ -257,7 +257,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_
*values_read = ReadValues(values_to_read, values);
int64_t total_values = std::max(num_def_levels, *values_read);
- num_decoded_values_ += static_cast<int>(total_values);
+ num_decoded_values_ += total_values;
return total_values;
}
@@ -293,7 +293,7 @@ inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_
}
template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
+inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size,
int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits,
int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read,
int64_t* null_count_out) {
@@ -354,7 +354,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
*levels_read = total_values;
}
- num_decoded_values_ += static_cast<int>(*levels_read);
+ num_decoded_values_ += *levels_read;
return total_values;
}