You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by we...@apache.org on 2017/05/29 19:56:06 UTC
[2/2] parquet-cpp git commit: PARQUET-991: Resolve msvc warnings; Appveyor treats msvc warnings as errors (/WX flag)
PARQUET-991: Resolve msvc warnings; Appveyor treats msvc warnings as errors (/WX flag)
Author: Max Risuhin <ri...@gmail.com>
Closes #340 from MaxRis/PARQUET-991 and squashes the following commits:
98a2544 [Max Risuhin] PARQUET-991: Resolve msvc warnings; Appveyor treats msvc warnings as errors (/WX flag)
Project: http://git-wip-us.apache.org/repos/asf/parquet-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/parquet-cpp/commit/fc5228af
Tree: http://git-wip-us.apache.org/repos/asf/parquet-cpp/tree/fc5228af
Diff: http://git-wip-us.apache.org/repos/asf/parquet-cpp/diff/fc5228af
Branch: refs/heads/master
Commit: fc5228af3eee2ec8176e404ecb34b7ba985d0e4d
Parents: b36c9ac
Author: Max Risuhin <ri...@gmail.com>
Authored: Mon May 29 15:55:59 2017 -0400
Committer: Wes McKinney <we...@twosigma.com>
Committed: Mon May 29 15:55:59 2017 -0400
----------------------------------------------------------------------
CMakeLists.txt | 2 +-
ci/msvc-build.bat | 9 +-
examples/reader-writer.cc | 8 +-
src/parquet/arrow/arrow-reader-writer-test.cc | 10 +-
src/parquet/arrow/reader.cc | 138 +++++++++++----------
src/parquet/arrow/test-util.h | 8 +-
src/parquet/arrow/writer.cc | 18 +--
src/parquet/column/column-reader-test.cc | 36 +++---
src/parquet/column/column-writer-test.cc | 13 +-
src/parquet/column/levels-test.cc | 30 +++--
src/parquet/column/levels.cc | 8 +-
src/parquet/column/page.h | 2 +-
src/parquet/column/reader.cc | 23 ++--
src/parquet/column/reader.h | 40 +++---
src/parquet/column/scanner.h | 5 +-
src/parquet/column/statistics-test.cc | 6 +-
src/parquet/column/statistics.cc | 23 ++--
src/parquet/column/statistics.h | 16 +--
src/parquet/column/test-specialization.h | 10 +-
src/parquet/column/test-util.h | 20 +--
src/parquet/column/writer.cc | 55 ++++----
src/parquet/column/writer.h | 22 ++--
src/parquet/compression-test.cc | 10 +-
src/parquet/compression.cc | 8 +-
src/parquet/encoding-internal.h | 22 ++--
src/parquet/encoding-test.cc | 16 +--
src/parquet/encoding.h | 2 +-
src/parquet/file/file-deserialize-test.cc | 6 +-
src/parquet/file/file-metadata-test.cc | 4 +-
src/parquet/file/metadata.cc | 14 ++-
src/parquet/file/reader-internal.cc | 2 +-
src/parquet/file/writer-internal.cc | 12 +-
src/parquet/reader-test.cc | 2 +-
src/parquet/schema-test.cc | 2 +-
src/parquet/schema.cc | 4 +-
src/parquet/schema.h | 4 +-
src/parquet/types-test.cc | 4 +-
src/parquet/util/bit-stream-utils.h | 8 +-
src/parquet/util/bit-stream-utils.inline.h | 36 ++++--
src/parquet/util/bit-util.h | 2 +-
src/parquet/util/comparison-test.cc | 4 +-
src/parquet/util/cpu-info.cc | 4 +-
src/parquet/util/hash-util.h | 2 +-
src/parquet/util/memory.cc | 2 +-
src/parquet/util/memory.h | 4 +-
src/parquet/util/rle-encoding.h | 28 +++--
src/parquet/util/rle-test.cc | 20 +--
src/parquet/util/visibility.h | 28 +++++
48 files changed, 430 insertions(+), 322 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 02700c8..6c0156c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -367,7 +367,7 @@ include(ThirdpartyToolchain)
# Thrift requires these definitions for some types that we use
add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETDB_H)
if (MSVC)
- add_definitions(-DNOMINMAX)
+ add_definitions(-DNOMINMAX -D_CRT_SECURE_NO_WARNINGS)
else()
add_definitions(-DHAVE_NETINET_IN_H -fPIC)
endif()
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/ci/msvc-build.bat
----------------------------------------------------------------------
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 04743e6..9c3f8c1 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -21,6 +21,11 @@ mkdir build
cd build
SET PARQUET_TEST_DATA=%APPVEYOR_BUILD_FOLDER%\data
+set PARQUET_CXXFLAGS=/MP
+
+if NOT "%CONFIGURATION%" == "Debug" (
+ set PARQUET_CXXFLAGS="%PARQUET_CXXFLAGS% /WX"
+)
if "%CONFIGURATION%" == "Toolchain" (
conda install -y boost-cpp=1.63 brotli=0.6.0 zlib=1.2.11 snappy=1.1.4 thrift-cpp=0.10.0 -c conda-forge
@@ -30,7 +35,7 @@ if "%CONFIGURATION%" == "Toolchain" (
cmake -G "%GENERATOR%" ^
-DCMAKE_BUILD_TYPE=Release ^
-DPARQUET_BOOST_USE_SHARED=OFF ^
- -DPARQUET_CXXFLAGS="/MP" ^
+ -DPARQUET_CXXFLAGS=%PARQUET_CXXFLAGS% ^
-DPARQUET_ZLIB_VENDORED=OFF ^
.. || exit /B
@@ -42,7 +47,7 @@ if NOT "%CONFIGURATION%" == "Toolchain" (
cmake -G "%GENERATOR%" ^
-DCMAKE_BUILD_TYPE=%CONFIGURATION% ^
-DPARQUET_BOOST_USE_SHARED=OFF ^
- -DPARQUET_CXXFLAGS="/MP" ^
+ -DPARQUET_CXXFLAGS=%PARQUET_CXXFLAGS% ^
.. || exit /B
cmake --build . --config %CONFIGURATION% || exit /B
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/examples/reader-writer.cc
----------------------------------------------------------------------
diff --git a/examples/reader-writer.cc b/examples/reader-writer.cc
index 9118c88..6f21f6c 100644
--- a/examples/reader-writer.cc
+++ b/examples/reader-writer.cc
@@ -30,7 +30,7 @@
* This example describes writing and reading Parquet Files in C++ and serves as a
* reference to the API.
* The file contains all the physical data types supported by Parquet.
-**/
+ **/
/* Parquet is a structured columnar file format
* Parquet File = "Parquet data" + "Parquet Metadata"
@@ -42,7 +42,7 @@
* complex (nested) type (internal nodes)
* For specific details, please refer the format here:
* https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
-**/
+ **/
constexpr int NUM_ROWS_PER_ROW_GROUP = 500;
constexpr int FIXED_LENGTH = 10;
@@ -168,7 +168,7 @@ int main(int argc, char** argv) {
parquet::FloatWriter* float_writer =
static_cast<parquet::FloatWriter*>(rg_writer->NextColumn());
for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) {
- float value = i * 1.1;
+ float value = i * 1.1f;
float_writer->WriteBatch(1, nullptr, nullptr, &value);
}
@@ -367,7 +367,7 @@ int main(int argc, char** argv) {
// There are no NULL values in the rows written
assert(values_read == 1);
// Verify the value written
- float expected_value = i * 1.1;
+ float expected_value = i * 1.1f;
assert(value == expected_value);
i++;
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/arrow-reader-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 3d156b5..b9c77f1 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -15,6 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+#ifdef _MSC_VER
+#pragma warning(push)
+// Disable forcing value to bool warnings
+#pragma warning(disable : 4800)
+#endif
+
#include "gtest/gtest.h"
#include <sstream>
@@ -1172,13 +1178,11 @@ TEST(TestArrowReaderAdHoc, Int96BadMemoryAccess) {
std::unique_ptr<FileReader> arrow_reader;
ASSERT_NO_THROW(
- arrow_reader.reset(new FileReader(pool,
- ParquetFileReader::OpenFile(path, false))));
+ arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false))));
std::shared_ptr<::arrow::Table> table;
ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table));
}
-
} // namespace arrow
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/reader.cc b/src/parquet/arrow/reader.cc
index 826fe37..a531454 100644
--- a/src/parquet/arrow/reader.cc
+++ b/src/parquet/arrow/reader.cc
@@ -295,7 +295,7 @@ Status FileReader::Impl::ReadColumn(int i, std::shared_ptr<Array>* out) {
batch_size += reader_->metadata()->RowGroup(j)->ColumnChunk(i)->num_values();
}
- return flat_column_reader->NextBatch(batch_size, out);
+ return flat_column_reader->NextBatch(static_cast<int>(batch_size), out);
}
Status FileReader::Impl::GetSchema(
@@ -319,7 +319,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index,
// TODO(wesm): Refactor to share more code with ReadTable
auto ReadColumnFunc = [&indices, &row_group_index, &schema, &columns, &rg_metadata,
- this](int i) {
+ this](int i) {
int column_index = indices[i];
int64_t batch_size = rg_metadata->ColumnChunk(column_index)->num_values();
@@ -331,7 +331,7 @@ Status FileReader::Impl::ReadRowGroup(int row_group_index,
ColumnReader flat_column_reader(std::move(impl));
std::shared_ptr<Array> array;
- RETURN_NOT_OK(flat_column_reader.NextBatch(batch_size, &array));
+ RETURN_NOT_OK(flat_column_reader.NextBatch(static_cast<int>(batch_size), &array));
columns[i] = std::make_shared<Column>(schema->field(i), array);
return Status::OK();
};
@@ -380,7 +380,7 @@ Status FileReader::Impl::ReadTable(std::shared_ptr<Table>* table) {
std::vector<int> indices(reader_->metadata()->num_columns());
for (size_t i = 0; i < indices.size(); ++i) {
- indices[i] = i;
+ indices[i] = static_cast<int>(i);
}
return ReadTable(indices, table);
}
@@ -389,7 +389,7 @@ Status FileReader::Impl::ReadRowGroup(int i, std::shared_ptr<Table>* table) {
std::vector<int> indices(reader_->metadata()->num_columns());
for (size_t i = 0; i < indices.size(); ++i) {
- indices[i] = i;
+ indices[i] = static_cast<int>(i);
}
return ReadRowGroup(i, indices, table);
}
@@ -479,8 +479,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch(TypedColumnReader<ParquetType>*
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false));
auto values = reinterpret_cast<ParquetCType*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(
- values_to_read, nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
+ nullptr, nullptr, values, &values_read));
ArrowCType* out_ptr = reinterpret_cast<ArrowCType*>(data_buffer_ptr_);
std::copy(values, values + values_read, out_ptr + valid_bits_idx_);
@@ -489,19 +489,20 @@ Status ColumnReader::Impl::ReadNonNullableBatch(TypedColumnReader<ParquetType>*
return Status::OK();
}
-#define NONNULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \
- template <> \
- Status ColumnReader::Impl::ReadNonNullableBatch<ArrowType, ParquetType>( \
- TypedColumnReader<ParquetType> * reader, int64_t values_to_read, \
- int64_t * levels_read) { \
- int64_t values_read; \
- CType* out_ptr = reinterpret_cast<CType*>(data_buffer_ptr_); \
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(values_to_read, nullptr, \
- nullptr, out_ptr + valid_bits_idx_, &values_read)); \
- \
- valid_bits_idx_ += values_read; \
- \
- return Status::OK(); \
+#define NONNULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \
+ template <> \
+ Status ColumnReader::Impl::ReadNonNullableBatch<ArrowType, ParquetType>( \
+ TypedColumnReader<ParquetType> * reader, int64_t values_to_read, \
+ int64_t * levels_read) { \
+ int64_t values_read; \
+ CType* out_ptr = reinterpret_cast<CType*>(data_buffer_ptr_); \
+ PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( \
+ static_cast<int>(values_to_read), nullptr, nullptr, \
+ out_ptr + valid_bits_idx_, &values_read)); \
+ \
+ valid_bits_idx_ += values_read; \
+ \
+ return Status::OK(); \
}
NONNULLABLE_BATCH_FAST_PATH(::arrow::Int32Type, Int32Type, int32_t)
@@ -519,8 +520,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::TimestampType, Int96Typ
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false));
auto values = reinterpret_cast<Int96*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(
- values_to_read, nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
+ nullptr, nullptr, values, &values_read));
int64_t* out_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_) + valid_bits_idx_;
for (int64_t i = 0; i < values_read; i++) {
@@ -537,8 +538,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::Date64Type, Int32Type>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
auto values = reinterpret_cast<int32_t*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(
- values_to_read, nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
+ nullptr, nullptr, values, &values_read));
int64_t* out_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_) + valid_bits_idx_;
for (int64_t i = 0; i < values_read; i++) {
@@ -556,8 +557,8 @@ Status ColumnReader::Impl::ReadNonNullableBatch<::arrow::BooleanType, BooleanTyp
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false));
auto values = reinterpret_cast<bool*>(values_buffer_.mutable_data());
int64_t values_read;
- PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(
- values_to_read, nullptr, nullptr, values, &values_read));
+ PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast<int>(values_to_read),
+ nullptr, nullptr, values, &values_read));
for (int64_t i = 0; i < values_read; i++) {
if (values[i]) { ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_); }
@@ -577,11 +578,12 @@ Status ColumnReader::Impl::ReadNullableBatch(TypedColumnReader<ParquetType>* rea
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false));
auto values = reinterpret_cast<ParquetCType*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels,
- values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
+ def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
+ values_read, &null_count));
auto data_ptr = reinterpret_cast<ArrowCType*>(data_buffer_ptr_);
- INIT_BITSET(valid_bits_ptr_, valid_bits_idx_);
+ INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
for (int64_t i = 0; i < *values_read; i++) {
if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) {
@@ -595,22 +597,22 @@ Status ColumnReader::Impl::ReadNullableBatch(TypedColumnReader<ParquetType>* rea
return Status::OK();
}
-#define NULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \
- template <> \
- Status ColumnReader::Impl::ReadNullableBatch<ArrowType, ParquetType>( \
- TypedColumnReader<ParquetType> * reader, int16_t * def_levels, \
- int16_t * rep_levels, int64_t values_to_read, int64_t * levels_read, \
- int64_t * values_read) { \
- auto data_ptr = reinterpret_cast<CType*>(data_buffer_ptr_); \
- int64_t null_count; \
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, \
- data_ptr + valid_bits_idx_, valid_bits_ptr_, valid_bits_idx_, levels_read, \
- values_read, &null_count)); \
- \
- valid_bits_idx_ += *values_read; \
- null_count_ += null_count; \
- \
- return Status::OK(); \
+#define NULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \
+ template <> \
+ Status ColumnReader::Impl::ReadNullableBatch<ArrowType, ParquetType>( \
+ TypedColumnReader<ParquetType> * reader, int16_t * def_levels, \
+ int16_t * rep_levels, int64_t values_to_read, int64_t * levels_read, \
+ int64_t * values_read) { \
+ auto data_ptr = reinterpret_cast<CType*>(data_buffer_ptr_); \
+ int64_t null_count; \
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read), \
+ def_levels, rep_levels, data_ptr + valid_bits_idx_, valid_bits_ptr_, \
+ valid_bits_idx_, levels_read, values_read, &null_count)); \
+ \
+ valid_bits_idx_ += *values_read; \
+ null_count_ += null_count; \
+ \
+ return Status::OK(); \
}
NULLABLE_BATCH_FAST_PATH(::arrow::Int32Type, Int32Type, int32_t)
@@ -629,11 +631,12 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::TimestampType, Int96Type>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false));
auto values = reinterpret_cast<Int96*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels,
- values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
+ def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
+ values_read, &null_count));
auto data_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_);
- INIT_BITSET(valid_bits_ptr_, valid_bits_idx_);
+ INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
for (int64_t i = 0; i < *values_read; i++) {
if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) {
data_ptr[valid_bits_idx_ + i] = impala_timestamp_to_nanoseconds(values[i]);
@@ -653,11 +656,12 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::Date64Type, Int32Type>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false));
auto values = reinterpret_cast<int32_t*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels,
- values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
+ def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
+ values_read, &null_count));
auto data_ptr = reinterpret_cast<int64_t*>(data_buffer_ptr_);
- INIT_BITSET(valid_bits_ptr_, valid_bits_idx_);
+ INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
for (int64_t i = 0; i < *values_read; i++) {
if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) {
data_ptr[valid_bits_idx_ + i] = static_cast<int64_t>(values[i]) * 86400000;
@@ -677,10 +681,11 @@ Status ColumnReader::Impl::ReadNullableBatch<::arrow::BooleanType, BooleanType>(
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false));
auto values = reinterpret_cast<bool*>(values_buffer_.mutable_data());
int64_t null_count;
- PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels,
- values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count));
+ PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast<int>(values_to_read),
+ def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read,
+ values_read, &null_count));
- INIT_BITSET(valid_bits_ptr_, valid_bits_idx_);
+ INIT_BITSET(valid_bits_ptr_, static_cast<int>(valid_bits_idx_));
for (int64_t i = 0; i < *values_read; i++) {
if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) {
if (values[i]) { ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_ + i); }
@@ -716,7 +721,8 @@ Status ColumnReader::Impl::InitDataBuffer<::arrow::BooleanType>(int batch_size)
Status ColumnReader::Impl::InitValidBits(int batch_size) {
valid_bits_idx_ = 0;
if (descr_->max_definition_level() > 0) {
- int valid_bits_size = ::arrow::BitUtil::CeilByte(batch_size + 1) / 8;
+ int valid_bits_size =
+ static_cast<int>(::arrow::BitUtil::CeilByte(batch_size + 1)) / 8;
valid_bits_buffer_ = std::make_shared<PoolBuffer>(pool_);
RETURN_NOT_OK(valid_bits_buffer_->Resize(valid_bits_size, false));
valid_bits_ptr_ = valid_bits_buffer_->mutable_data();
@@ -786,7 +792,8 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels,
if (j == (list_depth - 1)) {
RETURN_NOT_OK(offset_builders[j]->Append(values_offset));
} else {
- RETURN_NOT_OK(offset_builders[j]->Append(offset_builders[j + 1]->length()));
+ RETURN_NOT_OK(offset_builders[j]->Append(
+ static_cast<int32_t>(offset_builders[j + 1]->length())));
}
if (((empty_def_level[j] - 1) == def_levels[i]) && (nullable[j])) {
@@ -806,7 +813,8 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels,
if (j == (list_depth - 1)) {
RETURN_NOT_OK(offset_builders[j]->Append(values_offset));
} else {
- RETURN_NOT_OK(offset_builders[j]->Append(offset_builders[j + 1]->length()));
+ RETURN_NOT_OK(offset_builders[j]->Append(
+ static_cast<int32_t>(offset_builders[j + 1]->length())));
}
}
@@ -864,9 +872,9 @@ Status ColumnReader::Impl::TypedReadBatch(int batch_size, std::shared_ptr<Array>
RETURN_NOT_OK((ReadNullableBatch<ArrowType, ParquetType>(reader,
def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read,
&levels_read, &values_read)));
- total_levels_read += levels_read;
+ total_levels_read += static_cast<int>(levels_read);
}
- values_to_read -= values_read;
+ values_to_read -= static_cast<int>(values_read);
if (!column_reader_->HasNext()) { NextRowGroup(); }
}
@@ -925,9 +933,9 @@ Status ColumnReader::Impl::TypedReadBatch<::arrow::BooleanType, BooleanType>(
RETURN_NOT_OK((ReadNullableBatch<::arrow::BooleanType, BooleanType>(reader,
def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read,
&levels_read, &values_read)));
- total_levels_read += levels_read;
+ total_levels_read += static_cast<int>(levels_read);
}
- values_to_read -= values_read;
+ values_to_read -= static_cast<int>(values_read);
if (!column_reader_->HasNext()) { NextRowGroup(); }
}
@@ -991,7 +999,7 @@ Status ColumnReader::Impl::ReadByteArrayBatch(
PARQUET_CATCH_NOT_OK(
levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read,
rep_levels + total_levels_read, values, &values_read));
- values_to_read -= levels_read;
+ values_to_read -= static_cast<int>(levels_read);
if (descr_->max_definition_level() == 0) {
for (int64_t i = 0; i < levels_read; i++) {
RETURN_NOT_OK(
@@ -1012,7 +1020,7 @@ Status ColumnReader::Impl::ReadByteArrayBatch(
values_idx++;
}
}
- total_levels_read += levels_read;
+ total_levels_read += static_cast<int>(levels_read);
}
if (!column_reader_->HasNext()) { NextRowGroup(); }
}
@@ -1047,7 +1055,7 @@ Status ColumnReader::Impl::ReadFLBABatch(
PARQUET_CATCH_NOT_OK(
levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read,
rep_levels + total_levels_read, values, &values_read));
- values_to_read -= levels_read;
+ values_to_read -= static_cast<int>(levels_read);
if (descr_->max_definition_level() == 0) {
for (int64_t i = 0; i < levels_read; i++) {
RETURN_NOT_OK(builder.Append(values[i].ptr));
@@ -1064,7 +1072,7 @@ Status ColumnReader::Impl::ReadFLBABatch(
values_idx++;
}
}
- total_levels_read += levels_read;
+ total_levels_read += static_cast<int>(levels_read);
}
if (!column_reader_->HasNext()) { NextRowGroup(); }
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 388250e..a5337cf 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -210,7 +210,7 @@ NullableArray(
if (!valid_bytes[i]) {
builder.AppendNull();
} else {
- ::arrow::test::random_bytes(kBufferSize, seed + i, buffer);
+ ::arrow::test::random_bytes(kBufferSize, seed + static_cast<uint32_t>(i), buffer);
builder.Append(buffer, kBufferSize);
}
}
@@ -240,7 +240,7 @@ NullableArray(
if (!valid_bytes[i]) {
builder.AppendNull();
} else {
- ::arrow::test::random_bytes(kBufferSize, seed + i, buffer);
+ ::arrow::test::random_bytes(kBufferSize, seed + static_cast<uint32_t>(i), buffer);
builder.Append(buffer);
}
}
@@ -294,10 +294,10 @@ Status MakeListArary(const std::shared_ptr<Array>& values, int64_t size,
if (!(((i % 2) == 0) && ((i / 2) < null_count))) {
// Non-null list (list with index 1 is always empty).
::arrow::BitUtil::SetBit(null_bitmap_ptr, i);
- if (i != 1) { current_offset += length_per_entry; }
+ if (i != 1) { current_offset += static_cast<int32_t>(length_per_entry); }
}
}
- offsets_ptr[size] = values->length();
+ offsets_ptr[size] = static_cast<int32_t>(values->length());
auto value_field =
std::make_shared<::arrow::Field>("item", values->type(), nullable_values);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/arrow/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index 631e16c..3344d1b 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -63,7 +63,7 @@ class LevelBuilder {
Status VisitInline(const Array& array);
Status Visit(const ::arrow::PrimitiveArray& array) {
- array_offsets_.push_back(array.offset());
+ array_offsets_.push_back(static_cast<int32_t>(array.offset()));
valid_bitmaps_.push_back(array.null_bitmap_data());
null_counts_.push_back(array.null_count());
values_type_ = array.type_id();
@@ -72,7 +72,7 @@ class LevelBuilder {
}
Status Visit(const ::arrow::BinaryArray& array) {
- array_offsets_.push_back(array.offset());
+ array_offsets_.push_back(static_cast<int32_t>(array.offset()));
valid_bitmaps_.push_back(array.null_bitmap_data());
null_counts_.push_back(array.null_count());
values_type_ = array.type_id();
@@ -81,7 +81,7 @@ class LevelBuilder {
}
Status Visit(const ListArray& array) {
- array_offsets_.push_back(array.offset());
+ array_offsets_.push_back(static_cast<int32_t>(array.offset()));
valid_bitmaps_.push_back(array.null_bitmap_data());
null_counts_.push_back(array.null_count());
offsets_.push_back(array.raw_value_offsets());
@@ -111,7 +111,7 @@ class LevelBuilder {
std::shared_ptr<Buffer>* rep_levels, const Array** values_array) {
// Work downwards to extract bitmaps and offsets
min_offset_idx_ = 0;
- max_offset_idx_ = array.length();
+ max_offset_idx_ = static_cast<int32_t>(array.length());
RETURN_NOT_OK(VisitInline(array));
*num_values = max_offset_idx_ - min_offset_idx_;
*values_offset = min_offset_idx_;
@@ -143,7 +143,7 @@ class LevelBuilder {
std::fill(def_levels_ptr, def_levels_ptr + array.length(), 1);
} else {
const uint8_t* valid_bits = array.null_bitmap_data();
- INIT_BITSET(valid_bits, array.offset());
+ INIT_BITSET(valid_bits, static_cast<int>(array.offset()));
for (int i = 0; i < array.length(); i++) {
if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {
def_levels_ptr[i] = 1;
@@ -396,7 +396,7 @@ Status FileWriter::Impl::WriteNullableBatch(TypedColumnWriter<ParquetType>* writ
RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(ParquetCType)));
auto buffer_ptr = reinterpret_cast<ParquetCType*>(data_buffer_.mutable_data());
- INIT_BITSET(valid_bits, valid_bits_offset);
+ INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
for (int i = 0; i < num_values; i++) {
if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {
buffer_ptr[i] = static_cast<ParquetCType>(data_ptr[i]);
@@ -417,7 +417,7 @@ Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::Date64Type>(
const int64_t* data_ptr) {
RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t)));
auto buffer_ptr = reinterpret_cast<int32_t*>(data_buffer_.mutable_data());
- INIT_BITSET(valid_bits, valid_bits_offset);
+ INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
for (int i = 0; i < num_values; i++) {
if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {
// Convert from milliseconds into days since the epoch
@@ -439,7 +439,7 @@ Status FileWriter::Impl::WriteNullableBatch<Int32Type, ::arrow::Time32Type>(
const int32_t* data_ptr) {
RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t)));
auto buffer_ptr = reinterpret_cast<int32_t*>(data_buffer_.mutable_data());
- INIT_BITSET(valid_bits, valid_bits_offset);
+ INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
if (type.unit() == TimeUnit::SECOND) {
for (int i = 0; i < num_values; i++) {
@@ -497,7 +497,7 @@ Status FileWriter::Impl::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
auto writer = reinterpret_cast<TypedColumnWriter<BooleanType>*>(column_writer);
int buffer_idx = 0;
- int32_t offset = array->offset();
+ int64_t offset = array->offset();
for (int i = 0; i < data->length(); i++) {
if (!data->IsNull(i)) {
buffer_ptr[buffer_idx++] = BitUtil::GetBit(data_ptr, offset + i);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/column-reader-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/column-reader-test.cc b/src/parquet/column/column-reader-test.cc
index 6bf6651..a31c817 100644
--- a/src/parquet/column/column-reader-test.cc
+++ b/src/parquet/column/column-reader-test.cc
@@ -89,9 +89,9 @@ class TestPrimitiveReader : public ::testing::Test {
// 1) batch_size < page_size (multiple ReadBatch from a single page)
// 2) batch_size > page_size (BatchRead limits to a single page)
do {
- batch = reader->ReadBatch(batch_size, &dresult[0] + batch_actual,
- &rresult[0] + batch_actual, &vresult[0] + total_values_read, &values_read);
- total_values_read += values_read;
+ batch = static_cast<int>(reader->ReadBatch(batch_size, &dresult[0] + batch_actual,
+ &rresult[0] + batch_actual, &vresult[0] + total_values_read, &values_read));
+ total_values_read += static_cast<int>(values_read);
batch_actual += batch;
batch_size = std::max(batch_size * 2, 4096);
} while (batch > 0);
@@ -102,7 +102,8 @@ class TestPrimitiveReader : public ::testing::Test {
if (max_def_level_ > 0) { ASSERT_TRUE(vector_equal(def_levels_, dresult)); }
if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); }
// catch improper writes at EOS
- batch_actual = reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read);
+ batch_actual =
+ static_cast<int>(reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read));
ASSERT_EQ(0, batch_actual);
ASSERT_EQ(0, values_read);
}
@@ -126,12 +127,13 @@ class TestPrimitiveReader : public ::testing::Test {
// 1) batch_size < page_size (multiple ReadBatch from a single page)
// 2) batch_size > page_size (BatchRead limits to a single page)
do {
- batch = reader->ReadBatchSpaced(batch_size, dresult.data() + levels_actual,
- rresult.data() + levels_actual, vresult.data() + batch_actual,
- valid_bits.data() + batch_actual, 0, &levels_read, &values_read, &null_count);
- total_values_read += batch - null_count;
+ batch = static_cast<int>(reader->ReadBatchSpaced(batch_size,
+ dresult.data() + levels_actual, rresult.data() + levels_actual,
+ vresult.data() + batch_actual, valid_bits.data() + batch_actual, 0,
+ &levels_read, &values_read, &null_count));
+ total_values_read += batch - static_cast<int>(null_count);
batch_actual += batch;
- levels_actual += levels_read;
+ levels_actual += static_cast<int>(levels_read);
batch_size = std::max(batch_size * 2, 4096);
} while ((batch > 0) || (levels_read > 0));
@@ -146,8 +148,8 @@ class TestPrimitiveReader : public ::testing::Test {
}
if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); }
// catch improper writes at EOS
- batch_actual = reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr,
- valid_bits.data(), 0, &levels_read, &values_read, &null_count);
+ batch_actual = static_cast<int>(reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr,
+ valid_bits.data(), 0, &levels_read, &values_read, &null_count));
ASSERT_EQ(0, batch_actual);
ASSERT_EQ(0, null_count);
}
@@ -262,8 +264,8 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
// Read half a page
reader->ReadBatch(
levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read);
- vector<int32_t> sub_values(
- values_.begin() + 2 * levels_per_page, values_.begin() + 2.5 * levels_per_page);
+ vector<int32_t> sub_values(values_.begin() + 2 * levels_per_page,
+ values_.begin() + static_cast<int>(2.5 * static_cast<double>(levels_per_page)));
ASSERT_TRUE(vector_equal(sub_values, vresult));
// 2) skip_size == page_size (skip across two pages)
@@ -273,7 +275,8 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
reader->ReadBatch(
levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read);
sub_values.clear();
- sub_values.insert(sub_values.end(), values_.begin() + 3.5 * levels_per_page,
+ sub_values.insert(sub_values.end(),
+ values_.begin() + static_cast<int>(3.5 * static_cast<double>(levels_per_page)),
values_.begin() + 4 * levels_per_page);
ASSERT_TRUE(vector_equal(sub_values, vresult));
@@ -285,8 +288,9 @@ TEST_F(TestPrimitiveReader, TestInt32FlatRequiredSkip) {
reader->ReadBatch(
levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read);
sub_values.clear();
- sub_values.insert(
- sub_values.end(), values_.begin() + 4.5 * levels_per_page, values_.end());
+ sub_values.insert(sub_values.end(),
+ values_.begin() + static_cast<int>(4.5 * static_cast<double>(levels_per_page)),
+ values_.end());
ASSERT_TRUE(vector_equal(sub_values, vresult));
values_.clear();
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/column-writer-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/column-writer-test.cc b/src/parquet/column/column-writer-test.cc
index dedb2c2..33eefac 100644
--- a/src/parquet/column/column-writer-test.cc
+++ b/src/parquet/column/column-writer-test.cc
@@ -93,8 +93,9 @@ class TestPrimitiveWriter : public PrimitiveTypedTest<TestType> {
void ReadColumn(Compression::type compression = Compression::UNCOMPRESSED) {
BuildReader(static_cast<int64_t>(this->values_out_.size()), compression);
- reader_->ReadBatch(this->values_out_.size(), definition_levels_out_.data(),
- repetition_levels_out_.data(), this->values_out_ptr_, &values_read_);
+ reader_->ReadBatch(static_cast<int>(this->values_out_.size()),
+ definition_levels_out_.data(), repetition_levels_out_.data(),
+ this->values_out_ptr_, &values_read_);
this->SyncValuesOut();
}
@@ -133,7 +134,7 @@ class TestPrimitiveWriter : public PrimitiveTypedTest<TestType> {
Compression::type compression, bool enable_dictionary, bool enable_statistics,
int64_t num_rows) {
std::vector<uint8_t> valid_bits(
- BitUtil::RoundUpNumBytes(this->values_.size()) + 1, 255);
+ BitUtil::RoundUpNumBytes(static_cast<uint32_t>(this->values_.size())) + 1, 255);
ColumnProperties column_properties(
encoding, compression, enable_dictionary, enable_statistics);
std::shared_ptr<TypedColumnWriter<TestType>> writer =
@@ -204,7 +205,8 @@ void TestPrimitiveWriter<TestType>::ReadColumnFully(Compression::type compressio
values_read_ = 0;
while (values_read_ < total_values) {
int64_t values_read_recently = 0;
- reader_->ReadBatch(this->values_out_.size() - values_read_,
+ reader_->ReadBatch(
+ static_cast<int>(this->values_out_.size()) - static_cast<int>(values_read_),
definition_levels_out_.data() + values_read_,
repetition_levels_out_.data() + values_read_,
this->values_out_ptr_ + values_read_, &values_read_recently);
@@ -222,7 +224,8 @@ void TestPrimitiveWriter<FLBAType>::ReadColumnFully(Compression::type compressio
values_read_ = 0;
while (values_read_ < total_values) {
int64_t values_read_recently = 0;
- reader_->ReadBatch(this->values_out_.size() - values_read_,
+ reader_->ReadBatch(
+ static_cast<int>(this->values_out_.size()) - static_cast<int>(values_read_),
definition_levels_out_.data() + values_read_,
repetition_levels_out_.data() + values_read_,
this->values_out_ptr_ + values_read_, &values_read_recently);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/levels-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels-test.cc b/src/parquet/column/levels-test.cc
index 1d29313..a6284a9 100644
--- a/src/parquet/column/levels-test.cc
+++ b/src/parquet/column/levels-test.cc
@@ -52,17 +52,18 @@ void EncodeLevels(Encoding::type encoding, int max_level, int num_levels,
LevelEncoder encoder;
int levels_count = 0;
bytes.resize(2 * num_levels);
- ASSERT_EQ(2 * num_levels, bytes.size());
+ ASSERT_EQ(2 * num_levels, static_cast<int>(bytes.size()));
// encode levels
if (encoding == Encoding::RLE) {
// leave space to write the rle length value
- encoder.Init(
- encoding, max_level, num_levels, bytes.data() + sizeof(int32_t), bytes.size());
+ encoder.Init(encoding, max_level, num_levels, bytes.data() + sizeof(int32_t),
+ static_cast<int>(bytes.size()));
levels_count = encoder.Encode(num_levels, input_levels);
(reinterpret_cast<int32_t*>(bytes.data()))[0] = encoder.len();
} else {
- encoder.Init(encoding, max_level, num_levels, bytes.data(), bytes.size());
+ encoder.Init(
+ encoding, max_level, num_levels, bytes.data(), static_cast<int>(bytes.size()));
levels_count = encoder.Encode(num_levels, input_levels);
}
ASSERT_EQ(num_levels, levels_count);
@@ -73,10 +74,10 @@ void VerifyDecodingLevels(Encoding::type encoding, int max_level,
LevelDecoder decoder;
int levels_count = 0;
std::vector<int16_t> output_levels;
- int num_levels = input_levels.size();
+ int num_levels = static_cast<int>(input_levels.size());
output_levels.resize(num_levels);
- ASSERT_EQ(num_levels, output_levels.size());
+ ASSERT_EQ(num_levels, static_cast<int>(output_levels.size()));
// Decode levels and test with multiple decode calls
decoder.SetData(encoding, max_level, num_levels, bytes.data());
@@ -112,13 +113,13 @@ void VerifyDecodingMultipleSetData(Encoding::type encoding, int max_level,
std::vector<int16_t> output_levels;
// Decode levels and test with multiple SetData calls
- int setdata_count = bytes.size();
- int num_levels = input_levels.size() / setdata_count;
+ int setdata_count = static_cast<int>(bytes.size());
+ int num_levels = static_cast<int>(input_levels.size()) / setdata_count;
output_levels.resize(num_levels);
// Try multiple SetData
for (int ct = 0; ct < setdata_count; ct++) {
int offset = ct * num_levels;
- ASSERT_EQ(num_levels, output_levels.size());
+ ASSERT_EQ(num_levels, static_cast<int>(output_levels.size()));
decoder.SetData(encoding, max_level, num_levels, bytes[ct].data());
levels_count = decoder.Decode(num_levels, output_levels.data());
ASSERT_EQ(num_levels, levels_count);
@@ -149,7 +150,8 @@ TEST(TestLevels, TestLevelsDecodeMultipleBitWidth) {
int max_level = (1 << bit_width) - 1;
// Generate levels
GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels);
- EncodeLevels(encoding, max_level, input_levels.size(), input_levels.data(), bytes);
+ EncodeLevels(encoding, max_level, static_cast<int>(input_levels.size()),
+ input_levels.data(), bytes);
VerifyDecodingLevels(encoding, max_level, input_levels, bytes);
input_levels.clear();
}
@@ -166,7 +168,7 @@ TEST(TestLevels, TestLevelsDecodeMultipleSetData) {
std::vector<std::vector<uint8_t>> bytes;
Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED};
GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels);
- int num_levels = input_levels.size();
+ int num_levels = static_cast<int>(input_levels.size());
int setdata_factor = 8;
int split_level_size = num_levels / setdata_factor;
bytes.resize(setdata_factor);
@@ -200,7 +202,8 @@ TEST(TestLevelEncoder, MinimumBufferSize) {
LevelEncoder::MaxBufferSize(Encoding::RLE, 1, kNumToEncode));
LevelEncoder encoder;
- encoder.Init(Encoding::RLE, 1, kNumToEncode, output.data(), output.size());
+ encoder.Init(
+ Encoding::RLE, 1, kNumToEncode, output.data(), static_cast<int>(output.size()));
int encode_count = encoder.Encode(kNumToEncode, levels.data());
ASSERT_EQ(kNumToEncode, encode_count);
@@ -231,7 +234,8 @@ TEST(TestLevelEncoder, MinimumBufferSize2) {
LevelEncoder::MaxBufferSize(Encoding::RLE, bit_width, kNumToEncode));
LevelEncoder encoder;
- encoder.Init(Encoding::RLE, bit_width, kNumToEncode, output.data(), output.size());
+ encoder.Init(Encoding::RLE, bit_width, kNumToEncode, output.data(),
+ static_cast<int>(output.size()));
int encode_count = encoder.Encode(kNumToEncode, levels.data());
ASSERT_EQ(kNumToEncode, encode_count);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/levels.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/levels.cc b/src/parquet/column/levels.cc
index 716e08a..fd25420 100644
--- a/src/parquet/column/levels.cc
+++ b/src/parquet/column/levels.cc
@@ -36,7 +36,8 @@ void LevelEncoder::Init(Encoding::type encoding, int16_t max_level,
break;
}
case Encoding::BIT_PACKED: {
- int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
+ int num_bytes =
+ static_cast<int>(BitUtil::Ceil(num_buffered_values * bit_width_, 8));
bit_packed_encoder_.reset(new BitWriter(data, num_bytes));
break;
}
@@ -58,7 +59,7 @@ int LevelEncoder::MaxBufferSize(
break;
}
case Encoding::BIT_PACKED: {
- num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8);
+ num_bytes = static_cast<int>(BitUtil::Ceil(num_buffered_values * bit_width, 8));
break;
}
default:
@@ -112,7 +113,8 @@ int LevelDecoder::SetData(Encoding::type encoding, int16_t max_level,
return sizeof(int32_t) + num_bytes;
}
case Encoding::BIT_PACKED: {
- num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8);
+ num_bytes =
+ static_cast<int32_t>(BitUtil::Ceil(num_buffered_values * bit_width_, 8));
if (!bit_packed_decoder_) {
bit_packed_decoder_.reset(new BitReader(data, num_bytes));
} else {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/page.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/page.h b/src/parquet/column/page.h
index bca0ca4..a3813c5 100644
--- a/src/parquet/column/page.h
+++ b/src/parquet/column/page.h
@@ -52,7 +52,7 @@ class Page {
const uint8_t* data() const { return buffer_->data(); }
// @returns: the total size in bytes of the page's data buffer
- int32_t size() const { return buffer_->size(); }
+ int32_t size() const { return static_cast<int32_t>(buffer_->size()); }
private:
std::shared_ptr<Buffer> buffer_;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/reader.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc
index 71bb689..fe2de57 100644
--- a/src/parquet/column/reader.cc
+++ b/src/parquet/column/reader.cc
@@ -169,7 +169,8 @@ bool TypedColumnReader<DType>::ReadNewPage() {
throw ParquetException("Unknown encoding type.");
}
}
- current_decoder_->SetData(num_buffered_values_, buffer, data_size);
+ current_decoder_->SetData(
+ num_buffered_values_, buffer, static_cast<int>(data_size));
return true;
} else {
// We don't know what this page type is. We're allowed to skip non-data
@@ -185,12 +186,12 @@ bool TypedColumnReader<DType>::ReadNewPage() {
int64_t ColumnReader::ReadDefinitionLevels(int64_t batch_size, int16_t* levels) {
if (descr_->max_definition_level() == 0) { return 0; }
- return definition_level_decoder_.Decode(batch_size, levels);
+ return definition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
}
int64_t ColumnReader::ReadRepetitionLevels(int64_t batch_size, int16_t* levels) {
if (descr_->max_repetition_level() == 0) { return 0; }
- return repetition_level_decoder_.Decode(batch_size, levels);
+ return repetition_level_decoder_.Decode(static_cast<int>(batch_size), levels);
}
// ----------------------------------------------------------------------
@@ -225,13 +226,13 @@ std::shared_ptr<ColumnReader> ColumnReader::Make(
// ----------------------------------------------------------------------
// Instantiate templated classes
-template class TypedColumnReader<BooleanType>;
-template class TypedColumnReader<Int32Type>;
-template class TypedColumnReader<Int64Type>;
-template class TypedColumnReader<Int96Type>;
-template class TypedColumnReader<FloatType>;
-template class TypedColumnReader<DoubleType>;
-template class TypedColumnReader<ByteArrayType>;
-template class TypedColumnReader<FLBAType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<BooleanType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<Int32Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<Int64Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<Int96Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<FloatType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<DoubleType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<ByteArrayType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnReader<FLBAType>;
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/reader.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h
index e0c6585..80084b2 100644
--- a/src/parquet/column/reader.h
+++ b/src/parquet/column/reader.h
@@ -205,7 +205,7 @@ class PARQUET_EXPORT TypedColumnReader : public ColumnReader {
template <typename DType>
inline int64_t TypedColumnReader<DType>::ReadValues(int64_t batch_size, T* out) {
- int64_t num_decoded = current_decoder_->Decode(out, batch_size);
+ int64_t num_decoded = current_decoder_->Decode(out, static_cast<int>(batch_size));
return num_decoded;
}
@@ -213,7 +213,7 @@ template <typename DType>
inline int64_t TypedColumnReader<DType>::ReadValuesSpaced(int64_t batch_size, T* out,
int null_count, uint8_t* valid_bits, int64_t valid_bits_offset) {
return current_decoder_->DecodeSpaced(
- out, batch_size, null_count, valid_bits, valid_bits_offset);
+ out, static_cast<int>(batch_size), null_count, valid_bits, valid_bits_offset);
}
template <typename DType>
@@ -257,7 +257,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_
*values_read = ReadValues(values_to_read, values);
int64_t total_values = std::max(num_def_levels, *values_read);
- num_decoded_values_ += total_values;
+ num_decoded_values_ += static_cast<int>(total_values);
return total_values;
}
@@ -265,8 +265,8 @@ inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_
inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels,
int16_t max_definition_level, int64_t* values_read, int64_t* null_count,
uint8_t* valid_bits, int64_t valid_bits_offset) {
- int byte_offset = valid_bits_offset / 8;
- int bit_offset = valid_bits_offset % 8;
+ int byte_offset = static_cast<int>(valid_bits_offset) / 8;
+ int bit_offset = static_cast<int>(valid_bits_offset) % 8;
uint8_t bitset = valid_bits[byte_offset];
for (int i = 0; i < num_def_levels; ++i) {
@@ -338,8 +338,8 @@ inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
int16_t max_definition_level = descr_->max_definition_level();
DefinitionLevelsToBitmap(def_levels, num_def_levels, max_definition_level,
values_read, &null_count, valid_bits, valid_bits_offset);
- total_values = ReadValuesSpaced(
- *values_read, values, null_count, valid_bits, valid_bits_offset);
+ total_values = ReadValuesSpaced(*values_read, values, static_cast<int>(null_count),
+ valid_bits, valid_bits_offset);
}
*levels_read = num_def_levels;
*null_count_out = null_count;
@@ -354,7 +354,7 @@ inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
*levels_read = total_values;
}
- num_decoded_values_ += *levels_read;
+ num_decoded_values_ += static_cast<int>(*levels_read);
return total_values;
}
@@ -383,10 +383,10 @@ inline int64_t TypedColumnReader<DType>::Skip(int64_t num_rows_to_skip) {
do {
batch_size = std::min(batch_size, rows_to_skip);
- values_read =
- ReadBatch(batch_size, reinterpret_cast<int16_t*>(def_levels->mutable_data()),
- reinterpret_cast<int16_t*>(rep_levels->mutable_data()),
- reinterpret_cast<T*>(vals->mutable_data()), &values_read);
+ values_read = ReadBatch(static_cast<int>(batch_size),
+ reinterpret_cast<int16_t*>(def_levels->mutable_data()),
+ reinterpret_cast<int16_t*>(rep_levels->mutable_data()),
+ reinterpret_cast<T*>(vals->mutable_data()), &values_read);
rows_to_skip -= values_read;
} while (values_read > 0 && rows_to_skip > 0);
}
@@ -403,14 +403,14 @@ typedef TypedColumnReader<DoubleType> DoubleReader;
typedef TypedColumnReader<ByteArrayType> ByteArrayReader;
typedef TypedColumnReader<FLBAType> FixedLenByteArrayReader;
-extern template class PARQUET_EXPORT TypedColumnReader<BooleanType>;
-extern template class PARQUET_EXPORT TypedColumnReader<Int32Type>;
-extern template class PARQUET_EXPORT TypedColumnReader<Int64Type>;
-extern template class PARQUET_EXPORT TypedColumnReader<Int96Type>;
-extern template class PARQUET_EXPORT TypedColumnReader<FloatType>;
-extern template class PARQUET_EXPORT TypedColumnReader<DoubleType>;
-extern template class PARQUET_EXPORT TypedColumnReader<ByteArrayType>;
-extern template class PARQUET_EXPORT TypedColumnReader<FLBAType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<BooleanType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<Int32Type>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<Int64Type>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<Int96Type>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<FloatType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<DoubleType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<ByteArrayType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnReader<FLBAType>;
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/scanner.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/scanner.h b/src/parquet/column/scanner.h
index 914f2ad..a9b83c1 100644
--- a/src/parquet/column/scanner.h
+++ b/src/parquet/column/scanner.h
@@ -103,8 +103,9 @@ class PARQUET_EXPORT TypedScanner : public Scanner {
bool NextLevels(int16_t* def_level, int16_t* rep_level) {
if (level_offset_ == levels_buffered_) {
- levels_buffered_ = typed_reader_->ReadBatch(batch_size_, def_levels_.data(),
- rep_levels_.data(), values_, &values_buffered_);
+ levels_buffered_ =
+ static_cast<int>(typed_reader_->ReadBatch(static_cast<int>(batch_size_),
+ def_levels_.data(), rep_levels_.data(), values_, &values_buffered_));
value_offset_ = 0;
level_offset_ = 0;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/statistics-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/statistics-test.cc b/src/parquet/column/statistics-test.cc
index d631d98..e656f81 100644
--- a/src/parquet/column/statistics-test.cc
+++ b/src/parquet/column/statistics-test.cc
@@ -72,7 +72,7 @@ class TestRowGroupStatistics : public PrimitiveTypedTest<TestType> {
TypedStats statistics3(this->schema_.Column(0));
std::vector<uint8_t> valid_bits(
- BitUtil::RoundUpNumBytes(this->values_.size()) + 1, 255);
+ BitUtil::RoundUpNumBytes(static_cast<uint32_t>(this->values_.size())) + 1, 255);
statistics3.UpdateSpaced(
this->values_ptr_, valid_bits.data(), 0, this->values_.size(), 0);
std::string encoded_min_spaced = statistics3.EncodeMin();
@@ -145,8 +145,8 @@ class TestRowGroupStatistics : public PrimitiveTypedTest<TestType> {
// simulate the case when data comes from multiple buffers,
// in which case special care is necessary for FLBA/ByteArray types
for (int i = 0; i < 2; i++) {
- int batch_num_values = i ? num_values - num_values / 2 : num_values / 2;
- int batch_null_count = i ? null_count : 0;
+ int64_t batch_num_values = i ? num_values - num_values / 2 : num_values / 2;
+ int64_t batch_null_count = i ? null_count : 0;
DCHECK(null_count <= num_values); // avoid too much headache
std::vector<int16_t> definition_levels(batch_null_count, 0);
definition_levels.insert(
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/statistics.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/statistics.cc b/src/parquet/column/statistics.cc
index e67a3d3..961a2af 100644
--- a/src/parquet/column/statistics.cc
+++ b/src/parquet/column/statistics.cc
@@ -120,7 +120,7 @@ void TypedRowGroupStatistics<DType>::UpdateSpaced(const T* values,
if (num_not_null == 0) return;
Compare<T> compare(descr_);
- INIT_BITSET(valid_bits, valid_bits_offset);
+ INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
// Find first valid entry and use that for min/max
// As (num_not_null != 0) there must be one
int64_t length = num_null + num_not_null;
@@ -216,7 +216,8 @@ void TypedRowGroupStatistics<DType>::PlainEncode(const T& src, std::string* dst)
template <typename DType>
void TypedRowGroupStatistics<DType>::PlainDecode(const std::string& src, T* dst) {
PlainDecoder<DType> decoder(descr());
- decoder.SetData(1, reinterpret_cast<const uint8_t*>(src.c_str()), src.size());
+ decoder.SetData(
+ 1, reinterpret_cast<const uint8_t*>(src.c_str()), static_cast<int>(src.size()));
decoder.Decode(dst, 1);
}
@@ -227,17 +228,17 @@ void TypedRowGroupStatistics<ByteArrayType>::PlainEncode(const T& src, std::stri
template <>
void TypedRowGroupStatistics<ByteArrayType>::PlainDecode(const std::string& src, T* dst) {
- dst->len = src.size();
+ dst->len = static_cast<uint32_t>(src.size());
dst->ptr = reinterpret_cast<const uint8_t*>(src.c_str());
}
-template class TypedRowGroupStatistics<BooleanType>;
-template class TypedRowGroupStatistics<Int32Type>;
-template class TypedRowGroupStatistics<Int64Type>;
-template class TypedRowGroupStatistics<Int96Type>;
-template class TypedRowGroupStatistics<FloatType>;
-template class TypedRowGroupStatistics<DoubleType>;
-template class TypedRowGroupStatistics<ByteArrayType>;
-template class TypedRowGroupStatistics<FLBAType>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<BooleanType>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<Int32Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<Int64Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<Int96Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<FloatType>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<DoubleType>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<ByteArrayType>;
+template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics<FLBAType>;
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/statistics.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/statistics.h b/src/parquet/column/statistics.h
index 6f12eb9..c6a2487 100644
--- a/src/parquet/column/statistics.h
+++ b/src/parquet/column/statistics.h
@@ -216,14 +216,14 @@ typedef TypedRowGroupStatistics<FLBAType> FLBAStatistics;
#pragma GCC diagnostic ignored "-Wattributes"
#endif
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<BooleanType>;
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<Int32Type>;
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<Int64Type>;
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<Int96Type>;
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<FloatType>;
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<DoubleType>;
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<ByteArrayType>;
-extern template class PARQUET_EXPORT TypedRowGroupStatistics<FLBAType>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<BooleanType>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<Int32Type>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<Int64Type>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<Int96Type>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<FloatType>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<DoubleType>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<ByteArrayType>;
+PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics<FLBAType>;
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/test-specialization.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/test-specialization.h b/src/parquet/column/test-specialization.h
index 27781cc..07767c0 100644
--- a/src/parquet/column/test-specialization.h
+++ b/src/parquet/column/test-specialization.h
@@ -115,7 +115,11 @@ void PrimitiveTypedTest<TestType>::SyncValuesOut() {}
template <>
void PrimitiveTypedTest<BooleanType>::SyncValuesOut() {
- std::copy(bool_buffer_out_.begin(), bool_buffer_out_.end(), values_out_.begin());
+ std::vector<uint8_t>::const_iterator source_iterator = bool_buffer_out_.begin();
+ std::vector<T>::iterator destination_iterator = values_out_.begin();
+ while (source_iterator != bool_buffer_out_.end()) {
+ *destination_iterator++ = *source_iterator++ != 0;
+ }
}
template <typename TestType>
@@ -143,7 +147,7 @@ void PrimitiveTypedTest<TestType>::GenerateData(int64_t num_values) {
def_levels_.resize(num_values);
values_.resize(num_values);
- InitValues<T>(num_values, values_, buffer_);
+ InitValues<T>(static_cast<int>(num_values), values_, buffer_);
values_ptr_ = values_.data();
std::fill(def_levels_.begin(), def_levels_.end(), 1);
@@ -154,7 +158,7 @@ void PrimitiveTypedTest<BooleanType>::GenerateData(int64_t num_values) {
def_levels_.resize(num_values);
values_.resize(num_values);
- InitValues<T>(num_values, values_, buffer_);
+ InitValues<T>(static_cast<int>(num_values), values_, buffer_);
bool_buffer_.resize(num_values);
std::copy(values_.begin(), values_.end(), bool_buffer_.begin());
values_ptr_ = reinterpret_cast<bool*>(bool_buffer_.data());
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/test-util.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/test-util.h b/src/parquet/column/test-util.h
index 97e936a..c133734 100644
--- a/src/parquet/column/test-util.h
+++ b/src/parquet/column/test-util.h
@@ -131,7 +131,7 @@ class DataPageBuilder {
void AppendValues(const ColumnDescriptor* d, const vector<T>& values,
Encoding::type encoding = Encoding::PLAIN) {
PlainEncoder<Type> encoder(d);
- encoder.Put(&values[0], values.size());
+ encoder.Put(&values[0], static_cast<int>(values.size()));
std::shared_ptr<Buffer> values_sink = encoder.FlushValues();
sink_->Write(values_sink->data(), values_sink->size());
@@ -174,10 +174,10 @@ class DataPageBuilder {
// RLE-encoded bytes have to be preceded in the stream by their absolute
// size.
LevelEncoder encoder;
- encoder.Init(
- encoding, max_level, levels.size(), encode_buffer.data(), encode_buffer.size());
+ encoder.Init(encoding, max_level, static_cast<int>(levels.size()),
+ encode_buffer.data(), static_cast<int>(encode_buffer.size()));
- encoder.Encode(levels.size(), levels.data());
+ encoder.Encode(static_cast<int>(levels.size()), levels.data());
int32_t rle_bytes = encoder.len();
sink_->Write(reinterpret_cast<const uint8_t*>(&rle_bytes), sizeof(int32_t));
@@ -192,7 +192,7 @@ void DataPageBuilder<BooleanType>::AppendValues(
ParquetException::NYI("only plain encoding currently implemented");
}
PlainEncoder<BooleanType> encoder(d);
- encoder.Put(values, values.size());
+ encoder.Put(values, static_cast<int>(values.size()));
std::shared_ptr<Buffer> buffer = encoder.FlushValues();
sink_->Write(buffer->data(), buffer->size());
@@ -243,7 +243,7 @@ class DictionaryPageBuilder {
~DictionaryPageBuilder() { pool_.FreeAll(); }
shared_ptr<Buffer> AppendValues(const vector<TC>& values) {
- int num_values = values.size();
+ int num_values = static_cast<int>(values.size());
// Dictionary encoding
encoder_->Put(values.data(), num_values);
num_dict_values_ = encoder_->num_entries();
@@ -291,7 +291,7 @@ static shared_ptr<DictionaryPage> MakeDictPage(const ColumnDescriptor* d,
Encoding::type encoding, vector<shared_ptr<Buffer>>& rle_indices) {
InMemoryOutputStream page_stream;
test::DictionaryPageBuilder<Type> page_builder(d);
- int num_pages = values_per_page.size();
+ int num_pages = static_cast<int>(values_per_page.size());
int value_start = 0;
for (int i = 0; i < num_pages; i++) {
@@ -313,7 +313,7 @@ static void PaginateDict(const ColumnDescriptor* d,
int16_t max_def_level, const vector<int16_t>& rep_levels, int16_t max_rep_level,
int num_levels_per_page, const vector<int>& values_per_page,
vector<shared_ptr<Page>>& pages, Encoding::type encoding = Encoding::RLE_DICTIONARY) {
- int num_pages = values_per_page.size();
+ int num_pages = static_cast<int>(values_per_page.size());
vector<shared_ptr<Buffer>> rle_indices;
shared_ptr<DictionaryPage> dict_page =
MakeDictPage<Type>(d, values, values_per_page, encoding, rle_indices);
@@ -332,7 +332,7 @@ static void PaginateDict(const ColumnDescriptor* d,
rep_level_end = (i + 1) * num_levels_per_page;
}
shared_ptr<DataPage> data_page = MakeDataPage<Int32Type>(d, {}, values_per_page[i],
- encoding, rle_indices[i]->data(), rle_indices[i]->size(),
+ encoding, rle_indices[i]->data(), static_cast<int>(rle_indices[i]->size()),
slice(def_levels, def_level_start, def_level_end), max_def_level,
slice(rep_levels, rep_level_start, rep_level_end), max_rep_level);
pages.push_back(data_page);
@@ -346,7 +346,7 @@ static void PaginatePlain(const ColumnDescriptor* d,
int16_t max_def_level, const vector<int16_t>& rep_levels, int16_t max_rep_level,
int num_levels_per_page, const vector<int>& values_per_page,
vector<shared_ptr<Page>>& pages, Encoding::type encoding = Encoding::PLAIN) {
- int num_pages = values_per_page.size();
+ int num_pages = static_cast<int>(values_per_page.size());
int def_level_start = 0;
int def_level_end = 0;
int rep_level_start = 0;
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/writer.cc
----------------------------------------------------------------------
diff --git a/src/parquet/column/writer.cc b/src/parquet/column/writer.cc
index bd23b06..59f9999 100644
--- a/src/parquet/column/writer.cc
+++ b/src/parquet/column/writer.cc
@@ -87,19 +87,19 @@ void ColumnWriter::WriteRepetitionLevels(int64_t num_levels, const int16_t* leve
int64_t ColumnWriter::RleEncodeLevels(
const Buffer& src_buffer, ResizableBuffer* dest_buffer, int16_t max_level) {
// TODO: This only works with due to some RLE specifics
- int64_t rle_size =
- LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, num_buffered_values_) +
- sizeof(int32_t);
+ int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level,
+ static_cast<int>(num_buffered_values_)) +
+ sizeof(int32_t);
// Use Arrow::Buffer::shrink_to_fit = false
// underlying buffer only keeps growing. Resize to a smaller size does not reallocate.
PARQUET_THROW_NOT_OK(dest_buffer->Resize(rle_size, false));
- level_encoder_.Init(Encoding::RLE, max_level, num_buffered_values_,
+ level_encoder_.Init(Encoding::RLE, max_level, static_cast<int>(num_buffered_values_),
dest_buffer->mutable_data() + sizeof(int32_t),
- dest_buffer->size() - sizeof(int32_t));
- int encoded = level_encoder_.Encode(
- num_buffered_values_, reinterpret_cast<const int16_t*>(src_buffer.data()));
+ static_cast<int>(dest_buffer->size()) - sizeof(int32_t));
+ int encoded = level_encoder_.Encode(static_cast<int>(num_buffered_values_),
+ reinterpret_cast<const int16_t*>(src_buffer.data()));
DCHECK_EQ(encoded, num_buffered_values_);
reinterpret_cast<int32_t*>(dest_buffer->mutable_data())[0] = level_encoder_.len();
int64_t encoded_size = level_encoder_.len() + sizeof(int32_t);
@@ -154,12 +154,13 @@ void ColumnWriter::AddDataPage() {
std::shared_ptr<Buffer> compressed_data_copy;
PARQUET_THROW_NOT_OK(compressed_data->Copy(
0, compressed_data->size(), allocator_, &compressed_data_copy));
- CompressedDataPage page(compressed_data_copy, num_buffered_values_, encoding_,
- Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats);
+ CompressedDataPage page(compressed_data_copy,
+ static_cast<int32_t>(num_buffered_values_), encoding_, Encoding::RLE,
+ Encoding::RLE, uncompressed_size, page_stats);
data_pages_.push_back(std::move(page));
} else { // Eagerly write pages
- CompressedDataPage page(compressed_data, num_buffered_values_, encoding_,
- Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats);
+ CompressedDataPage page(compressed_data, static_cast<int32_t>(num_buffered_values_),
+ encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats);
WriteDataPage(page);
}
@@ -170,8 +171,7 @@ void ColumnWriter::AddDataPage() {
}
void ColumnWriter::WriteDataPage(const CompressedDataPage& page) {
- int64_t bytes_written = pager_->WriteDataPage(page);
- total_bytes_written_ += bytes_written;
+ total_bytes_written_ += pager_->WriteDataPage(page);
}
int64_t ColumnWriter::Close() {
@@ -361,7 +361,7 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatch(int64_t num_values,
WriteRepetitionLevels(num_values, rep_levels);
} else {
// Each value is exactly one row
- num_rows_ += num_values;
+ num_rows_ += static_cast<int>(num_values);
}
if (num_rows_ > expected_rows_) {
@@ -422,7 +422,7 @@ inline int64_t TypedColumnWriter<DType>::WriteMiniBatchSpaced(int64_t num_values
WriteRepetitionLevels(num_values, rep_levels);
} else {
// Each value is exactly one row
- num_rows_ += num_values;
+ num_rows_ += static_cast<int>(num_values);
}
if (num_rows_ > expected_rows_) {
@@ -461,7 +461,7 @@ void TypedColumnWriter<DType>::WriteBatch(int64_t num_values, const int16_t* def
// of values, the chunking will ensure the AddDataPage() is called at a reasonable
// pagesize limit
int64_t write_batch_size = properties_->write_batch_size();
- int num_batches = num_values / write_batch_size;
+ int num_batches = static_cast<int>(num_values / write_batch_size);
int64_t num_remaining = num_values % write_batch_size;
int64_t value_offset = 0;
for (int round = 0; round < num_batches; round++) {
@@ -486,7 +486,7 @@ void TypedColumnWriter<DType>::WriteBatchSpaced(int64_t num_values,
// of values, the chunking will ensure the AddDataPage() is called at a reasonable
// pagesize limit
int64_t write_batch_size = properties_->write_batch_size();
- int num_batches = num_values / write_batch_size;
+ int num_batches = static_cast<int>(num_values / write_batch_size);
int64_t num_remaining = num_values % write_batch_size;
int64_t num_spaced_written = 0;
int64_t values_offset = 0;
@@ -506,22 +506,23 @@ void TypedColumnWriter<DType>::WriteBatchSpaced(int64_t num_values,
template <typename DType>
void TypedColumnWriter<DType>::WriteValues(int64_t num_values, const T* values) {
- current_encoder_->Put(values, num_values);
+ current_encoder_->Put(values, static_cast<int>(num_values));
}
template <typename DType>
void TypedColumnWriter<DType>::WriteValuesSpaced(int64_t num_values,
const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) {
- current_encoder_->PutSpaced(values, num_values, valid_bits, valid_bits_offset);
+ current_encoder_->PutSpaced(
+ values, static_cast<int>(num_values), valid_bits, valid_bits_offset);
}
-template class TypedColumnWriter<BooleanType>;
-template class TypedColumnWriter<Int32Type>;
-template class TypedColumnWriter<Int64Type>;
-template class TypedColumnWriter<Int96Type>;
-template class TypedColumnWriter<FloatType>;
-template class TypedColumnWriter<DoubleType>;
-template class TypedColumnWriter<ByteArrayType>;
-template class TypedColumnWriter<FLBAType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<BooleanType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<Int32Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<Int64Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<Int96Type>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<FloatType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<DoubleType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<ByteArrayType>;
+template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter<FLBAType>;
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/column/writer.h
----------------------------------------------------------------------
diff --git a/src/parquet/column/writer.h b/src/parquet/column/writer.h
index 305c35e..c7f9ea0 100644
--- a/src/parquet/column/writer.h
+++ b/src/parquet/column/writer.h
@@ -118,17 +118,17 @@ class PARQUET_EXPORT ColumnWriter {
// values. For repeated or optional values, there may be fewer data values
// than levels, and this tells you how many encoded levels there are in that
// case.
- int num_buffered_values_;
+ int64_t num_buffered_values_;
// The total number of stored values. For repeated or optional values, this
// number may be lower than num_buffered_values_.
- int num_buffered_encoded_values_;
+ int64_t num_buffered_encoded_values_;
// Total number of rows written with this ColumnWriter
int num_rows_;
// Records the total number of bytes written by the serializer
- int total_bytes_written_;
+ int64_t total_bytes_written_;
// Flag to check if the Writer has been closed
bool closed_;
@@ -212,14 +212,14 @@ typedef TypedColumnWriter<DoubleType> DoubleWriter;
typedef TypedColumnWriter<ByteArrayType> ByteArrayWriter;
typedef TypedColumnWriter<FLBAType> FixedLenByteArrayWriter;
-extern template class PARQUET_EXPORT TypedColumnWriter<BooleanType>;
-extern template class PARQUET_EXPORT TypedColumnWriter<Int32Type>;
-extern template class PARQUET_EXPORT TypedColumnWriter<Int64Type>;
-extern template class PARQUET_EXPORT TypedColumnWriter<Int96Type>;
-extern template class PARQUET_EXPORT TypedColumnWriter<FloatType>;
-extern template class PARQUET_EXPORT TypedColumnWriter<DoubleType>;
-extern template class PARQUET_EXPORT TypedColumnWriter<ByteArrayType>;
-extern template class PARQUET_EXPORT TypedColumnWriter<FLBAType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<BooleanType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<Int32Type>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<Int64Type>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<Int96Type>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<FloatType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<DoubleType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<ByteArrayType>;
+PARQUET_EXTERN_TEMPLATE TypedColumnWriter<FLBAType>;
} // namespace parquet
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/compression-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression-test.cc b/src/parquet/compression-test.cc
index f4fd3ba..feaf9e3 100644
--- a/src/parquet/compression-test.cc
+++ b/src/parquet/compression-test.cc
@@ -34,13 +34,13 @@ void CheckCodecRoundtrip(const vector<uint8_t>& data) {
T c1;
T c2;
- int max_compressed_len = c1.MaxCompressedLen(data.size(), &data[0]);
+ int max_compressed_len = static_cast<int>(c1.MaxCompressedLen(data.size(), &data[0]));
std::vector<uint8_t> compressed(max_compressed_len);
std::vector<uint8_t> decompressed(data.size());
// compress with c1
- int actual_size =
- c1.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]);
+ int actual_size = static_cast<int>(
+ c1.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]));
compressed.resize(actual_size);
// decompress with c2
@@ -49,8 +49,8 @@ void CheckCodecRoundtrip(const vector<uint8_t>& data) {
ASSERT_TRUE(test::vector_equal(data, decompressed));
// compress with c2
- int actual_size2 =
- c2.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]);
+ int actual_size2 = static_cast<int>(
+ c2.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]));
ASSERT_EQ(actual_size2, actual_size);
// decompress with c1
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/compression.cc
----------------------------------------------------------------------
diff --git a/src/parquet/compression.cc b/src/parquet/compression.cc
index 7d219fe..dc6b93d 100644
--- a/src/parquet/compression.cc
+++ b/src/parquet/compression.cc
@@ -152,9 +152,9 @@ class GZipCodec::GZipCodecImpl {
// from the beginning again.
while (ret != Z_STREAM_END) {
stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
- stream_.avail_in = input_length;
+ stream_.avail_in = static_cast<uInt>(input_length);
stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = output_length;
+ stream_.avail_out = static_cast<uInt>(output_length);
// We know the output size. In this case, we can use Z_FINISH
// which is more efficient.
@@ -188,9 +188,9 @@ class GZipCodec::GZipCodecImpl {
uint8_t* output) {
if (!compressor_initialized_) { InitCompressor(); }
stream_.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(input));
- stream_.avail_in = input_length;
+ stream_.avail_in = static_cast<uInt>(input_length);
stream_.next_out = reinterpret_cast<Bytef*>(output);
- stream_.avail_out = output_length;
+ stream_.avail_out = static_cast<uInt>(output_length);
int64_t ret = 0;
if ((ret = deflate(&stream_, Z_FINISH)) != Z_STREAM_END) {
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/encoding-internal.h
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-internal.h b/src/parquet/encoding-internal.h
index 7e90254..7e78123 100644
--- a/src/parquet/encoding-internal.h
+++ b/src/parquet/encoding-internal.h
@@ -196,7 +196,8 @@ class PlainEncoder<BooleanType> : public Encoder<BooleanType> {
bits_available_(kInMemoryDefaultCapacity * 8),
bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)),
values_sink_(new InMemoryOutputStream(pool)) {
- bit_writer_.reset(new BitWriter(bits_buffer_->mutable_data(), bits_buffer_->size()));
+ bit_writer_.reset(new BitWriter(
+ bits_buffer_->mutable_data(), static_cast<int>(bits_buffer_->size())));
}
int64_t EstimatedDataEncodedSize() override {
@@ -208,7 +209,7 @@ class PlainEncoder<BooleanType> : public Encoder<BooleanType> {
bit_writer_->Flush();
values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written());
bit_writer_->Clear();
- bits_available_ = bits_buffer_->size() * 8;
+ bits_available_ = static_cast<int>(bits_buffer_->size()) * 8;
}
std::shared_ptr<Buffer> buffer = values_sink_->GetBuffer();
@@ -236,7 +237,7 @@ class PlainEncoder<BooleanType> : public Encoder<BooleanType> {
\
int bits_remaining = num_values - bit_offset; \
while (bit_offset < num_values) { \
- bits_available_ = bits_buffer_->size() * 8; \
+ bits_available_ = static_cast<int>(bits_buffer_->size()) * 8; \
\
int bits_to_write = std::min(bits_available_, bits_remaining); \
for (int i = bit_offset; i < bit_offset + bits_to_write; i++) { \
@@ -463,7 +464,9 @@ class DictEncoder : public Encoder<DType> {
// reserve
// an extra "RleEncoder::MinBufferSize" bytes. These extra bytes won't be used
// but not reserving them would cause the encoder to fail.
- return 1 + RleEncoder::MaxBufferSize(bit_width(), buffered_indices_.size()) +
+ return 1 +
+ RleEncoder::MaxBufferSize(
+ bit_width(), static_cast<int>(buffered_indices_.size())) +
RleEncoder::MinBufferSize(bit_width());
}
@@ -493,7 +496,8 @@ class DictEncoder : public Encoder<DType> {
std::shared_ptr<Buffer> FlushValues() override {
std::shared_ptr<PoolBuffer> buffer =
AllocateBuffer(this->allocator_, EstimatedDataEncodedSize());
- int result_size = WriteIndices(buffer->mutable_data(), EstimatedDataEncodedSize());
+ int result_size = WriteIndices(
+ buffer->mutable_data(), static_cast<int>(EstimatedDataEncodedSize()));
ClearIndices();
PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false));
return buffer;
@@ -507,7 +511,7 @@ class DictEncoder : public Encoder<DType> {
void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
int64_t valid_bits_offset) override {
- INIT_BITSET(valid_bits, valid_bits_offset);
+ INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
for (int32_t i = 0; i < num_values; i++) {
if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { Put(src[i]); }
READ_NEXT_BITSET(valid_bits);
@@ -521,7 +525,7 @@ class DictEncoder : public Encoder<DType> {
ChunkedAllocator* mem_pool() { return pool_; }
/// The number of entries in the dictionary.
- int num_entries() const { return uniques_.size(); }
+ int num_entries() const { return static_cast<int>(uniques_.size()); }
private:
::arrow::MemoryPool* allocator_;
@@ -607,7 +611,7 @@ inline void DictEncoder<DType>::Put(const typename DType::c_type& v) {
if (index == HASH_SLOT_EMPTY) {
// Not in the hash table, so we insert it now
- index = uniques_.size();
+ index = static_cast<hash_slot_t>(uniques_.size());
hash_slots_[j] = index;
AddDictKey(v);
@@ -808,7 +812,7 @@ class DeltaBitPackDecoder : public Decoder<DType> {
int64_t delta;
if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException();
delta += min_delta_;
- last_value_ += delta;
+ last_value_ += static_cast<int32_t>(delta);
buffer[i] = last_value_;
--values_current_mini_block_;
}
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/encoding-test.cc
----------------------------------------------------------------------
diff --git a/src/parquet/encoding-test.cc b/src/parquet/encoding-test.cc
index fbf6812..2e78036 100644
--- a/src/parquet/encoding-test.cc
+++ b/src/parquet/encoding-test.cc
@@ -42,7 +42,7 @@ namespace test {
TEST(VectorBooleanTest, TestEncodeDecode) {
// PARQUET-454
int nvalues = 10000;
- int nbytes = BitUtil::Ceil(nvalues, 8);
+ int nbytes = static_cast<int>(BitUtil::Ceil(nvalues, 8));
// seed the prng so failure is deterministic
vector<bool> draws = flip_coins_seed(nvalues, 0.5, 0);
@@ -58,7 +58,8 @@ TEST(VectorBooleanTest, TestEncodeDecode) {
vector<uint8_t> decode_buffer(nbytes);
const uint8_t* decode_data = &decode_buffer[0];
- decoder.SetData(nvalues, encode_buffer->data(), encode_buffer->size());
+ decoder.SetData(
+ nvalues, encode_buffer->data(), static_cast<int>(encode_buffer->size()));
int values_decoded = decoder.Decode(&decode_buffer[0], nvalues);
ASSERT_EQ(nvalues, values_decoded);
@@ -218,7 +219,8 @@ class TestPlainEncoding : public TestEncodingBase<Type> {
encoder.Put(draws_, num_values_);
encode_buffer_ = encoder.FlushValues();
- decoder.SetData(num_values_, encode_buffer_->data(), encode_buffer_->size());
+ decoder.SetData(
+ num_values_, encode_buffer_->data(), static_cast<int>(encode_buffer_->size()));
int values_decoded = decoder.Decode(decode_buf_, num_values_);
ASSERT_EQ(num_values_, values_decoded);
VerifyResults<T>(decode_buf_, draws_, num_values_);
@@ -263,13 +265,13 @@ class TestDictionaryEncoding : public TestEncodingBase<Type> {
ASSERT_TRUE(indices_from_spaced->Equals(*indices));
PlainDecoder<Type> dict_decoder(descr_.get());
- dict_decoder.SetData(
- encoder.num_entries(), dict_buffer_->data(), dict_buffer_->size());
+ dict_decoder.SetData(encoder.num_entries(), dict_buffer_->data(),
+ static_cast<int>(dict_buffer_->size()));
DictionaryDecoder<Type> decoder(descr_.get());
decoder.SetDict(&dict_decoder);
- decoder.SetData(num_values_, indices->data(), indices->size());
+ decoder.SetData(num_values_, indices->data(), static_cast<int>(indices->size()));
int values_decoded = decoder.Decode(decode_buf_, num_values_);
ASSERT_EQ(num_values_, values_decoded);
@@ -279,7 +281,7 @@ class TestDictionaryEncoding : public TestEncodingBase<Type> {
VerifyResults<T>(decode_buf_, draws_, num_values_);
// Also test spaced decoding
- decoder.SetData(num_values_, indices->data(), indices->size());
+ decoder.SetData(num_values_, indices->data(), static_cast<int>(indices->size()));
values_decoded =
decoder.DecodeSpaced(decode_buf_, num_values_, 0, valid_bits.data(), 0);
ASSERT_EQ(num_values_, values_decoded);
http://git-wip-us.apache.org/repos/asf/parquet-cpp/blob/fc5228af/src/parquet/encoding.h
----------------------------------------------------------------------
diff --git a/src/parquet/encoding.h b/src/parquet/encoding.h
index 69fc40e..47f2b75 100644
--- a/src/parquet/encoding.h
+++ b/src/parquet/encoding.h
@@ -52,7 +52,7 @@ class Encoder {
PoolBuffer buffer(pool_);
buffer.Resize(num_values * sizeof(T));
int32_t num_valid_values = 0;
- INIT_BITSET(valid_bits, valid_bits_offset);
+ INIT_BITSET(valid_bits, static_cast<int>(valid_bits_offset));
T* data = reinterpret_cast<T*>(buffer.mutable_data());
for (int32_t i = 0; i < num_values; i++) {
if (bitset_valid_bits & (1 << bit_offset_valid_bits)) {