You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by ap...@apache.org on 2021/11/18 09:04:44 UTC

[arrow] branch master updated: ARROW-14704: [C++] Fix Valgrind failure in parquet-arrow-test

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 2fec2fa  ARROW-14704: [C++] Fix Valgrind failure in parquet-arrow-test
2fec2fa is described below

commit 2fec2fa3dd6f0c078a4ed83466269258ea8338cc
Author: Antoine Pitrou <an...@python.org>
AuthorDate: Thu Nov 18 10:02:58 2021 +0100

    ARROW-14704: [C++] Fix Valgrind failure in parquet-arrow-test
    
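    Return early from the delta decoders when there are no values left to
    decode, which avoids the uninitialised-value error reported by Valgrind.
    
    Error log of Valgrind failure: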
    ```
    [----------] 3 tests from TestArrowReadDeltaEncoding
    [ RUN      ] TestArrowReadDeltaEncoding.DeltaBinaryPacked
    [       OK ] TestArrowReadDeltaEncoding.DeltaBinaryPacked (812 ms)
    [ RUN      ] TestArrowReadDeltaEncoding.DeltaByteArray
    ==12587== Conditional jump or move depends on uninitialised value(s)
    ==12587==    at 0x4F12C57: Advance (bit_stream_utils.h:426)
    ==12587==    by 0x4F12C57: parquet::(anonymous namespace)::DeltaBitPackDecoder<parquet::PhysicalType<(parquet::Type::type)1> >::GetInternal(int*, int) (encoding.cc:2216)
    ==12587==    by 0x4F13823: Decode (encoding.cc:2091)
    ==12587==    by 0x4F13823: parquet::(anonymous namespace)::DeltaByteArrayDecoder::SetData(int, unsigned char const*, int) (encoding.cc:2360)
    ==12587==    by 0x4E89EF5: parquet::(anonymous namespace)::ColumnReaderImplBase<parquet::PhysicalType<(parquet::Type::type)6> >::InitializeDataDecoder(parquet::DataPage const&, long) (column_reader.cc:797)
    ==12587==    by 0x4E9AE63: ReadNewPage (column_reader.cc:614)
    ==12587==    by 0x4E9AE63: HasNextInternal (column_reader.cc:576)
    ==12587==    by 0x4E9AE63: parquet::internal::(anonymous namespace)::TypedRecordReader<parquet::PhysicalType<(parquet::Type::type)6> >::ReadRecords(long) (column_reader.cc:1228)
    ==12587==    by 0x4DFB19F: parquet::arrow::(anonymous namespace)::LeafReader::LoadBatch(long) (reader.cc:467)
    ==12587==    by 0x4DF513C: parquet::arrow::ColumnReaderImpl::NextBatch(long, std::shared_ptr<arrow::ChunkedArray>*) (reader.cc:108)
    ==12587==    by 0x4DFB74D: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadColumn(int, std::vector<int, std::allocator<int> > const&, parquet::arrow::ColumnReader*, std::shared_ptr<arrow::ChunkedArray>*) (reader.cc:273)
    ==12587==    by 0x4E11FDA: operator() (reader.cc:1180)
    ==12587==    by 0x4E11FDA: arrow::Future<std::vector<std::shared_ptr<arrow::ChunkedArray>, std::allocator<arrow::Future> > > arrow::internal::OptionalParallelForAsync<parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr<parquet::arrow::(anonymous namespace)::FileReaderImpl>, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, arrow::internal::Executor*)::{lambda(unsigned long, std::shared_ptr<parquet::arrow::C [...]
    ==12587==    by 0x4E126A9: parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr<parquet::arrow::(anonymous namespace)::FileReaderImpl>, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, arrow::internal::Executor*) (reader.cc:1198)
    ==12587==    by 0x4E12F50: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadRowGroups(std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::shared_ptr<arrow::Table>*) (reader.cc:1160)
    ==12587==    by 0x4DFA2BC: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadTable(std::vector<int, std::allocator<int> > const&, std::shared_ptr<arrow::Table>*) (reader.cc:198)
    ==12587==    by 0x4DFA392: parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadTable(std::shared_ptr<arrow::Table>*) (reader.cc:289)
    ==12587==    by 0x1DCE62: parquet::arrow::TestArrowReadDeltaEncoding::ReadTableFromParquetFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::shared_ptr<arrow::Table>*) (arrow_reader_writer_test.cc:4174)
    ==12587==    by 0x2266D2: parquet::arrow::TestArrowReadDeltaEncoding_DeltaByteArray_Test::TestBody() (arrow_reader_writer_test.cc:4209)
    ==12587==    by 0x4AD2C9B: void testing::internal::HandleSehExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (gtest.cc:2607)
    ==12587==    by 0x4AC9DD1: void testing::internal::HandleExceptionsInMethodIfSupported<testing::Test, void>(testing::Test*, void (testing::Test::*)(), char const*) (gtest.cc:2643)
    ==12587==    by 0x4AA4C02: testing::Test::Run() (gtest.cc:2682)
    ==12587==    by 0x4AA563A: testing::TestInfo::Run() (gtest.cc:2861)
    ==12587==    by 0x4AA600F: testing::TestSuite::Run() (gtest.cc:3015)
    ==12587==    by 0x4AB631B: testing::internal::UnitTestImpl::RunAllTests() (gtest.cc:5855)
    ==12587==    by 0x4AD3CE7: bool testing::internal::HandleSehExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) (gtest.cc:2607)
    ==12587==    by 0x4ACB063: bool testing::internal::HandleExceptionsInMethodIfSupported<testing::internal::UnitTestImpl, bool>(testing::internal::UnitTestImpl*, bool (testing::internal::UnitTestImpl::*)(), char const*) (gtest.cc:2643)
    ==12587==    by 0x4AB47B6: testing::UnitTest::Run() (gtest.cc:5438)
    ==12587==    by 0x4218918: RUN_ALL_TESTS() (gtest.h:2490)
    ==12587==    by 0x421895B: main (gtest_main.cc:52)
    ```
    
    Closes #11725 from pitrou/ARROW-14704-parquet-valgrind
    
    Authored-by: Antoine Pitrou <an...@python.org>
    Signed-off-by: Antoine Pitrou <an...@python.org>
---
 cpp/src/parquet/encoding.cc | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 4c980df..783e868 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -2175,6 +2175,10 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DTyp
 
   int GetInternal(T* buffer, int max_values) {
     max_values = std::min(max_values, this->num_values_);
+    if (max_values == 0) {
+      return 0;
+    }
+
     DCHECK_LE(static_cast<uint32_t>(max_values), total_value_count_);
     int i = 0;
     while (i < max_values) {
@@ -2272,6 +2276,9 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
     // Decode up to `max_values` strings into an internal buffer
     // and reference them into `buffer`.
     max_values = std::min(max_values, num_valid_values_);
+    if (max_values == 0) {
+      return 0;
+    }
 
     int32_t data_size = 0;
     const int32_t* length_ptr =
@@ -2406,6 +2413,10 @@ class DeltaByteArrayDecoder : public DecoderImpl,
     // Decode up to `max_values` strings into an internal buffer
     // and reference them into `buffer`.
     max_values = std::min(max_values, num_valid_values_);
+    if (max_values == 0) {
+      return max_values;
+    }
+
     suffix_decoder_.Decode(buffer, max_values);
 
     int64_t data_size = 0;