You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/10/13 10:10:57 UTC

[GitHub] [arrow] pitrou commented on pull request #14147: ARROW-17450 : [C++][Parquet] Support RLE decode for boolean datatype

pitrou commented on PR #14147:
URL: https://github.com/apache/arrow/pull/14147#issuecomment-1277369600

   This PR triggers undefined behavior in fuzzing builds:
   ```
   /build/build-fuzz/debug/parquet-arrow-fuzz: Running 1 inputs 1 time(s) each.
   Running: clusterfuzz-testcase-minimized-parquet-arrow-fuzz-5017972913864704
   /home/antoine/arrow/dev/cpp/src/arrow/util/bit_stream_utils.h:415:42: runtime error: load of value 32, which is not a valid value for type 'bool'
   SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/antoine/arrow/dev/cpp/src/arrow/util/bit_stream_utils.h:415:42 in 
   ==34310== ERROR: libFuzzer: deadly signal
       #0 0x55771d20cef1 in __sanitizer_print_stack_trace (/build/build-fuzz/debug/parquet-arrow-fuzz+0xe5ef1) (BuildId: 7423ef34b89be7542c884a94acdc1490cca072b4)
       #1 0x55771d17fe77 in fuzzer::PrintStackTrace() crtstuff.c
       #2 0x55771d1659a3 in fuzzer::Fuzzer::CrashCallback() crtstuff.c
       #3 0x7f9ccb2e741f  (/lib/x86_64-linux-gnu/libpthread.so.0+0x1441f) (BuildId: 7b4536f41cdaa5888408e82d0836e33dcf436466)
       #4 0x7f9ccb10900a in __libc_signal_restore_set /build/glibc-SzIz7B/glibc-2.31/signal/../sysdeps/unix/sysv/linux/internal-signals.h:86:3
       #5 0x7f9ccb10900a in raise /build/glibc-SzIz7B/glibc-2.31/signal/../sysdeps/unix/sysv/linux/raise.c:48:3
       #6 0x7f9ccb0e8858 in abort /build/glibc-SzIz7B/glibc-2.31/stdlib/abort.c:79:7
       #7 0x55771d227176 in __sanitizer::Abort() crtstuff.c
       #8 0x55771d225000 in __sanitizer::Die() crtstuff.c
       #9 0x55771d238d3b in __ubsan::ScopedReport::~ScopedReport() crtstuff.c
       #10 0x55771d23baea in handleLoadInvalidValue(__ubsan::InvalidValueData*, unsigned long, __ubsan::ReportOptions) crtstuff.c
       #11 0x55771d23bb2d in __ubsan_handle_load_invalid_value_abort (/build/build-fuzz/debug/parquet-arrow-fuzz+0x114b2d) (BuildId: 7423ef34b89be7542c884a94acdc1490cca072b4)
       #12 0x7f9ce5fbe8fa in bool arrow::bit_util::BitReader::GetAligned<bool>(int, bool*) /home/antoine/arrow/dev/cpp/src/arrow/util/bit_stream_utils.h:415:42
       #13 0x7f9ce5fbc533 in bool arrow::util::RleDecoder::NextCounts<bool>() /home/antoine/arrow/dev/cpp/src/arrow/util/rle_encoding.h:663:22
       #14 0x7f9ce5fb9087 in int arrow::util::RleDecoder::GetBatch<bool>(bool*, int) /home/antoine/arrow/dev/cpp/src/arrow/util/rle_encoding.h:329:12
       #15 0x7f9ce5faa13d in parquet::(anonymous namespace)::RleBooleanDecoder::Decode(bool*, int) /home/antoine/arrow/dev/cpp/src/parquet/encoding.cc:2388:19
       #16 0x7f9ce5d179a8 in parquet::internal::(anonymous namespace)::TypedRecordReader<parquet::PhysicalType<(parquet::Type::type)0> >::ReadValuesDense(long) /home/antoine/arrow/dev/cpp/src/parquet/column_reader.cc:1531:33
       #17 0x7f9ce5d1ac37 in parquet::internal::(anonymous namespace)::TypedRecordReader<parquet::PhysicalType<(parquet::Type::type)0> >::ReadRecordData(long) /home/antoine/arrow/dev/cpp/src/parquet/column_reader.cc:1575:7
       #18 0x7f9ce5d139b4 in parquet::internal::(anonymous namespace)::TypedRecordReader<parquet::PhysicalType<(parquet::Type::type)0> >::ReadRecords(long) /home/antoine/arrow/dev/cpp/src/parquet/column_reader.cc:1331:25
       #19 0x7f9ce572137d in parquet::arrow::(anonymous namespace)::LeafReader::LoadBatch(long) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:479:46
       #20 0x7f9ce571821e in parquet::arrow::ColumnReaderImpl::NextBatch(long, std::shared_ptr<arrow::ChunkedArray>*) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:109:5
       #21 0x7f9ce5785828 in parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadColumn(int, std::vector<int, std::allocator<int> > const&, parquet::arrow::ColumnReader*, std::shared_ptr<arrow::ChunkedArray>*) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:285:20
       #22 0x7f9ce57fe70b in parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr<parquet::arrow::(anonymous namespace)::FileReaderImpl>, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, arrow::internal::Executor*)::$_4::operator()(unsigned long, std::shared_ptr<parquet::arrow::ColumnReaderImpl>) const /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:1236:5
       #23 0x7f9ce57fa00c in arrow::Future<std::vector<std::shared_ptr<arrow::ChunkedArray>, std::allocator<std::shared_ptr<arrow::ChunkedArray> > > > arrow::internal::OptionalParallelForAsync<parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr<parquet::arrow::(anonymous namespace)::FileReaderImpl>, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, arrow::internal::Executor*)::$_4&, std::shared_ptr<parquet::arrow::ColumnReaderImpl>, std::shared_ptr<arrow::ChunkedArray> >(bool, std::vector<std::shared_ptr<parquet::arrow::ColumnReaderImpl>, std::allocator<std::shared_ptr<parquet::arrow::ColumnReaderImpl> > >, parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr<parquet::arrow::(anonymous namespace)::FileReaderImpl>, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, arrow::internal::Executor*)::$_4&, arrow::internal::Executor*) /home/antoine/arrow/dev/cpp/src/arrow/util/parallel.h:95:7
       #24 0x7f9ce57f89bb in parquet::arrow::(anonymous namespace)::FileReaderImpl::DecodeRowGroups(std::shared_ptr<parquet::arrow::(anonymous namespace)::FileReaderImpl>, std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, arrow::internal::Executor*) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:1254:10
       #25 0x7f9ce56f9266 in parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadRowGroups(std::vector<int, std::allocator<int> > const&, std::vector<int, std::allocator<int> > const&, std::shared_ptr<arrow::Table>*) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:1215:14
       #26 0x7f9ce56f7e57 in parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadRowGroup(int, std::vector<int, std::allocator<int> > const&, std::shared_ptr<arrow::Table>*) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:322:12
       #27 0x7f9ce56f83ab in parquet::arrow::(anonymous namespace)::FileReaderImpl::ReadRowGroup(int, std::shared_ptr<arrow::Table>*) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:326:12
       #28 0x7f9ce56e85d1 in parquet::arrow::internal::FuzzReader(std::unique_ptr<parquet::arrow::FileReader, std::default_delete<parquet::arrow::FileReader> >) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:1338:37
       #29 0x7f9ce56e9c35 in parquet::arrow::internal::FuzzReader(unsigned char const*, long) /home/antoine/arrow/dev/cpp/src/parquet/arrow/reader.cc:1355:10
       #30 0x55771d2402e7 in LLVMFuzzerTestOneInput /home/antoine/arrow/dev/cpp/src/parquet/arrow/fuzz.cc:22:17
       #31 0x55771d1670b3 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) crtstuff.c
       #32 0x55771d15146f in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) crtstuff.c
       #33 0x55771d157176 in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) crtstuff.c
       #34 0x55771d1807b2 in main (/build/build-fuzz/debug/parquet-arrow-fuzz+0x597b2) (BuildId: 7423ef34b89be7542c884a94acdc1490cca072b4)
       #35 0x7f9ccb0ea082 in __libc_start_main /build/glibc-SzIz7B/glibc-2.31/csu/../csu/libc-start.c:308:16
       #36 0x55771d14bc5d in _start (/build/build-fuzz/debug/parquet-arrow-fuzz+0x24c5d) (BuildId: 7423ef34b89be7542c884a94acdc1490cca072b4)
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org