You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@orc.apache.org by "Gang Wu (Jira)" <ji...@apache.org> on 2022/11/16 06:16:00 UTC
[jira] [Resolved] (ORC-1304) [C++] throw ParseError when using SearchArgument with nested struct
[ https://issues.apache.org/jira/browse/ORC-1304?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Gang Wu resolved ORC-1304.
--------------------------
Resolution: Fixed
> [C++] throw ParseError when using SearchArgument with nested struct
> -------------------------------------------------------------------
>
> Key: ORC-1304
> URL: https://issues.apache.org/jira/browse/ORC-1304
> Project: ORC
> Issue Type: Bug
> Reporter: ZhangXin
> Assignee: ZhangXin
> Priority: Major
>
> [GitHub issue #1296|https://github.com/apache/orc/issues/1296]
> Code example that reproduces the error:
> {code:c++}
> WriterOptions options;
> auto stream = writeLocalFile("orc_file_test");
> MemoryPool* pool = getDefaultPool();
> std::unique_ptr<Type> type(Type::buildTypeFromString(
> "struct<col0:struct<col1:int>,col2:struct<col3:int>>"));
> size_t num = 50000;
> std::unique_ptr<Writer> writer = createWriter(*type, stream.get(), options);
> std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num);
> StructVectorBatch* structBatch =
> dynamic_cast<StructVectorBatch*>(batch.get());
> StructVectorBatch* structBatch2 =
> dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
> LongVectorBatch* intBatch =
> dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
> StructVectorBatch* structBatch3 =
> dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
> LongVectorBatch* intBatch2 =
> dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]);
> structBatch->numElements = num;
> structBatch2->numElements = num;
> structBatch3->numElements = num;
> structBatch3->hasNulls = true;
> for (int64_t i = 0; i < num; ++i) {
> intBatch->data.data()[i] = rand() % 150000;
> intBatch->notNull[i] = 1;
> intBatch2->notNull[i] = 0;
> intBatch2->hasNulls = true;
> structBatch3->notNull[i] = 0;
> }
> intBatch->hasNulls = false;
> writer->add(*batch);
> writer->close();
> ReaderOptions readOptions;
> readOptions.setMemoryPool(*getDefaultPool());
> auto reader = createReader(readLocalFile("orc_file_test"), readOptions);
> orc::RowReaderOptions rowOptions;
> rowOptions.searchArgument(
> SearchArgumentFactory::newBuilder()
> ->startAnd()
> .equals(2, PredicateDataType::LONG, Literal((int64_t)5))
> .end()
> .build());
> std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowOptions);
> batch = rowReader->createRowBatch(num);
> structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
> structBatch2 = dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
> intBatch = dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
> structBatch3 = dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
> while (rowReader->next(*batch)) {
> for (size_t i = 0; i < batch->numElements; i++) {
>
> }
> }
> {code}
> Stack trace:
> {code:java}
> terminate called after throwing an instance of 'orc::ParseError'
> what(): bad read in nextBuffer
> *** Aborted at 1666816640 (Unix time, try 'date -d @1666816640') ***
> *** Signal 6 (SIGABRT) (0x2035c0002b7ad) received by PID 178093 (pthread TID 0x7ffb12545a80) (linux TID 178093) (maybe from PID 178093, UID 131932) (code: -6), stack trace: ***
> @ 0000000000000000 (unknown)
> @ 000000000009c9d3 __GI___pthread_kill
> @ 00000000000444ec __GI_raise
> @ 000000000002c432 __GI_abort
> @ 00000000000a3fd4 __gnu_cxx::__verbose_terminate_handler()
> @ 00000000000a1b39 __cxxabiv1::__terminate(void (*)())
> @ 00000000000a1ba4 std::terminate()
> @ 00000000000a1e6f __cxa_throw
> @ 0000000001efcd55 __cxa_throw
> @ 00000000075b676c orc::BooleanRleDecoderImpl::seek(orc::PositionProvider&)
> /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ByteRLE.cc:526
> @ 00000000075af711 orc::IntegerColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
> /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:120
> @ 00000000075af67f orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
> /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
> @ 00000000075af67f orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
> /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
> @ 0000000007598179 orc::RowReaderImpl::seekToRowGroup(unsigned int)
> /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:440
> @ 000000000759d700 orc::RowReaderImpl::startNextStripe()
> /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1037
> @ 000000000759daf4 orc::RowReaderImpl::next(orc::ColumnVectorBatch&)
> /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1055
> @ 0000000002fba9bc main
> @ 000000000002c656 __libc_start_call_main
> @ 000000000002c717 __libc_start_main_alias_2
> @ 0000000002fb2780 _start
> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)