You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@orc.apache.org by "Gang Wu (Jira)" <ji...@apache.org> on 2022/11/16 06:16:00 UTC

[jira] [Assigned] (ORC-1304) [C++] throw ParseError when using SearchArgument with nested struct

     [ https://issues.apache.org/jira/browse/ORC-1304?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Gang Wu reassigned ORC-1304:
----------------------------

    Assignee: ZhangXin

> [C++] throw ParseError when using SearchArgument with nested struct
> -------------------------------------------------------------------
>
>                 Key: ORC-1304
>                 URL: https://issues.apache.org/jira/browse/ORC-1304
>             Project: ORC
>          Issue Type: Bug
>            Reporter: ZhangXin
>            Assignee: ZhangXin
>            Priority: Major
>
> [GitHub issue #1296|https://github.com/apache/orc/issues/1296]
> Code example to reproduce:
> {code:c++}
> WriterOptions options;
>   auto stream = writeLocalFile("orc_file_test");
>   MemoryPool* pool = getDefaultPool();
>   std::unique_ptr<Type> type(Type::buildTypeFromString(
>       "struct<col0:struct<col1:int>,col2:struct<col3:int>>"));
>   size_t num = 50000;
>   std::unique_ptr<Writer> writer = createWriter(*type, stream.get(), options);
>   std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(num);
>   StructVectorBatch* structBatch =
>       dynamic_cast<StructVectorBatch*>(batch.get());
>   StructVectorBatch* structBatch2 =
>       dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
>   LongVectorBatch* intBatch =
>       dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
>   StructVectorBatch* structBatch3 =
>       dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
>   LongVectorBatch* intBatch2 =
>       dynamic_cast<LongVectorBatch*>(structBatch3->fields[0]);
>   structBatch->numElements = num;
>   structBatch2->numElements = num;
>   structBatch3->numElements = num;
>   structBatch3->hasNulls = true;
>   for (int64_t i = 0; i < num; ++i) {
>     intBatch->data.data()[i] = rand() % 150000;
>     intBatch->notNull[i] = 1;
>     intBatch2->notNull[i] = 0;
>     intBatch2->hasNulls = true;
>     structBatch3->notNull[i] = 0;
>   }
>   intBatch->hasNulls = false;
>   writer->add(*batch);
>   writer->close();
>   ReaderOptions readOptions;
>   readOptions.setMemoryPool(*getDefaultPool());
>   auto reader = createReader(readLocalFile("orc_file_test"), readOptions);
>   orc::RowReaderOptions rowOptions;
>   rowOptions.searchArgument(
>       SearchArgumentFactory::newBuilder()
>           ->startAnd()
>           .equals(2, PredicateDataType::LONG, Literal((int64_t)5))
>           .end()
>           .build());
>   std::unique_ptr<RowReader> rowReader = reader->createRowReader(rowOptions);
>   batch = rowReader->createRowBatch(num);
>   structBatch = dynamic_cast<StructVectorBatch*>(batch.get());
>   structBatch2 = dynamic_cast<StructVectorBatch*>(structBatch->fields[0]);
>   intBatch = dynamic_cast<LongVectorBatch*>(structBatch2->fields[0]);
>   structBatch3 = dynamic_cast<StructVectorBatch*>(structBatch->fields[1]);
>   while (rowReader->next(*batch)) {
>     for (size_t i = 0; i < batch->numElements; i++) {
>       
>     }
>   }
> {code}
> Stack trace:
> {code:java}
> terminate called after throwing an instance of 'orc::ParseError'
>   what():  bad read in nextBuffer
> *** Aborted at 1666816640 (Unix time, try 'date -d @1666816640') ***
> *** Signal 6 (SIGABRT) (0x2035c0002b7ad) received by PID 178093 (pthread TID 0x7ffb12545a80) (linux TID 178093) (maybe from PID 178093, UID 131932) (code: -6), stack trace: ***
>     @ 0000000000000000 (unknown)
>     @ 000000000009c9d3 __GI___pthread_kill
>     @ 00000000000444ec __GI_raise
>     @ 000000000002c432 __GI_abort
>     @ 00000000000a3fd4 __gnu_cxx::__verbose_terminate_handler()
>     @ 00000000000a1b39 __cxxabiv1::__terminate(void (*)())
>     @ 00000000000a1ba4 std::terminate()
>     @ 00000000000a1e6f __cxa_throw
>     @ 0000000001efcd55 __cxa_throw
>     @ 00000000075b676c orc::BooleanRleDecoderImpl::seek(orc::PositionProvider&)
>                        /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ByteRLE.cc:526
>     @ 00000000075af711 orc::IntegerColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
>                        /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:120
>     @ 00000000075af67f orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
>                        /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
>     @ 00000000075af67f orc::StructColumnReader::seekToRowGroup(std::unordered_map<unsigned long, orc::PositionProvider, std::hash<unsigned long>, std::equal_to<unsigned long>, std::allocator<std::pair<unsigned long const, orc::PositionProvider> > >&)
>                        /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/ColumnReader.cc:965
>     @ 0000000007598179 orc::RowReaderImpl::seekToRowGroup(unsigned int)
>                        /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:440
>     @ 000000000759d700 orc::RowReaderImpl::startNextStripe()
>                        /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1037
>     @ 000000000759daf4 orc::RowReaderImpl::next(orc::ColumnVectorBatch&)
>                        /home/engshare/third-party2/apache-orc/1.7.2/src/orc/c++/src/Reader.cc:1055
>     @ 0000000002fba9bc main
>     @ 000000000002c656 __libc_start_call_main
>     @ 000000000002c717 __libc_start_main_alias_2
>     @ 0000000002fb2780 _start
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)