You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues-all@impala.apache.org by "Quanlong Huang (Jira)" <ji...@apache.org> on 2020/01/19 11:25:00 UTC

[jira] [Assigned] (IMPALA-9306) HdfsOrcScanner crashes in orc::readFully

     [ https://issues.apache.org/jira/browse/IMPALA-9306?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Quanlong Huang reassigned IMPALA-9306:
--------------------------------------

    Assignee: Quanlong Huang

> HdfsOrcScanner crashes in orc::readFully
> ----------------------------------------
>
>                 Key: IMPALA-9306
>                 URL: https://issues.apache.org/jira/browse/IMPALA-9306
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>            Reporter: Quanlong Huang
>            Assignee: Quanlong Huang
>            Priority: Blocker
>         Attachments: alltypes_uncompressed_corrupt.orc
>
>
> Hit a crash after running test_scanners_fuzz.py 317 times (took 20h), using the latest Impala and the latest ORC library:
> * Impala git-hash: d66610837e53965cb969b78116aec58164bb8548
> * ORC git-hash: 35385bdff1374417831628b72468e03558c969f7
> Stacktrace:
> {code}
> Thread 295 (crashed)
>  0  impalad!orc::readFully(char*, long, orc::SeekableInputStream*) [ColumnReader.cc : 522 + 0x0]
>  1  impalad!orc::StringDictionaryColumnReader::StringDictionaryColumnReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 596 + 0x11]
>  2  impalad!orc::buildReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 1756 + 0x1b]
>  3  impalad!orc::StructColumnReader::StructColumnReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 876 + 0x10]
>  4  impalad!orc::buildReader(orc::Type const&, orc::StripeStreams&) [ColumnReader.cc : 1787 + 0x1b]
>  5  impalad!orc::RowReaderImpl::startNextStripe() [Reader.cc : 917 + 0x12]
>  6  impalad!orc::RowReaderImpl::next(orc::ColumnVectorBatch&) [Reader.cc : 932 + 0x5]
>  7  impalad!impala::HdfsOrcScanner::AssembleRows(impala::RowBatch*) [hdfs-orc-scanner.cc : 618 + 0x40]
>  8  impalad!impala::HdfsOrcScanner::GetNextInternal(impala::RowBatch*) [hdfs-orc-scanner.cc : 516 + 0x20]
>  9  impalad!impala::HdfsOrcScanner::ProcessSplit() [hdfs-orc-scanner.cc : 435 + 0x39]
> 10  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 514 + 0x28]
> 11  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 416 + 0x2a]
> 12  impalad!impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::{lambda()#1}::operator()() const + 0x30
> 13  impalad!boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::<lambda()>, void>::invoke [function_template.hpp : 153 + 0xc]
> 14  impalad!boost::function0<void>::operator()() const [function_template.hpp : 767 + 0x11]
> 15  impalad!impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
> 16  impalad!void boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 525 + 0x15]
> 17  impalad!boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() [bind_template.hpp : 20 + 0x22]
> 18  impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() [thread.hpp : 116 + 0x12]
> 19  impalad!thread_proxy + 0xda
> 20  libpthread-2.23.so + 0x76ba
> 21  libc-2.23.so + 0x10741d
> {code}
> *How to reproduce*
> Build Impala using the latest ORC library according to https://cwiki.apache.org/confluence/display/IMPALA/Compile+Impala+with+the+latest+ORC+library
> Create the table with:
> {code:sql}
> CREATE TABLE uncomp_dst_alltypes (
>   id INT,                 
>   bool_col BOOLEAN,       
>   tinyint_col TINYINT,    
>   smallint_col SMALLINT,  
>   int_col INT,            
>   bigint_col BIGINT,      
>   float_col FLOAT,        
>   double_col DOUBLE,      
>   date_string_col STRING, 
>   string_col STRING,      
>   timestamp_col TIMESTAMP,
>   year INT,               
>   month INT               
> )                          
> STORED AS ORC;
> {code}
> Load the attached ORC file (alltypes_uncompressed_corrupt.orc) and run the query:
> {code:sql}
> select count(*) from (select distinct * from uncomp_dst_alltypes) q;
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscribe@impala.apache.org
For additional commands, e-mail: issues-all-help@impala.apache.org