You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@impala.apache.org by "Quanlong Huang (Jira)" <ji...@apache.org> on 2020/01/29 01:04:00 UTC

[jira] [Resolved] (IMPALA-9324) HdfsOrcScanner crashes in DCHECK failure in OrcSchemaResolver::BuildSchemaPath

     [ https://issues.apache.org/jira/browse/IMPALA-9324?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Quanlong Huang resolved IMPALA-9324.
------------------------------------
    Fix Version/s: Impala 3.4.0
       Resolution: Fixed

> HdfsOrcScanner crashes in DCHECK failure in OrcSchemaResolver::BuildSchemaPath
> ------------------------------------------------------------------------------
>
>                 Key: IMPALA-9324
>                 URL: https://issues.apache.org/jira/browse/IMPALA-9324
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>            Reporter: Quanlong Huang
>            Assignee: Quanlong Huang
>            Priority: Blocker
>             Fix For: Impala 3.4.0
>
>         Attachments: complextypes_crash.orc, complextypes_crash2.orc
>
>
> Hit a crash after running test_fuzz_scanners.py for orc/def/block for 2 days.
> FATAL log:
> {code}
> F0123 03:46:22.084527 15347 orc-metadata-utils.cc:44] 9b4d52b2f594f58b:687b5fe200000001] Check failed: paths->size() == node.getColumnId() (3 vs. 4)
> {code}
> stacktrace:
> {code}
> Crash reason:  SIGABRT
> Crash address: 0x3e8000033b3
> Process uptime: not available
> Thread 306 (crashed)
>  0  libc-2.23.so + 0x35428
>  1  libc-2.23.so + 0x3702a
>  2  impalad!google_breakpad::ExceptionHandler::HandleSignal(int, siginfo_t*, void*) + 0x1e0
>  3  impalad!google::DumpStackTraceAndExit() + 0x24
>  4  impalad!google::LogMessage::Fail() + 0xd
>  5  impalad!google::LogMessage::SendToLog() + 0x2b2
>  6  impalad!google::LogMessage::Flush() + 0x157
>  7  impalad!google::LogMessageFatal::~LogMessageFatal() + 0xe
>  8  impalad!impala::OrcSchemaResolver::BuildSchemaPath(orc::Type const&, std::vector<int, std::allocator<int> >*, std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > >*) [orc-metadata-utils.cc : 44 + 0xf]
>  9  impalad!impala::OrcSchemaResolver::BuildSchemaPaths(int, std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > >*) [orc-metadata-utils.cc : 36 + 0x4a]
> 10  impalad!impala::HdfsOrcScanner::Open(impala::ScannerContext*) [hdfs-orc-scanner.cc : 187 + 0x51]
> 11  impalad!impala::HdfsScanNodeBase::CreateAndOpenScannerHelper(impala::HdfsPartitionDescriptor*, impala::ScannerContext*, boost::scoped_ptr<impala::HdfsScanner>*) [hdfs-scan-node-base.cc : 819 + 0x29]
> 12  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 494 + 0x2b]
> 13  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 416 + 0x2a]
> 14  impalad!impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::{lambda()#1}::operator()() const + 0x30
> 15  impalad!boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::<lambda()>, void>::invoke [function_template.hpp : 153 + 0xc]
> 16  impalad!boost::function0<void>::operator()() const [function_template.hpp : 767 + 0x11]
> 17  impalad!impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
> 18  impalad!void boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 525 + 0x15]
> 19  impalad!boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() [bind_template.hpp : 20 + 0x22]
> 20  impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() [thread.hpp : 116 + 0x12]
> 21  impalad!thread_proxy + 0xda
> 22  libpthread-2.23.so + 0x76ba
> 23  libc-2.23.so + 0x10741d
> {code}
> Code snippet
> {code:c++}
>  42 void OrcSchemaResolver::BuildSchemaPath(const orc::Type& node, SchemaPath* path,
>  43     vector<SchemaPath>* paths) {
>  44   DCHECK_EQ(paths->size(), node.getColumnId());
>  45   paths->push_back(*path);
>  46   if (node.getKind() == orc::TypeKind::STRUCT) {
>  47     int size = node.getSubtypeCount();
>  48     for (int i = 0; i < size; ++i) {
>  49       path->push_back(i);
>  50       const orc::Type* child = node.getSubtype(i);
>  51       BuildSchemaPath(*child, path, paths);
>  52       path->pop_back();
>  53     }
>  54   } else if (node.getKind() == orc::TypeKind::LIST) {
>  55     DCHECK_EQ(node.getSubtypeCount(), 1);
>  56     const orc::Type* child = node.getSubtype(0);
>  57     path->push_back(SchemaPathConstants::ARRAY_ITEM);
>  58     BuildSchemaPath(*child, path, paths);
>  59     path->pop_back();
>  60   } else if (node.getKind() == orc::TypeKind::MAP) {
>  61     DCHECK_EQ(node.getSubtypeCount(), 2);
>  62     const orc::Type* key_child = node.getSubtype(0);
>  63     const orc::Type* value_child = node.getSubtype(1);
>  64     path->push_back(SchemaPathConstants::MAP_KEY);
>  65     BuildSchemaPath(*key_child, path, paths);
>  66     (*path)[path->size() - 1] = SchemaPathConstants::MAP_VALUE;
>  67     BuildSchemaPath(*value_child, path, paths);
>  68     path->pop_back();
>  69   }
>  70 }
> {code}
> Reproduce
> {code:sql}
> CREATE EXTERNAL TABLE my_complextypes_tbl (
>   id BIGINT,
>   int_array ARRAY<INT>,
>   int_array_array ARRAY<ARRAY<INT>>,
>   int_map MAP<STRING,INT>,
>   int_map_array ARRAY<MAP<STRING,INT>>,
>   nested_struct STRUCT<a:INT,b:ARRAY<INT>,c:STRUCT<d:ARRAY<ARRAY<STRUCT<e:INT,f:STRING>>>>,g:MAP<STRING,STRUCT<h:STRUCT<i:ARRAY<DOUBLE>>>>>
> )
> STORED AS ORC;
> {code}
> Load the attached corrupt ORC file into this table and run:
> {code:sql}
> select * from my_complextypes_tbl;
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)