You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@impala.apache.org by "Quanlong Huang (Jira)" <ji...@apache.org> on 2020/01/23 12:24:00 UTC

[jira] [Created] (IMPALA-9324) HdfsOrcScanner crashes in DCHECK failure in OrcSchemaResolver::BuildSchemaPath

Quanlong Huang created IMPALA-9324:
--------------------------------------

             Summary: HdfsOrcScanner crashes in DCHECK failure in OrcSchemaResolver::BuildSchemaPath
                 Key: IMPALA-9324
                 URL: https://issues.apache.org/jira/browse/IMPALA-9324
             Project: IMPALA
          Issue Type: Bug
          Components: Backend
            Reporter: Quanlong Huang
            Assignee: Quanlong Huang
         Attachments: complextypes_crash.orc

Hit a crash after running test_fuzz_scanners.py for orc/def/block for 2 days.

FATAL log:
{code}
F0123 03:46:22.084527 15347 orc-metadata-utils.cc:44] 9b4d52b2f594f58b:687b5fe200000001] Check failed: paths->size() == node.getColumnId() (3 vs. 4)
{code}

Stack trace:
{code}
Crash reason:  SIGABRT
Crash address: 0x3e8000033b3
Process uptime: not available

Thread 306 (crashed)
 0  libc-2.23.so + 0x35428
 1  libc-2.23.so + 0x3702a
 2  impalad!google_breakpad::ExceptionHandler::HandleSignal(int, siginfo_t*, void*) + 0x1e0
 3  impalad!google::DumpStackTraceAndExit() + 0x24
 4  impalad!google::LogMessage::Fail() + 0xd
 5  impalad!google::LogMessage::SendToLog() + 0x2b2
 6  impalad!google::LogMessage::Flush() + 0x157
 7  impalad!google::LogMessageFatal::~LogMessageFatal() + 0xe
 8  impalad!impala::OrcSchemaResolver::BuildSchemaPath(orc::Type const&, std::vector<int, std::allocator<int> >*, std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > >*) [orc-metadata-utils.cc : 44 + 0xf]
 9  impalad!impala::OrcSchemaResolver::BuildSchemaPaths(int, std::vector<std::vector<int, std::allocator<int> >, std::allocator<std::vector<int, std::allocator<int> > > >*) [orc-metadata-utils.cc : 36 + 0x4a]
10  impalad!impala::HdfsOrcScanner::Open(impala::ScannerContext*) [hdfs-orc-scanner.cc : 187 + 0x51]
11  impalad!impala::HdfsScanNodeBase::CreateAndOpenScannerHelper(impala::HdfsPartitionDescriptor*, impala::ScannerContext*, boost::scoped_ptr<impala::HdfsScanner>*) [hdfs-scan-node-base.cc : 819 + 0x29]
12  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 494 + 0x2b]
13  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 416 + 0x2a]
14  impalad!impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::{lambda()#1}::operator()() const + 0x30
15  impalad!boost::detail::function::void_function_obj_invoker0<impala::HdfsScanNode::ThreadTokenAvailableCb(impala::ThreadResourcePool*)::<lambda()>, void>::invoke [function_template.hpp : 153 + 0xc]
16  impalad!boost::function0<void>::operator()() const [function_template.hpp : 767 + 0x11]
17  impalad!impala::Thread::SuperviseThread(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
18  impalad!void boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 525 + 0x15]
19  impalad!boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() [bind_template.hpp : 20 + 0x22]
20  impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::string const&, std::string const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::string>, boost::_bi::value<std::string>, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() [thread.hpp : 116 + 0x12]
21  impalad!thread_proxy + 0xda
22  libpthread-2.23.so + 0x76ba
23  libc-2.23.so + 0x10741d
{code}

Code snippet:
{code:c++}
 42 void OrcSchemaResolver::BuildSchemaPath(const orc::Type& node, SchemaPath* path,
 43     vector<SchemaPath>* paths) {
 44   DCHECK_EQ(paths->size(), node.getColumnId());
 45   paths->push_back(*path);
 46   if (node.getKind() == orc::TypeKind::STRUCT) {
 47     int size = node.getSubtypeCount();
 48     for (int i = 0; i < size; ++i) {
 49       path->push_back(i);
 50       const orc::Type* child = node.getSubtype(i);
 51       BuildSchemaPath(*child, path, paths);
 52       path->pop_back();
 53     }
 54   } else if (node.getKind() == orc::TypeKind::LIST) {
 55     DCHECK_EQ(node.getSubtypeCount(), 1);
 56     const orc::Type* child = node.getSubtype(0);
 57     path->push_back(SchemaPathConstants::ARRAY_ITEM);
 58     BuildSchemaPath(*child, path, paths);
 59     path->pop_back();
 60   } else if (node.getKind() == orc::TypeKind::MAP) {
 61     DCHECK_EQ(node.getSubtypeCount(), 2);
 62     const orc::Type* key_child = node.getSubtype(0);
 63     const orc::Type* value_child = node.getSubtype(1);
 64     path->push_back(SchemaPathConstants::MAP_KEY);
 65     BuildSchemaPath(*key_child, path, paths);
 66     (*path)[path->size() - 1] = SchemaPathConstants::MAP_VALUE;
 67     BuildSchemaPath(*value_child, path, paths);
 68     path->pop_back();
 69   }
 70 }

{code}

Steps to reproduce:
{code:sql}
CREATE EXTERNAL TABLE my_complextypes_tbl (
  id BIGINT,
  int_array ARRAY<INT>,
  int_array_array ARRAY<ARRAY<INT>>,
  int_map MAP<STRING,INT>,
  int_map_array ARRAY<MAP<STRING,INT>>,
  nested_struct STRUCT<a:INT,b:ARRAY<INT>,c:STRUCT<d:ARRAY<ARRAY<STRUCT<e:INT,f:STRING>>>>,g:MAP<STRING,STRUCT<h:STRUCT<i:ARRAY<DOUBLE>>>>>
)
STORED AS ORC;
{code}
Load the attached corrupt ORC file (complextypes_crash.orc) into this table and run:
{code:sql}
select * from my_complextypes_tbl;
{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)