You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues-all@impala.apache.org by "Yida Wu (Jira)" <ji...@apache.org> on 2020/10/14 02:29:00 UTC

[jira] [Commented] (IMPALA-10102) Impalad crashses when writting a parquet file with large rows

    [ https://issues.apache.org/jira/browse/IMPALA-10102?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17213527#comment-17213527 ] 

Yida Wu commented on IMPALA-10102:
----------------------------------

The value of 'max_compressed_size' when it crashes is around 200-300MB, which is reasonable. The stack is always the same.

I have run some tests under different circumstances. If enough memory is available, such as when running in a VM with 256GB of memory, the issue doesn't appear. If the mem_limit startup option is set lower than needed, the query fails without crashing.

In summary, the issue happens when memory is scarce: the process is either killed or crashes due to using a nullptr returned by an allocation from the pool. We can use TryAllocate to avoid the crash; however, the process will probably still be killed later due to OOM, and using TryAllocate may lower efficiency in most of the normal cases. Another option is to treat it as a configuration issue, where the solution is to set a proper mem_limit option to prevent the OOM from happening.

> Impalad crashses when writting a parquet file with large rows
> -------------------------------------------------------------
>
>                 Key: IMPALA-10102
>                 URL: https://issues.apache.org/jira/browse/IMPALA-10102
>             Project: IMPALA
>          Issue Type: Bug
>            Reporter: Quanlong Huang
>            Assignee: Yida Wu
>            Priority: Critical
>              Labels: crash
>
> Encountered a crash when testing the following queries on my local branch:
> {code:sql}
> create table bigstrs3 stored as parquet as
> select *, repeat(uuid(), cast(random() * 200000 as int)) as bigstr
> from functional.alltypes
> limit 1000;
> # Length of uuid() is 36. So the max row size is 7,200,000.
> set MAX_ROW_SIZE=8m;
> create table my_str_group stored as parquet as
>   select group_concat(string_col) as ss, bigstr
>   from bigstrs3 group by bigstr;
> create table my_cnt stored as parquet as
>   select count(*) as cnt, bigstr
>   from bigstrs3 group by bigstr;
> {code}
> The crash stacktrace:
> {code}
> Crash reason:  SIGSEGV
> Crash address: 0x0
> Process uptime: not available
> Thread 336 (crashed)
>  0  libc-2.23.so + 0x14e10b
>  1  impalad!snappy::UncheckedByteArraySink::Append(char const*, unsigned long) [clone .localalias.0] + 0x1a 
>  2  impalad!snappy::Compress(snappy::Source*, snappy::Sink*) + 0xb1 
>  3  impalad!snappy::RawCompress(char const*, unsigned long, char*, unsigned long*) + 0x51 
>  4  impalad!impala::SnappyCompressor::ProcessBlock(bool, long, unsigned char const*, long*, unsigned char**) [compress.cc : 295 + 0x24]
>  5  impalad!impala::Codec::ProcessBlock32(bool, int, unsigned char const*, int*, unsigned char**) [codec.cc : 211 + 0x41]
>  6  impalad!impala::HdfsParquetTableWriter::BaseColumnWriter::Flush(long*, long*, long*) [hdfs-parquet-table-writer.cc : 775 + 0x56]
>  7  impalad!impala::HdfsParquetTableWriter::FlushCurrentRowGroup() [hdfs-parquet-table-writer.cc : 1330 + 0x60]
>  8  impalad!impala::HdfsParquetTableWriter::Finalize() [hdfs-parquet-table-writer.cc : 1297 + 0x19]
>  9  impalad!impala::HdfsTableSink::FinalizePartitionFile(impala::RuntimeState*, impala::OutputPartition*) [hdfs-table-sink.cc : 652 + 0x2e]
> 10  impalad!impala::HdfsTableSink::WriteRowsToPartition(impala::RuntimeState*, impala::RowBatch*, std::pair<std::unique_ptr<impala::OutputPartition, std::default_delete<impala::OutputPartition> >, std::vector<int, std::allocator<int> > >*) [hdfs-table-sink.cc : 282 + 0x21]
> 11  impalad!impala::HdfsTableSink::Send(impala::RuntimeState*, impala::RowBatch*) [hdfs-table-sink.cc : 621 + 0x2e]
> 12  impalad!impala::FragmentInstanceState::ExecInternal() [fragment-instance-state.cc : 422 + 0x58]
> 13  impalad!impala::FragmentInstanceState::Exec() [fragment-instance-state.cc : 106 + 0x16]
> 14  impalad!impala::QueryState::ExecFInstance(impala::FragmentInstanceState*) [query-state.cc : 836 + 0x19]
> 15  impalad!impala::QueryState::StartFInstances()::{lambda()#1}::operator()() const + 0x26 
> 16  impalad!boost::detail::function::void_function_obj_invoker0<impala::QueryState::StartFInstances()::<lambda()>, void>::invoke [function_template.hpp : 159 + 0xc] 
> 17  impalad!boost::function0<void>::operator()() const [function_template.hpp : 770 + 0x1d]
> 18  impalad!impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0xf]
> 19  impalad!void boost::_bi::list5<boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> >::operator()<void (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0>(boost::_bi::type<void>, void (*&)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list0&, int) [bind.hpp : 531 + 0x15]
> 20  impalad!boost::_bi::bind_t<void, void (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > >::operator()() [bind.hpp : 1222 + 0x22]
> 21  impalad!boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*), boost::_bi::list5<boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void ()> >, boost::_bi::value<impala::ThreadDebugInfo*>, boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > >::run() [thread.hpp : 116 + 0x12]
> 22  impalad!thread_proxy + 0x72 
> 23  libpthread-2.23.so + 0x76ba
> 24  libc-2.23.so + 0x1074dd
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscribe@impala.apache.org
For additional commands, e-mail: issues-all-help@impala.apache.org