You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@impala.apache.org by "Attila Jeges (JIRA)" <ji...@apache.org> on 2017/04/27 12:27:04 UTC
[jira] [Resolved] (IMPALA-3079) Fix Sequence file writer (crashes
or produces invalid files)
[ https://issues.apache.org/jira/browse/IMPALA-3079?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Attila Jeges resolved IMPALA-3079.
----------------------------------
Resolution: Fixed
commit 59b2db6ba722e5bef297bb4603519e06333ce5cb
Author: Attila Jeges <at...@cloudera.com>
Date: Mon Feb 20 18:07:25 2017 +0100
IMPALA-3079: Fix sequence file writer
This change fixes the following issues in the Sequence File Writer:
1. ReadWriteUtil::VLongRequiredBytes() and ReadWriteUtil::PutVLong()
were broken. As a result, Impala created corrupt uncompressed
sequence files.
2. KEY_CLASS_NAME was missing from the sequence file header. As a
result, Hive could not read back uncompressed sequence files
created by Impala.
3. Impala created record-compressed sequence files with an empty keys
block. As a result, Hive could not read back record-compressed
sequence files created by Impala.
4. Impala created block-compressed files with:
- empty key-lengths block
- empty keys block
- empty value-lengths block
This resulted in invalid block-compressed sequence files that Hive could
not read back.
5. In some cases the wrong Record-compression flag was written to the
sequence file header. As a result, Hive could not read back
record-compressed sequence files created by Impala.
6. Impala added 'sync_marker' instead of 'neg1_sync_marker' to the
beginning of blocks in block-compressed sequence files. Hive could
not read these files back.
7. The calculation of block sizes in the SnappyBlockCompressor class was
incorrect for odd-length buffers.
Change-Id: I0db642ad35132a9a5a6611810a6cafbbe26e7487
Reviewed-on: http://gerrit.cloudera.org:8080/6107
Reviewed-by: Michael Ho <kw...@cloudera.com>
Reviewed-by: Attila Jeges <at...@cloudera.com>
Reviewed-by: Dan Hecht <dh...@cloudera.com>
Tested-by: Impala Public Jenkins
> Fix Sequence file writer (crashes or produces invalid files)
> ------------------------------------------------------------
>
> Key: IMPALA-3079
> URL: https://issues.apache.org/jira/browse/IMPALA-3079
> Project: IMPALA
> Issue Type: Bug
> Components: Backend
> Affects Versions: Impala 2.5.0
> Reporter: Mostafa Mokhtar
> Assignee: Attila Jeges
>
> Stack
> {code}
> #0 0x0000003f38832625 in raise () from /lib64/libc.so.6
> #1 0x0000003f38833e05 in abort () from /lib64/libc.so.6
> #2 0x00007f956d814c55 in os::abort(bool) () from /opt/toolchain/sun-jdk-64bit-1.7.0.75/jre/lib/amd64/server/libjvm.so
> #3 0x00007f956d996cd7 in VMError::report_and_die() () from /opt/toolchain/sun-jdk-64bit-1.7.0.75/jre/lib/amd64/server/libjvm.so
> #4 0x00007f956d819b6f in JVM_handle_linux_signal () from /opt/toolchain/sun-jdk-64bit-1.7.0.75/jre/lib/amd64/server/libjvm.so
> #5 <signal handler called>
> #6 0x0000000000cf1992 in snappy::internal::CompressFragment(char const*, unsigned long, char*, unsigned short*, int) ()
> #7 0x0000000000cf2087 in snappy::Compress(snappy::Source*, snappy::Sink*) ()
> #8 0x0000000000cf3051 in snappy::RawCompress(char const*, unsigned long, char*, unsigned long*) ()
> #9 0x0000000000a8c158 in impala::SnappyBlockCompressor::ProcessBlock(bool, long, unsigned char const*, long*, unsigned char**) ()
> #10 0x0000000000cc9177 in impala::HdfsSequenceTableWriter::WriteCompressedBlock() ()
> #11 0x0000000000cc97d0 in impala::HdfsSequenceTableWriter::Flush() ()
> #12 0x0000000000cc9ac3 in impala::HdfsSequenceTableWriter::AppendRowBatch(impala::RowBatch*, std::vector<int, std::allocator<int> > const&, bool*) ()
> #13 0x0000000000cc37e9 in impala::HdfsTableSink::Send(impala::RuntimeState*, impala::RowBatch*, bool) ()
> #14 0x0000000000ca9364 in impala::PlanFragmentExecutor::OpenInternal() ()
> #15 0x0000000000ca98e7 in impala::PlanFragmentExecutor::Open() ()
> #16 0x0000000000a60df8 in impala::FragmentMgr::FragmentExecState::Exec() ()
> #17 0x0000000000a589d2 in impala::FragmentMgr::FragmentThread(impala::TUniqueId) ()
> #18 0x0000000000a59bea in boost::detail::function::void_function_obj_invoker0<boost::_bi::bind_t<void, boost::_mfi::mf1<void, impala::FragmentMgr, impala::TUniqueId>, boost::_bi::list2<boost::_bi::value<impala::FragmentMgr*>, boost::_bi::value<impala::TUniqueId> > >, void>::invoke(boost::detail::function::function_buffer&) ()
> #19 0x0000000000af7da7 in impala::Thread::SuperviseThread(std::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()()>, impala::Promise<long>*) ()
> #20 0x0000000000af86c4 in boost::detail::thread_data<boost::_bi::bind_t<void, void (*)(std::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()()>, impala::Promise<long>*), boost::_bi::list4<boost::_bi::value<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<std::basic_string<char, std::char_traits<char>, std::allocator<char> > >, boost::_bi::value<boost::function<void ()()> >, boost::_bi::value<impala::Promise<long>*> > > >::run() ()
> #21 0x0000000000d31c4a in ?? ()
> #22 0x0000003f38c079d1 in start_thread () from /lib64/libpthread.so.0
> #23 0x0000003f388e88fd in clone () from /lib64/libc.so.6
> {code}
> DDL
> {code}
> set ALLOW_UNSUPPORTED_FORMATS=1;
> CREATE TABLE tpcds_1000_parquet.store_sales_sequencefile ( |
> ss_sold_time_sk INT, |
> ss_item_sk BIGINT, |
> ss_customer_sk INT, |
> ss_cdemo_sk INT, |
> ss_hdemo_sk INT, |
> ss_addr_sk INT, |
> ss_store_sk INT, |
> ss_promo_sk INT, |
> ss_ticket_number BIGINT, |
> ss_quantity INT, |
> ss_wholesale_cost DOUBLE, |
> ss_list_price DOUBLE, |
> ss_sales_price DOUBLE, |
> ss_ext_discount_amt DOUBLE, |
> ss_ext_sales_price DOUBLE, |
> ss_ext_wholesale_cost DOUBLE, |
> ss_ext_list_price DOUBLE, |
> ss_ext_tax DOUBLE, |
> ss_coupon_amt DOUBLE, |
> ss_net_paid DOUBLE, |
> ss_net_paid_inc_tax DOUBLE, |
> ss_net_profit DOUBLE |
> ) |
> PARTITIONED BY ( |
> ss_sold_date_sk INT |
> ) |
> STORED AS SEQUENCEFILE |
> {code}
> Query
> {code}
> insert into store_sales_sequencefile partition(ss_sold_date_sk) select * from store_sales where ss_sold_date_sk between 2450816 and 2451200;
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)