You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues-all@impala.apache.org by "Zoltán Borók-Nagy (Jira)" <ji...@apache.org> on 2022/03/09 17:06:00 UTC
[jira] [Updated] (IMPALA-11172) DCHECK hit in Parquet column readers during test_page_index
[ https://issues.apache.org/jira/browse/IMPALA-11172?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Zoltán Borók-Nagy updated IMPALA-11172:
---------------------------------------
Description:
h4. Error Details
{noformat}
DCHECK found in log file: /data/jenkins/workspace/impala-cdw-master-staging-core-asan/repos/Impala/logs/ee_tests/impalad.FATAL
{noformat}
h4. Standard Error
{noformat}
Log file created at: 2022/03/08 05:36:37
Running on machine: impala-ec2-centos74-m5-4xlarge-ondemand-0f9e.vpc.cloudera.com
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
F0308 05:36:37.949270 7073 parquet-column-readers.cc:1287] c9417d8313c09974:eef448b800000002] Check failed: num_buffered_values_ >= num_rows (14528 vs. 232528)
{noformat}
h4. The location points to:
{noformat}
template <bool MULTI_PAGE>
bool BaseScalarColumnReader::SkipTopLevelRows(int64_t num_rows, int64_t* remaining) {
DCHECK_GT(num_rows, 0);
DCHECK_GT(num_buffered_values_, 0);
if (!MULTI_PAGE) {
DCHECK_GE(num_buffered_values_, num_rows); <===== HERE
}
{noformat}
h4. Stack Trace
{noformat}
Thread 528 (crashed)
0 libc-2.17.so + 0x351f7
1 impalad!google::LogMessage::Flush() + 0x1eb
2 impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
3 impalad!bool impala::BaseScalarColumnReader::SkipTopLevelRows<false>(long, long*) [parquet-column-readers.cc : 0 + 0xd]
if (!MULTI_PAGE) {
DCHECK_GE(num_buffered_values_, num_rows); <==== HERE
}
4 impalad!bool impala::BaseScalarColumnReader::SkipRowsInternal<false>(long, long) [parquet-column-readers.cc : 1555 + 0x11]
// Skip to the required row id within the page.
if (last_row < skip_row_id) {
if (UNLIKELY(!SkipTopLevelRows(skip_row_id - last_row, &remaining))) { <=== HERE
return false;
}
}
5 impalad!impala::HdfsParquetScanner::SkipRowsForColumns(std::vector<impala::ParquetColumnReader*, std::allocator<impala::ParquetColumnReader*> > const&, long*, long*) [hdfs-parquet-scanner.cc : 2376 + 0x5c]
ParquetColumnReader* col_reader = column_readers[c];
// Skipping may fail for corrupted Parquet file due to mismatch of rows
// among columns.
if (UNLIKELY(!col_reader->SkipRows(*num_rows_to_skip, *skip_to_row))) { <==== HERE
return Status(Substitute("Error in skipping rows in file $0.", filename()));
}
}
6 impalad!impala::Status impala::HdfsParquetScanner::AssembleRows<true>(impala::RowBatch*, bool*) [hdfs-parquet-scanner.cc : 2355 + 0x26]
7 impalad!impala::HdfsParquetScanner::GetNextInternal(impala::RowBatch*) [hdfs-parquet-scanner.cc : 525 + 0x1b]
8 impalad!impala::HdfsParquetScanner::ProcessSplit() [hdfs-parquet-scanner.cc : 415 + 0xd]
9 impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 500 + 0x8]
10 impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 418 + 0x1e]
11 impalad!boost::function0<void>::operator()() const [function_template.hpp : 763 + 0x5]
12 impalad!impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0x9]
...
{noformat}
h4. Query information
{noformat}
select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31' db: test_page_index_872ff6e2
{noformat}
So the crash happened while running test_page_index, executing the query above.
The query configuration was:
{noformat}
I0308 12:31:25.258759 17976 impala-beeswax-server.cc:516] query: Query {
01: query (string) = "select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31'",
03: configuration (list) = list<string>[6] {
[0] = "CLIENT_IDENTIFIE[...](273)",
[1] = "BATCH_SIZE=32",
[2] = "NUM_NODES=0",
[3] = "DISABLE_CODEGEN_ROWS_THRESHOLD=0",
[4] = "DISABLE_CODEGEN=False",
[5] = "EXEC_SINGLE_NODE_ROWS_THRESHOLD=0",
},
{noformat}
was:
h4. Error Details
{noformat}
DCHECK found in log file: /data/jenkins/workspace/impala-cdw-master-staging-core-asan/repos/Impala/logs/ee_tests/impalad.FATAL
{noformat}
h4. Standard Error
{noformat}
Log file created at: 2022/03/08 05:36:37
Running on machine: impala-ec2-centos74-m5-4xlarge-ondemand-0f9e.vpc.cloudera.com
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
F0308 05:36:37.949270 7073 parquet-column-readers.cc:1287] c9417d8313c09974:eef448b800000002] Check failed: num_buffered_values_ >= num_rows (14528 vs. 232528)
{noformat}
h4. The location points to:
{noformat}
template <bool MULTI_PAGE>
bool BaseScalarColumnReader::SkipTopLevelRows(int64_t num_rows, int64_t* remaining) {
DCHECK_GT(num_rows, 0);
DCHECK_GT(num_buffered_values_, 0);
if (!MULTI_PAGE) {
DCHECK_GE(num_buffered_values_, num_rows); <===== HERE
}
{noformat}
h4. Stack Trace
{noformat}
Thread 528 (crashed)
0 libc-2.17.so + 0x351f7
1 impalad!google::LogMessage::Flush() + 0x1eb
2 impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
3 impalad!bool impala::BaseScalarColumnReader::SkipTopLevelRows<false>(long, long*) [parquet-column-readers.cc : 0 + 0xd]
4 impalad!bool impala::BaseScalarColumnReader::SkipRowsInternal<false>(long, long) [parquet-column-readers.cc : 1555 + 0x11]
5 impalad!impala::HdfsParquetScanner::SkipRowsForColumns(std::vector<impala::ParquetColumnReader*, std::allocator<impala::ParquetColumnReader*> > const&, long*, long*) [hdfs-parquet-scanner.cc : 2376 + 0x5c]
6 impalad!impala::Status impala::HdfsParquetScanner::AssembleRows<true>(impala::RowBatch*, bool*) [hdfs-parquet-scanner.cc : 2355 + 0x26]
7 impalad!impala::HdfsParquetScanner::GetNextInternal(impala::RowBatch*) [hdfs-parquet-scanner.cc : 525 + 0x1b]
8 impalad!impala::HdfsParquetScanner::ProcessSplit() [hdfs-parquet-scanner.cc : 415 + 0xd]
9 impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 500 + 0x8]
10 impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 418 + 0x1e]
11 impalad!boost::function0<void>::operator()() const [function_template.hpp : 763 + 0x5]
12 impalad!impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0x9]
...
{noformat}
h4. Query information
{noformat}
select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31' db: test_page_index_872ff6e2
{noformat}
So the crash happened during test_page_index.
Query configuration was:
{noformat}
I0308 12:31:25.258759 17976 impala-beeswax-server.cc:516] query: Query {
01: query (string) = "select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31'",
03: configuration (list) = list<string>[6] {
[0] = "CLIENT_IDENTIFIE[...](273)",
[1] = "BATCH_SIZE=32",
[2] = "NUM_NODES=0",
[3] = "DISABLE_CODEGEN_ROWS_THRESHOLD=0",
[4] = "DISABLE_CODEGEN=False",
[5] = "EXEC_SINGLE_NODE_ROWS_THRESHOLD=0",
},
{noformat}
> DCHECK hit in Parquet column readers during test_page_index
> -----------------------------------------------------------
>
> Key: IMPALA-11172
> URL: https://issues.apache.org/jira/browse/IMPALA-11172
> Project: IMPALA
> Issue Type: Bug
> Components: Backend
> Reporter: Zoltán Borók-Nagy
> Priority: Major
> Labels: broken-build
>
> h4. Error Details
> {noformat}
> DCHECK found in log file: /data/jenkins/workspace/impala-cdw-master-staging-core-asan/repos/Impala/logs/ee_tests/impalad.FATAL
> {noformat}
> h4. Standard Error
> {noformat}
> Log file created at: 2022/03/08 05:36:37
> Running on machine: impala-ec2-centos74-m5-4xlarge-ondemand-0f9e.vpc.cloudera.com
> Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
> F0308 05:36:37.949270 7073 parquet-column-readers.cc:1287] c9417d8313c09974:eef448b800000002] Check failed: num_buffered_values_ >= num_rows (14528 vs. 232528)
> {noformat}
> h4. The location points to:
> {noformat}
> template <bool MULTI_PAGE>
> bool BaseScalarColumnReader::SkipTopLevelRows(int64_t num_rows, int64_t* remaining) {
> DCHECK_GT(num_rows, 0);
> DCHECK_GT(num_buffered_values_, 0);
> if (!MULTI_PAGE) {
> DCHECK_GE(num_buffered_values_, num_rows); <===== HERE
> }
> {noformat}
> h4. Stack Trace
> {noformat}
> Thread 528 (crashed)
> 0 libc-2.17.so + 0x351f7
> 1 impalad!google::LogMessage::Flush() + 0x1eb
> 2 impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
> 3 impalad!bool impala::BaseScalarColumnReader::SkipTopLevelRows<false>(long, long*) [parquet-column-readers.cc : 0 + 0xd]
> if (!MULTI_PAGE) {
> DCHECK_GE(num_buffered_values_, num_rows); <==== HERE
> }
> 4 impalad!bool impala::BaseScalarColumnReader::SkipRowsInternal<false>(long, long) [parquet-column-readers.cc : 1555 + 0x11]
> // Skip to the required row id within the page.
> if (last_row < skip_row_id) {
> if (UNLIKELY(!SkipTopLevelRows(skip_row_id - last_row, &remaining))) { <=== HERE
> return false;
> }
> }
> 5 impalad!impala::HdfsParquetScanner::SkipRowsForColumns(std::vector<impala::ParquetColumnReader*, std::allocator<impala::ParquetColumnReader*> > const&, long*, long*) [hdfs-parquet-scanner.cc : 2376 + 0x5c]
> ParquetColumnReader* col_reader = column_readers[c];
> // Skipping may fail for corrupted Parquet file due to mismatch of rows
> // among columns.
> if (UNLIKELY(!col_reader->SkipRows(*num_rows_to_skip, *skip_to_row))) { <==== HERE
> return Status(Substitute("Error in skipping rows in file $0.", filename()));
> }
> }
> 6 impalad!impala::Status impala::HdfsParquetScanner::AssembleRows<true>(impala::RowBatch*, bool*) [hdfs-parquet-scanner.cc : 2355 + 0x26]
> 7 impalad!impala::HdfsParquetScanner::GetNextInternal(impala::RowBatch*) [hdfs-parquet-scanner.cc : 525 + 0x1b]
> 8 impalad!impala::HdfsParquetScanner::ProcessSplit() [hdfs-parquet-scanner.cc : 415 + 0xd]
> 9 impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 500 + 0x8]
> 10 impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 418 + 0x1e]
> 11 impalad!boost::function0<void>::operator()() const [function_template.hpp : 763 + 0x5]
> 12 impalad!impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0x9]
> ...
> {noformat}
> h4. Query information
> {noformat}
> select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31' db: test_page_index_872ff6e2
> {noformat}
> So the crash happened while running test_page_index, executing the query above.
> The query configuration was:
> {noformat}
> I0308 12:31:25.258759 17976 impala-beeswax-server.cc:516] query: Query {
> 01: query (string) = "select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31'",
> 03: configuration (list) = list<string>[6] {
> [0] = "CLIENT_IDENTIFIE[...](273)",
> [1] = "BATCH_SIZE=32",
> [2] = "NUM_NODES=0",
> [3] = "DISABLE_CODEGEN_ROWS_THRESHOLD=0",
> [4] = "DISABLE_CODEGEN=False",
> [5] = "EXEC_SINGLE_NODE_ROWS_THRESHOLD=0",
> },
> {noformat}
--
This message was sent by Atlassian Jira
(v8.20.1#820001)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscribe@impala.apache.org
For additional commands, e-mail: issues-all-help@impala.apache.org