You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues-all@impala.apache.org by "Zoltán Borók-Nagy (Jira)" <ji...@apache.org> on 2022/03/09 17:06:00 UTC

[jira] [Updated] (IMPALA-11172) DCHECK hit in Parquet column readers during test_page_index

     [ https://issues.apache.org/jira/browse/IMPALA-11172?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Zoltán Borók-Nagy updated IMPALA-11172:
---------------------------------------
    Description: 
h4. Error Details
{noformat}
DCHECK found in log file: /data/jenkins/workspace/impala-cdw-master-staging-core-asan/repos/Impala/logs/ee_tests/impalad.FATAL
{noformat}
h4. Standard Error
{noformat}
Log file created at: 2022/03/08 05:36:37
Running on machine: impala-ec2-centos74-m5-4xlarge-ondemand-0f9e.vpc.cloudera.com
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
F0308 05:36:37.949270  7073 parquet-column-readers.cc:1287] c9417d8313c09974:eef448b800000002] Check failed: num_buffered_values_ >= num_rows (14528 vs. 232528) 
{noformat}
h4. The failed check (parquet-column-readers.cc:1287) is:
{noformat}
template <bool MULTI_PAGE>
bool BaseScalarColumnReader::SkipTopLevelRows(int64_t num_rows, int64_t* remaining) {
  DCHECK_GT(num_rows, 0);
  DCHECK_GT(num_buffered_values_, 0);
  if (!MULTI_PAGE) {
    DCHECK_GE(num_buffered_values_, num_rows);   <===== HERE
  }
{noformat}
h4. Stack Trace
{noformat}
Thread 528 (crashed)
 0  libc-2.17.so + 0x351f7
 1  impalad!google::LogMessage::Flush() + 0x1eb
 2  impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
 3  impalad!bool impala::BaseScalarColumnReader::SkipTopLevelRows<false>(long, long*) [parquet-column-readers.cc : 0 + 0xd]
  if (!MULTI_PAGE) {
    DCHECK_GE(num_buffered_values_, num_rows);   <==== HERE
  }
 4  impalad!bool impala::BaseScalarColumnReader::SkipRowsInternal<false>(long, long) [parquet-column-readers.cc : 1555 + 0x11]
    // Skip to the required row id within the page.
    if (last_row < skip_row_id) {
      if (UNLIKELY(!SkipTopLevelRows(skip_row_id - last_row, &remaining))) {   <=== HERE
        return false;
      }
    }
 5  impalad!impala::HdfsParquetScanner::SkipRowsForColumns(std::vector<impala::ParquetColumnReader*, std::allocator<impala::ParquetColumnReader*> > const&, long*, long*) [hdfs-parquet-scanner.cc : 2376 + 0x5c]
      ParquetColumnReader* col_reader = column_readers[c];
      // Skipping may fail for corrupted Parquet file due to mismatch of rows
      // among columns.
      if (UNLIKELY(!col_reader->SkipRows(*num_rows_to_skip, *skip_to_row))) {  <==== HERE
        return Status(Substitute("Error in skipping rows in file $0.", filename()));
      }
    }
 6  impalad!impala::Status impala::HdfsParquetScanner::AssembleRows<true>(impala::RowBatch*, bool*) [hdfs-parquet-scanner.cc : 2355 + 0x26]
 7  impalad!impala::HdfsParquetScanner::GetNextInternal(impala::RowBatch*) [hdfs-parquet-scanner.cc : 525 + 0x1b]
 8  impalad!impala::HdfsParquetScanner::ProcessSplit() [hdfs-parquet-scanner.cc : 415 + 0xd]
 9  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 500 + 0x8]
10  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 418 + 0x1e]
11  impalad!boost::function0<void>::operator()() const [function_template.hpp : 763 + 0x5]
12  impalad!impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0x9]
...
{noformat}

h4. Query information

{noformat}
select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31' db: test_page_index_872ff6e2
{noformat}

The database name (test_page_index_872ff6e2) shows that the crash happened while running test_page_index.

The query configuration was:

{noformat}
I0308 12:31:25.258759 17976 impala-beeswax-server.cc:516] query: Query {
  01: query (string) = "select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31'",
  03: configuration (list) = list<string>[6] {
    [0] = "CLIENT_IDENTIFIE[...](273)",
    [1] = "BATCH_SIZE=32",
    [2] = "NUM_NODES=0",
    [3] = "DISABLE_CODEGEN_ROWS_THRESHOLD=0",
    [4] = "DISABLE_CODEGEN=False",
    [5] = "EXEC_SINGLE_NODE_ROWS_THRESHOLD=0",
  },
{noformat}


  was:
h4. Error Details
{noformat}
DCHECK found in log file: /data/jenkins/workspace/impala-cdw-master-staging-core-asan/repos/Impala/logs/ee_tests/impalad.FATAL
{noformat}
h4. Standard Error
{noformat}
Log file created at: 2022/03/08 05:36:37
Running on machine: impala-ec2-centos74-m5-4xlarge-ondemand-0f9e.vpc.cloudera.com
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
F0308 05:36:37.949270  7073 parquet-column-readers.cc:1287] c9417d8313c09974:eef448b800000002] Check failed: num_buffered_values_ >= num_rows (14528 vs. 232528) 
{noformat}
h4. The location points to:
{noformat}
template <bool MULTI_PAGE>
bool BaseScalarColumnReader::SkipTopLevelRows(int64_t num_rows, int64_t* remaining) {
  DCHECK_GT(num_rows, 0);
  DCHECK_GT(num_buffered_values_, 0);
  if (!MULTI_PAGE) {
    DCHECK_GE(num_buffered_values_, num_rows);   <===== HERE
  }
{noformat}
h4. Stack Trace
{noformat}
Thread 528 (crashed)
 0  libc-2.17.so + 0x351f7
 1  impalad!google::LogMessage::Flush() + 0x1eb
 2  impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
 3  impalad!bool impala::BaseScalarColumnReader::SkipTopLevelRows<false>(long, long*) [parquet-column-readers.cc : 0 + 0xd]
 4  impalad!bool impala::BaseScalarColumnReader::SkipRowsInternal<false>(long, long) [parquet-column-readers.cc : 1555 + 0x11]
 5  impalad!impala::HdfsParquetScanner::SkipRowsForColumns(std::vector<impala::ParquetColumnReader*, std::allocator<impala::ParquetColumnReader*> > const&, long*, long*) [hdfs-parquet-scanner.cc : 2376 + 0x5c]
 6  impalad!impala::Status impala::HdfsParquetScanner::AssembleRows<true>(impala::RowBatch*, bool*) [hdfs-parquet-scanner.cc : 2355 + 0x26]
 7  impalad!impala::HdfsParquetScanner::GetNextInternal(impala::RowBatch*) [hdfs-parquet-scanner.cc : 525 + 0x1b]
 8  impalad!impala::HdfsParquetScanner::ProcessSplit() [hdfs-parquet-scanner.cc : 415 + 0xd]
 9  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 500 + 0x8]
10  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 418 + 0x1e]
11  impalad!boost::function0<void>::operator()() const [function_template.hpp : 763 + 0x5]
12  impalad!impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0x9]
...
{noformat}
 
h4. Query information

{noformat}
select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31' db: test_page_index_872ff6e2
{noformat}

So the crash happened during test_page_index.

Query configuration was:

{noformat}
I0308 12:31:25.258759 17976 impala-beeswax-server.cc:516] query: Query {
  01: query (string) = "select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31'",
  03: configuration (list) = list<string>[6] {
    [0] = "CLIENT_IDENTIFIE[...](273)",
    [1] = "BATCH_SIZE=32",
    [2] = "NUM_NODES=0",
    [3] = "DISABLE_CODEGEN_ROWS_THRESHOLD=0",
    [4] = "DISABLE_CODEGEN=False",
    [5] = "EXEC_SINGLE_NODE_ROWS_THRESHOLD=0",
  },
{noformat}



> DCHECK hit in Parquet column readers during test_page_index
> -----------------------------------------------------------
>
>                 Key: IMPALA-11172
>                 URL: https://issues.apache.org/jira/browse/IMPALA-11172
>             Project: IMPALA
>          Issue Type: Bug
>          Components: Backend
>            Reporter: Zoltán Borók-Nagy
>            Priority: Major
>              Labels: broken-build
>
> h4. Error Details
> {noformat}
> DCHECK found in log file: /data/jenkins/workspace/impala-cdw-master-staging-core-asan/repos/Impala/logs/ee_tests/impalad.FATAL
> {noformat}
> h4. Standard Error
> {noformat}
> Log file created at: 2022/03/08 05:36:37
> Running on machine: impala-ec2-centos74-m5-4xlarge-ondemand-0f9e.vpc.cloudera.com
> Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
> F0308 05:36:37.949270  7073 parquet-column-readers.cc:1287] c9417d8313c09974:eef448b800000002] Check failed: num_buffered_values_ >= num_rows (14528 vs. 232528) 
> {noformat}
> h4. The failed check (parquet-column-readers.cc:1287) is:
> {noformat}
> template <bool MULTI_PAGE>
> bool BaseScalarColumnReader::SkipTopLevelRows(int64_t num_rows, int64_t* remaining) {
>   DCHECK_GT(num_rows, 0);
>   DCHECK_GT(num_buffered_values_, 0);
>   if (!MULTI_PAGE) {
>     DCHECK_GE(num_buffered_values_, num_rows);   <===== HERE
>   }
> {noformat}
> h4. Stack Trace
> {noformat}
> Thread 528 (crashed)
>  0  libc-2.17.so + 0x351f7
>  1  impalad!google::LogMessage::Flush() + 0x1eb
>  2  impalad!google::LogMessageFatal::~LogMessageFatal() + 0x9
>  3  impalad!bool impala::BaseScalarColumnReader::SkipTopLevelRows<false>(long, long*) [parquet-column-readers.cc : 0 + 0xd]
>   if (!MULTI_PAGE) {
>     DCHECK_GE(num_buffered_values_, num_rows);   <==== HERE
>   }
>  4  impalad!bool impala::BaseScalarColumnReader::SkipRowsInternal<false>(long, long) [parquet-column-readers.cc : 1555 + 0x11]
>     // Skip to the required row id within the page.
>     if (last_row < skip_row_id) {
>       if (UNLIKELY(!SkipTopLevelRows(skip_row_id - last_row, &remaining))) {   <=== HERE
>         return false;
>       }
>     }
>  5  impalad!impala::HdfsParquetScanner::SkipRowsForColumns(std::vector<impala::ParquetColumnReader*, std::allocator<impala::ParquetColumnReader*> > const&, long*, long*) [hdfs-parquet-scanner.cc : 2376 + 0x5c]
>       ParquetColumnReader* col_reader = column_readers[c];
>       // Skipping may fail for corrupted Parquet file due to mismatch of rows
>       // among columns.
>       if (UNLIKELY(!col_reader->SkipRows(*num_rows_to_skip, *skip_to_row))) {  <==== HERE
>         return Status(Substitute("Error in skipping rows in file $0.", filename()));
>       }
>     }
>  6  impalad!impala::Status impala::HdfsParquetScanner::AssembleRows<true>(impala::RowBatch*, bool*) [hdfs-parquet-scanner.cc : 2355 + 0x26]
>  7  impalad!impala::HdfsParquetScanner::GetNextInternal(impala::RowBatch*) [hdfs-parquet-scanner.cc : 525 + 0x1b]
>  8  impalad!impala::HdfsParquetScanner::ProcessSplit() [hdfs-parquet-scanner.cc : 415 + 0xd]
>  9  impalad!impala::HdfsScanNode::ProcessSplit(std::vector<impala::FilterContext, std::allocator<impala::FilterContext> > const&, impala::MemPool*, impala::io::ScanRange*, long*) [hdfs-scan-node.cc : 500 + 0x8]
> 10  impalad!impala::HdfsScanNode::ScannerThread(bool, long) [hdfs-scan-node.cc : 418 + 0x1e]
> 11  impalad!boost::function0<void>::operator()() const [function_template.hpp : 763 + 0x5]
> 12  impalad!impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, impala::Promise<long, (impala::PromiseMode)0>*) [thread.cc : 360 + 0x9]
> ...
> {noformat}
> h4. Query information
> {noformat}
> select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31' db: test_page_index_872ff6e2
> {noformat}
> The database name (test_page_index_872ff6e2) shows that the crash happened while running test_page_index.
> The query configuration was:
> {noformat}
> I0308 12:31:25.258759 17976 impala-beeswax-server.cc:516] query: Query {
>   01: query (string) = "select * from tpch_parquet.lineitem where l_commitdate = '1992-01-31'",
>   03: configuration (list) = list<string>[6] {
>     [0] = "CLIENT_IDENTIFIE[...](273)",
>     [1] = "BATCH_SIZE=32",
>     [2] = "NUM_NODES=0",
>     [3] = "DISABLE_CODEGEN_ROWS_THRESHOLD=0",
>     [4] = "DISABLE_CODEGEN=False",
>     [5] = "EXEC_SINGLE_NODE_ROWS_THRESHOLD=0",
>   },
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscribe@impala.apache.org
For additional commands, e-mail: issues-all-help@impala.apache.org