You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by lv...@apache.org on 2019/02/21 01:08:55 UTC
[impala] 04/06: Adding hostname to Disk I/O errors.
This is an automated email from the ASF dual-hosted git repository.
lv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 0b7c9645451e6c2d30e225a27a89a901df940b09
Author: Philip Zeyliger <ph...@cloudera.com>
AuthorDate: Thu Feb 7 15:31:19 2019 -0800
Adding hostname to Disk I/O errors.
I recently ran into some queries that failed like so:
WARNINGS: Disk I/O error: Could not open file: /data/...: Error(5): Input/output error
These warnings were in the profile, but I had to cross-reference impalad
logs to figure out which machine had the broken disk.
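In this commit, I've passed GetBackendString() as an extra argument wherever a DISK_IO_ERROR status is created, so the error message now names the host on which the error occurred.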
Change-Id: Ib977d2c0983ef81ab1338de090239ed57f3efde2
Reviewed-on: http://gerrit.cloudera.org:8080/12402
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exprs/timezone_db.cc | 3 ++-
be/src/runtime/io/disk-io-mgr-test.cc | 10 ++++++----
be/src/runtime/io/disk-io-mgr.cc | 8 ++++----
be/src/runtime/io/error-converter.cc | 4 ++--
be/src/runtime/io/hdfs-file-reader.cc | 8 ++++----
be/src/runtime/io/hdfs-monitored-ops.cc | 2 +-
be/src/runtime/io/local-file-reader.cc | 6 +++---
common/thrift/generate_error_codes.py | 2 +-
8 files changed, 23 insertions(+), 20 deletions(-)
diff --git a/be/src/exprs/timezone_db.cc b/be/src/exprs/timezone_db.cc
index 577dea2..ad50e6b 100644
--- a/be/src/exprs/timezone_db.cc
+++ b/be/src/exprs/timezone_db.cc
@@ -30,6 +30,7 @@
#include "gutil/strings/ascii_ctype.h"
#include "gutil/strings/substitute.h"
#include "runtime/hdfs-fs-cache.h"
+#include "util/debug-util.h"
#include "util/filesystem-util.h"
#include "util/hdfs-util.h"
#include "util/string-parser.h"
@@ -387,7 +388,7 @@ Status TimezoneDatabase::LoadZoneAliasesFromHdfs(const string& hdfs_zone_alias_c
current_bytes_read = hdfsRead(hdfs_conn, hdfs_file, buffer.data(), buffer.size());
if (current_bytes_read == 0) break;
if (current_bytes_read < 0) {
- status = Status(TErrorCode::DISK_IO_ERROR,
+ status = Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
GetHdfsErrorMsg("Error reading from HDFS file: ", hdfs_zone_alias_conf));
break;
}
diff --git a/be/src/runtime/io/disk-io-mgr-test.cc b/be/src/runtime/io/disk-io-mgr-test.cc
index 057713a..a8e07b8 100644
--- a/be/src/runtime/io/disk-io-mgr-test.cc
+++ b/be/src/runtime/io/disk-io-mgr-test.cc
@@ -360,8 +360,9 @@ TEST_F(DiskIoMgrTest, InvalidWrite) {
WriteRange::WriteDoneCallback callback =
bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes, new_range,
nullptr, nullptr, nullptr, data,
- Status(TErrorCode::DISK_IO_ERROR, "open() failed for /non-existent/file.txt. "
- "The given path doesn't exist. errno=2"), _1);
+ Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+ "open() failed for /non-existent/file.txt. "
+ "The given path doesn't exist. errno=2"), _1);
*new_range = pool_.Add(new WriteRange(tmp_file, rand(), 0, callback));
(*new_range)->SetData(reinterpret_cast<uint8_t*>(data), sizeof(int32_t));
@@ -378,7 +379,8 @@ TEST_F(DiskIoMgrTest, InvalidWrite) {
new_range = pool_.Add(new WriteRange*);
callback = bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes,
new_range, nullptr, nullptr, nullptr, data,
- Status(TErrorCode::DISK_IO_ERROR, "fseek() failed for /tmp/disk_io_mgr_test.txt. "
+ Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+ "fseek() failed for /tmp/disk_io_mgr_test.txt. "
"Invalid inputs. errno=22, offset=-1"), _1);
*new_range = pool_.Add(new WriteRange(tmp_file, -1, 0, callback));
@@ -466,7 +468,7 @@ void DiskIoMgrTest::AddWriteRange(int num_of_writes, int32_t* data,
WriteRange::WriteDoneCallback callback =
bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes,
nullptr, nullptr, nullptr, nullptr, data,
- Status(TErrorCode::DISK_IO_ERROR, expected_output), _1);
+ Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), expected_output), _1);
WriteRange* write_range = pool_.Add(new WriteRange(file_name, offset, 0, callback));
write_range->SetData(reinterpret_cast<uint8_t*>(data), sizeof(int32_t));
EXPECT_OK(writer->AddWriteRange(write_range));
diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index ce56be0..394745f 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -293,15 +293,15 @@ void DiskIoMgr::UnregisterContext(RequestContext* reader) {
Status DiskIoMgr::ValidateScanRange(ScanRange* range) {
int disk_id = range->disk_id();
if (disk_id < 0 || disk_id >= disk_queues_.size()) {
- return Status(TErrorCode::DISK_IO_ERROR,
- Substitute("Invalid scan range. Bad disk id: $0", disk_id));
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+ Substitute("Invalid scan range. Bad disk id: $0", disk_id));
}
if (range->offset() < 0) {
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
Substitute("Invalid scan range. Negative offset $0", range->offset()));
}
if (range->len() <= 0) {
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
Substitute("Invalid scan range. Non-positive length $0", range->len()));
}
return Status::OK();
diff --git a/be/src/runtime/io/error-converter.cc b/be/src/runtime/io/error-converter.cc
index 966558d..547f086 100644
--- a/be/src/runtime/io/error-converter.cc
+++ b/be/src/runtime/io/error-converter.cc
@@ -49,8 +49,8 @@ unordered_map<int, string> ErrorConverter::errno_to_error_text_map_(
Status ErrorConverter::GetErrorStatusFromErrno(const string& function_name,
const string& file_path, int err_no, const Params& params) {
- return Status(ErrorMsg(TErrorCode::DISK_IO_ERROR, GetErrorText(function_name,
- file_path, err_no, params)));
+ return Status(ErrorMsg(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+ GetErrorText(function_name, file_path, err_no, params)));
}
string ErrorConverter::GetErrorText(const string& function_name,
diff --git a/be/src/runtime/io/hdfs-file-reader.cc b/be/src/runtime/io/hdfs-file-reader.cc
index b495d61..0bbf984 100644
--- a/be/src/runtime/io/hdfs-file-reader.cc
+++ b/be/src/runtime/io/hdfs-file-reader.cc
@@ -59,7 +59,7 @@ Status HdfsFileReader::Open(bool use_file_handle_cache) {
if (hdfsSeek(hdfs_fs_, exclusive_hdfs_fh_->file(), scan_range_->offset_) != 0) {
// Destroy the file handle
io_mgr->ReleaseExclusiveHdfsFileHandle(std::move(exclusive_hdfs_fh_));
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
Substitute("Error seeking to $0 in file: $1 $2", scan_range_->offset(),
*scan_range_->file_string(), GetHdfsErrorMsg("")));
}
@@ -165,7 +165,7 @@ Status HdfsFileReader::ReadFromPosInternal(hdfsFile hdfs_file, int64_t position_
if (FLAGS_use_hdfs_pread) {
*bytes_read = hdfsPread(hdfs_fs_, hdfs_file, position_in_file, buffer, chunk_size);
if (*bytes_read == -1) {
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
GetHdfsErrorMsg("Error reading from HDFS file: ",
*scan_range_->file_string()));
}
@@ -174,14 +174,14 @@ Status HdfsFileReader::ReadFromPosInternal(hdfsFile hdfs_file, int64_t position_
// location. Seek to the appropriate location.
if (is_borrowed_fh) {
if (hdfsSeek(hdfs_fs_, hdfs_file, position_in_file) != 0) {
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
Substitute("Error seeking to $0 in file: $1: $2",
position_in_file, *scan_range_->file_string(), GetHdfsErrorMsg("")));
}
}
*bytes_read = hdfsRead(hdfs_fs_, hdfs_file, buffer, chunk_size);
if (*bytes_read == -1) {
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
GetHdfsErrorMsg("Error reading from HDFS file: ",
*scan_range_->file_string()));
}
diff --git a/be/src/runtime/io/hdfs-monitored-ops.cc b/be/src/runtime/io/hdfs-monitored-ops.cc
index 3ea5c86..f864b7c 100644
--- a/be/src/runtime/io/hdfs-monitored-ops.cc
+++ b/be/src/runtime/io/hdfs-monitored-ops.cc
@@ -69,7 +69,7 @@ Status OpenHdfsFileOp::Execute() {
if (hdfs_file_ == nullptr) {
// GetHdfsErrorMsg references thread local state to get error information, so it
// must happen in the same thread as the hdfsOpenFile().
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
GetHdfsErrorMsg("Failed to open HDFS file ", fname_));
}
return Status::OK();
diff --git a/be/src/runtime/io/local-file-reader.cc b/be/src/runtime/io/local-file-reader.cc
index 3f88106..9be45a9 100644
--- a/be/src/runtime/io/local-file-reader.cc
+++ b/be/src/runtime/io/local-file-reader.cc
@@ -40,7 +40,7 @@ Status LocalFileReader::Open(bool use_file_handle_cache) {
file_ = fopen(scan_range_->file(), "r");
if (file_ == nullptr) {
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
Substitute("Could not open file: $0: $1", *scan_range_->file_string(),
GetStrErrMsg()));
}
@@ -68,7 +68,7 @@ Status LocalFileReader::ReadFromPos(int64_t file_offset, uint8_t* buffer,
if (fseek(file_, file_offset, SEEK_SET) == -1) {
fclose(file_);
file_ = nullptr;
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
Substitute("Could not seek to $0 "
"for file: $1: $2", scan_range_->offset(),
*scan_range_->file_string(), GetStrErrMsg()));
@@ -78,7 +78,7 @@ Status LocalFileReader::ReadFromPos(int64_t file_offset, uint8_t* buffer,
DCHECK_LE(*bytes_read, bytes_to_read);
if (*bytes_read < bytes_to_read) {
if (ferror(file_) != 0) {
- return Status(TErrorCode::DISK_IO_ERROR,
+ return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
Substitute("Error reading from $0"
"at byte offset: $1: $2", file_,
file_offset, GetStrErrMsg()));
diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py
index 93fed7d..10f7d4d 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -335,7 +335,7 @@ error_codes = (
("THREAD_CREATION_FAILED", 109, "Failed to create thread $0 in category $1: $2"),
- ("DISK_IO_ERROR", 110, "Disk I/O error: $0"),
+ ("DISK_IO_ERROR", 110, "Disk I/O error on $0: $1"),
("DATASTREAM_RECVR_CLOSED", 111,
"DataStreamRecvr for fragment=$0, node=$1 is closed already"),