You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by lv...@apache.org on 2019/02/21 01:08:55 UTC

[impala] 04/06: Adding hostname to Disk I/O errors.

This is an automated email from the ASF dual-hosted git repository.

lv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 0b7c9645451e6c2d30e225a27a89a901df940b09
Author: Philip Zeyliger <ph...@cloudera.com>
AuthorDate: Thu Feb 7 15:31:19 2019 -0800

    Adding hostname to Disk I/O errors.
    
    I recently ran into some queries that failed like so:
    
      WARNINGS: Disk I/O error: Could not open file: /data/...: Error(5): Input/output error
    
    These warnings were in the profile, but I had to cross-reference impalad
    logs to figure out which machine had the broken disk.
    
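    In this commit, I've passed GetBackendString() as an extra argument at each
    DISK_IO_ERROR call site, so the error message itself identifies the backend
    that hit the error.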
    
    Change-Id: Ib977d2c0983ef81ab1338de090239ed57f3efde2
    Reviewed-on: http://gerrit.cloudera.org:8080/12402
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exprs/timezone_db.cc             |  3 ++-
 be/src/runtime/io/disk-io-mgr-test.cc   | 10 ++++++----
 be/src/runtime/io/disk-io-mgr.cc        |  8 ++++----
 be/src/runtime/io/error-converter.cc    |  4 ++--
 be/src/runtime/io/hdfs-file-reader.cc   |  8 ++++----
 be/src/runtime/io/hdfs-monitored-ops.cc |  2 +-
 be/src/runtime/io/local-file-reader.cc  |  6 +++---
 common/thrift/generate_error_codes.py   |  2 +-
 8 files changed, 23 insertions(+), 20 deletions(-)

diff --git a/be/src/exprs/timezone_db.cc b/be/src/exprs/timezone_db.cc
index 577dea2..ad50e6b 100644
--- a/be/src/exprs/timezone_db.cc
+++ b/be/src/exprs/timezone_db.cc
@@ -30,6 +30,7 @@
 #include "gutil/strings/ascii_ctype.h"
 #include "gutil/strings/substitute.h"
 #include "runtime/hdfs-fs-cache.h"
+#include "util/debug-util.h"
 #include "util/filesystem-util.h"
 #include "util/hdfs-util.h"
 #include "util/string-parser.h"
@@ -387,7 +388,7 @@ Status TimezoneDatabase::LoadZoneAliasesFromHdfs(const string& hdfs_zone_alias_c
     current_bytes_read = hdfsRead(hdfs_conn, hdfs_file, buffer.data(), buffer.size());
     if (current_bytes_read == 0) break;
     if (current_bytes_read < 0) {
-      status = Status(TErrorCode::DISK_IO_ERROR,
+      status = Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
           GetHdfsErrorMsg("Error reading from HDFS file: ", hdfs_zone_alias_conf));
       break;
     }
diff --git a/be/src/runtime/io/disk-io-mgr-test.cc b/be/src/runtime/io/disk-io-mgr-test.cc
index 057713a..a8e07b8 100644
--- a/be/src/runtime/io/disk-io-mgr-test.cc
+++ b/be/src/runtime/io/disk-io-mgr-test.cc
@@ -360,8 +360,9 @@ TEST_F(DiskIoMgrTest, InvalidWrite) {
   WriteRange::WriteDoneCallback callback =
       bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes, new_range,
           nullptr, nullptr, nullptr, data,
-          Status(TErrorCode::DISK_IO_ERROR, "open() failed for /non-existent/file.txt. "
-              "The given path doesn't exist. errno=2"), _1);
+          Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+            "open() failed for /non-existent/file.txt. "
+            "The given path doesn't exist. errno=2"), _1);
   *new_range = pool_.Add(new WriteRange(tmp_file, rand(), 0, callback));
 
   (*new_range)->SetData(reinterpret_cast<uint8_t*>(data), sizeof(int32_t));
@@ -378,7 +379,8 @@ TEST_F(DiskIoMgrTest, InvalidWrite) {
   new_range = pool_.Add(new WriteRange*);
   callback = bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes,
       new_range, nullptr, nullptr, nullptr, data,
-      Status(TErrorCode::DISK_IO_ERROR, "fseek() failed for /tmp/disk_io_mgr_test.txt. "
+      Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+          "fseek() failed for /tmp/disk_io_mgr_test.txt. "
           "Invalid inputs. errno=22, offset=-1"), _1);
 
   *new_range = pool_.Add(new WriteRange(tmp_file, -1, 0, callback));
@@ -466,7 +468,7 @@ void DiskIoMgrTest::AddWriteRange(int num_of_writes, int32_t* data,
   WriteRange::WriteDoneCallback callback =
       bind(mem_fn(&DiskIoMgrTest::WriteValidateCallback), this, num_of_writes,
           nullptr, nullptr, nullptr, nullptr, data,
-          Status(TErrorCode::DISK_IO_ERROR, expected_output), _1);
+          Status(TErrorCode::DISK_IO_ERROR, GetBackendString(), expected_output), _1);
   WriteRange* write_range = pool_.Add(new WriteRange(file_name, offset, 0, callback));
   write_range->SetData(reinterpret_cast<uint8_t*>(data), sizeof(int32_t));
   EXPECT_OK(writer->AddWriteRange(write_range));
diff --git a/be/src/runtime/io/disk-io-mgr.cc b/be/src/runtime/io/disk-io-mgr.cc
index ce56be0..394745f 100644
--- a/be/src/runtime/io/disk-io-mgr.cc
+++ b/be/src/runtime/io/disk-io-mgr.cc
@@ -293,15 +293,15 @@ void DiskIoMgr::UnregisterContext(RequestContext* reader) {
 Status DiskIoMgr::ValidateScanRange(ScanRange* range) {
   int disk_id = range->disk_id();
   if (disk_id < 0 || disk_id >= disk_queues_.size()) {
-    return Status(TErrorCode::DISK_IO_ERROR,
-        Substitute("Invalid scan range.  Bad disk id: $0", disk_id));
+    return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+        Substitute("Invalid scan range. Bad disk id: $0", disk_id));
   }
   if (range->offset() < 0) {
-    return Status(TErrorCode::DISK_IO_ERROR,
+    return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
         Substitute("Invalid scan range. Negative offset $0", range->offset()));
   }
   if (range->len() <= 0) {
-    return Status(TErrorCode::DISK_IO_ERROR,
+    return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
         Substitute("Invalid scan range. Non-positive length $0", range->len()));
   }
   return Status::OK();
diff --git a/be/src/runtime/io/error-converter.cc b/be/src/runtime/io/error-converter.cc
index 966558d..547f086 100644
--- a/be/src/runtime/io/error-converter.cc
+++ b/be/src/runtime/io/error-converter.cc
@@ -49,8 +49,8 @@ unordered_map<int, string> ErrorConverter::errno_to_error_text_map_(
 
 Status ErrorConverter::GetErrorStatusFromErrno(const string& function_name,
     const string& file_path, int err_no, const Params& params) {
-  return Status(ErrorMsg(TErrorCode::DISK_IO_ERROR, GetErrorText(function_name,
-      file_path, err_no, params)));
+  return Status(ErrorMsg(TErrorCode::DISK_IO_ERROR, GetBackendString(),
+      GetErrorText(function_name, file_path, err_no, params)));
 }
 
 string ErrorConverter::GetErrorText(const string& function_name,
diff --git a/be/src/runtime/io/hdfs-file-reader.cc b/be/src/runtime/io/hdfs-file-reader.cc
index b495d61..0bbf984 100644
--- a/be/src/runtime/io/hdfs-file-reader.cc
+++ b/be/src/runtime/io/hdfs-file-reader.cc
@@ -59,7 +59,7 @@ Status HdfsFileReader::Open(bool use_file_handle_cache) {
   if (hdfsSeek(hdfs_fs_, exclusive_hdfs_fh_->file(), scan_range_->offset_) != 0) {
     // Destroy the file handle
     io_mgr->ReleaseExclusiveHdfsFileHandle(std::move(exclusive_hdfs_fh_));
-    return Status(TErrorCode::DISK_IO_ERROR,
+    return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
         Substitute("Error seeking to $0 in file: $1 $2", scan_range_->offset(),
             *scan_range_->file_string(), GetHdfsErrorMsg("")));
   }
@@ -165,7 +165,7 @@ Status HdfsFileReader::ReadFromPosInternal(hdfsFile hdfs_file, int64_t position_
   if (FLAGS_use_hdfs_pread) {
     *bytes_read = hdfsPread(hdfs_fs_, hdfs_file, position_in_file, buffer, chunk_size);
     if (*bytes_read == -1) {
-      return Status(TErrorCode::DISK_IO_ERROR,
+      return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
           GetHdfsErrorMsg("Error reading from HDFS file: ",
               *scan_range_->file_string()));
     }
@@ -174,14 +174,14 @@ Status HdfsFileReader::ReadFromPosInternal(hdfsFile hdfs_file, int64_t position_
     // location. Seek to the appropriate location.
     if (is_borrowed_fh) {
       if (hdfsSeek(hdfs_fs_, hdfs_file, position_in_file) != 0) {
-        return Status(TErrorCode::DISK_IO_ERROR,
+        return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
             Substitute("Error seeking to $0 in file: $1: $2",
                 position_in_file, *scan_range_->file_string(), GetHdfsErrorMsg("")));
       }
     }
     *bytes_read = hdfsRead(hdfs_fs_, hdfs_file, buffer, chunk_size);
     if (*bytes_read == -1) {
-      return Status(TErrorCode::DISK_IO_ERROR,
+      return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
           GetHdfsErrorMsg("Error reading from HDFS file: ",
               *scan_range_->file_string()));
     }
diff --git a/be/src/runtime/io/hdfs-monitored-ops.cc b/be/src/runtime/io/hdfs-monitored-ops.cc
index 3ea5c86..f864b7c 100644
--- a/be/src/runtime/io/hdfs-monitored-ops.cc
+++ b/be/src/runtime/io/hdfs-monitored-ops.cc
@@ -69,7 +69,7 @@ Status OpenHdfsFileOp::Execute() {
   if (hdfs_file_ == nullptr) {
     // GetHdfsErrorMsg references thread local state to get error information, so it
     // must happen in the same thread as the hdfsOpenFile().
-    return Status(TErrorCode::DISK_IO_ERROR,
+    return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
         GetHdfsErrorMsg("Failed to open HDFS file ", fname_));
   }
   return Status::OK();
diff --git a/be/src/runtime/io/local-file-reader.cc b/be/src/runtime/io/local-file-reader.cc
index 3f88106..9be45a9 100644
--- a/be/src/runtime/io/local-file-reader.cc
+++ b/be/src/runtime/io/local-file-reader.cc
@@ -40,7 +40,7 @@ Status LocalFileReader::Open(bool use_file_handle_cache) {
 
   file_ = fopen(scan_range_->file(), "r");
   if (file_ == nullptr) {
-    return Status(TErrorCode::DISK_IO_ERROR,
+    return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
         Substitute("Could not open file: $0: $1", *scan_range_->file_string(),
             GetStrErrMsg()));
   }
@@ -68,7 +68,7 @@ Status LocalFileReader::ReadFromPos(int64_t file_offset, uint8_t* buffer,
   if (fseek(file_, file_offset, SEEK_SET) == -1) {
     fclose(file_);
     file_ = nullptr;
-    return Status(TErrorCode::DISK_IO_ERROR,
+    return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
         Substitute("Could not seek to $0 "
             "for file: $1: $2", scan_range_->offset(),
             *scan_range_->file_string(), GetStrErrMsg()));
@@ -78,7 +78,7 @@ Status LocalFileReader::ReadFromPos(int64_t file_offset, uint8_t* buffer,
   DCHECK_LE(*bytes_read, bytes_to_read);
   if (*bytes_read < bytes_to_read) {
     if (ferror(file_) != 0) {
-      return Status(TErrorCode::DISK_IO_ERROR,
+      return Status(TErrorCode::DISK_IO_ERROR, GetBackendString(),
           Substitute("Error reading from $0"
               "at byte offset: $1: $2", file_,
               file_offset, GetStrErrMsg()));
diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py
index 93fed7d..10f7d4d 100755
--- a/common/thrift/generate_error_codes.py
+++ b/common/thrift/generate_error_codes.py
@@ -335,7 +335,7 @@ error_codes = (
 
   ("THREAD_CREATION_FAILED", 109, "Failed to create thread $0 in category $1: $2"),
 
-  ("DISK_IO_ERROR", 110, "Disk I/O error: $0"),
+  ("DISK_IO_ERROR", 110, "Disk I/O error on $0: $1"),
 
   ("DATASTREAM_RECVR_CLOSED", 111,
    "DataStreamRecvr for fragment=$0, node=$1 is closed already"),