You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/11/08 06:21:16 UTC

[3/3] incubator-impala git commit: IMPALA-5999: Fix LLVM linkage errors due LibCache sync issues

IMPALA-5999: Fix LLVM linkage errors due LibCache sync issues

Some test runs are currently failing randomly in test_ir_functions due
to LLVM linkage error. This happens when impala tries to link a function
from  a lib file on hdfs, but it somehow got removed from the lib cache
before it could link. This results in a new file being cached but with
a slightly different local filename, and since impala only uses local
filenames to check for collision for linking of LLVM modules, it ends
up linking the same file twice and hence encounters an error.
This patch fixes this issue by using the hdfs file path to check for
collision of lib files.

Testing:
Added a backend test that tries to link the same lib twice.

Change-Id: Iefa23bbf47998fe7e84011e1edf8e794e94a1757
Reviewed-on: http://gerrit.cloudera.org:8080/8487
Reviewed-by: Tim Armstrong <ta...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/1ca3adf4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/1ca3adf4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/1ca3adf4

Branch: refs/heads/master
Commit: 1ca3adf46c5ef5055c13fd3ce57e7c53218c219c
Parents: 1673e72
Author: Bikramjeet Vig <bi...@cloudera.com>
Authored: Mon Nov 6 16:15:50 2017 -0800
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Wed Nov 8 02:37:00 2017 +0000

----------------------------------------------------------------------
 be/src/codegen/llvm-codegen-test.cc | 25 +++++++++++++++++++------
 be/src/codegen/llvm-codegen.cc      | 19 +++++++++++--------
 be/src/codegen/llvm-codegen.h       | 14 +++++++++-----
 be/src/util/filesystem-util.cc      | 12 ------------
 be/src/util/filesystem-util.h       |  4 ----
 5 files changed, 39 insertions(+), 35 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ca3adf4/be/src/codegen/llvm-codegen-test.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen-test.cc b/be/src/codegen/llvm-codegen-test.cc
index 9342633..e42b041 100644
--- a/be/src/codegen/llvm-codegen-test.cc
+++ b/be/src/codegen/llvm-codegen-test.cc
@@ -97,8 +97,12 @@ class LlvmCodeGenTest : public testing:: Test {
 
   static Status FinalizeModule(LlvmCodeGen* codegen) { return codegen->FinalizeModule(); }
 
-  static Status LinkModule(LlvmCodeGen* codegen, const string& file) {
-    return codegen->LinkModule(file);
+  static Status LinkModuleFromLocalFs(LlvmCodeGen* codegen, const string& file) {
+    return codegen->LinkModuleFromLocalFs(file);
+  }
+
+  static Status LinkModuleFromHdfs(LlvmCodeGen* codegen, const string& hdfs_file) {
+    return codegen->LinkModuleFromHdfs(hdfs_file);
   }
 };
 
@@ -465,18 +469,27 @@ TEST_F(LlvmCodeGenTest, HashTest) {
 // captured by impala. An error is induced by asking Llvm to link the same lib twice.
 TEST_F(LlvmCodeGenTest, HandleLinkageError) {
   string ir_file_path("llvm-ir/test-loop.bc");
-  string temp_copy_path("/tmp/test-loop.bc");
   string module_file;
   PathBuilder::GetFullPath(ir_file_path, &module_file);
-  ASSERT_OK(FileSystemUtil::CopyFile(module_file, temp_copy_path));
   scoped_ptr<LlvmCodeGen> codegen;
   ASSERT_OK(CreateFromFile(module_file.c_str(), &codegen));
-  EXPECT_TRUE(codegen.get() != NULL);
-  Status status = LinkModule(codegen.get(), temp_copy_path);
+  EXPECT_TRUE(codegen.get() != nullptr);
+  Status status = LinkModuleFromLocalFs(codegen.get(), module_file);
   EXPECT_STR_CONTAINS(status.GetDetail(), "symbol multiply defined");
   codegen->Close();
 }
 
+// Test that Impala does not return error when trying to link the same lib file twice.
+TEST_F(LlvmCodeGenTest, LinkageTest) {
+  scoped_ptr<LlvmCodeGen> codegen;
+  ASSERT_OK(LlvmCodeGen::CreateImpalaCodegen(runtime_state_, nullptr, "test", &codegen));
+  EXPECT_TRUE(codegen.get() != nullptr);
+  string hdfs_file_path("/test-warehouse/test-udfs.ll");
+  ASSERT_OK(LinkModuleFromHdfs(codegen.get(), hdfs_file_path));
+  ASSERT_OK(LinkModuleFromHdfs(codegen.get(), hdfs_file_path));
+  codegen->Close();
+}
+
 }
 
 int main(int argc, char **argv) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ca3adf4/be/src/codegen/llvm-codegen.cc
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc
index 21678f2..777ca0a 100644
--- a/be/src/codegen/llvm-codegen.cc
+++ b/be/src/codegen/llvm-codegen.cc
@@ -276,9 +276,7 @@ Status LlvmCodeGen::LoadModuleFromMemory(unique_ptr<MemoryBuffer> module_ir_buf,
 }
 
 // TODO: Create separate counters/timers (file size, load time) for each module linked
-Status LlvmCodeGen::LinkModule(const string& file) {
-  if (linked_modules_.find(file) != linked_modules_.end()) return Status::OK();
-
+Status LlvmCodeGen::LinkModuleFromLocalFs(const string& file) {
   SCOPED_TIMER(profile_->total_time_counter());
   unique_ptr<Module> new_module;
   RETURN_IF_ERROR(LoadModuleFromFile(file, &new_module));
@@ -308,8 +306,16 @@ Status LlvmCodeGen::LinkModule(const string& file) {
     if (!diagnostic_err.empty()) ss << " " << diagnostic_err;
     return Status(ss.str());
   }
-  linked_modules_.insert(file);
+  return Status::OK();
+}
 
+Status LlvmCodeGen::LinkModuleFromHdfs(const string& hdfs_location) {
+  if (linked_modules_.find(hdfs_location) != linked_modules_.end()) return Status::OK();
+  string local_path;
+  RETURN_IF_ERROR(LibCache::instance()->GetLocalLibPath(hdfs_location, LibCache::TYPE_IR,
+      &local_path));
+  RETURN_IF_ERROR(LinkModuleFromLocalFs(local_path));
+  linked_modules_.insert(hdfs_location);
   return Status::OK();
 }
 
@@ -852,12 +858,9 @@ Status LlvmCodeGen::LoadFunction(const TFunction& fn, const std::string& symbol,
     // We're running an IR UDF.
     DCHECK_EQ(fn.binary_type, TFunctionBinaryType::IR);
 
-    string local_path;
-    RETURN_IF_ERROR(LibCache::instance()->GetLocalLibPath(
-        fn.hdfs_location, LibCache::TYPE_IR, &local_path));
     // Link the UDF module into this query's main module so the UDF's functions are
     // available in the main module.
-    RETURN_IF_ERROR(LinkModule(local_path));
+    RETURN_IF_ERROR(LinkModuleFromHdfs(fn.hdfs_location));
 
     *llvm_fn = GetFunction(symbol, true);
     if (*llvm_fn == NULL) {

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ca3adf4/be/src/codegen/llvm-codegen.h
----------------------------------------------------------------------
diff --git a/be/src/codegen/llvm-codegen.h b/be/src/codegen/llvm-codegen.h
index c63f001..c95fd28 100644
--- a/be/src/codegen/llvm-codegen.h
+++ b/be/src/codegen/llvm-codegen.h
@@ -579,9 +579,13 @@ class LlvmCodeGen {
   Status LoadModuleFromMemory(std::unique_ptr<llvm::MemoryBuffer> module_ir_buf,
       std::string module_name, std::unique_ptr<llvm::Module>* module);
 
-  /// Loads a module at 'file' and links it to the module associated with
-  /// this LlvmCodeGen object. The module must be on the local filesystem.
-  Status LinkModule(const std::string& file);
+  /// Loads a module at 'file' and links it to the module associated with this
+  /// LlvmCodeGen object. The 'file' must be on the local filesystem.
+  Status LinkModuleFromLocalFs(const std::string& file);
+
+  /// Same as 'LinkModuleFromLocalFs', but takes an hdfs file location instead and makes
+  /// sure that the same hdfs file is not linked twice.
+  Status LinkModuleFromHdfs(const std::string& hdfs_file);
 
   /// Strip global constructors and destructors from an LLVM module. We never run them
   /// anyway (they must be explicitly invoked) so it is dead code.
@@ -761,8 +765,8 @@ class LlvmCodeGen {
   /// we can codegen a loop unrolled hash function.
   std::map<int, llvm::Function*> hash_fns_;
 
-  /// The locations of modules that have been linked. Used to avoid linking the same module
-  /// twice, which causes symbol collision errors.
+  /// The locations of modules that have been linked. Uses hdfs file location as the key.
+  /// Used to avoid linking the same module twice, which causes symbol collision errors.
   std::set<std::string> linked_modules_;
 
   /// The vector of functions to automatically JIT compile after FinalizeModule().

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ca3adf4/be/src/util/filesystem-util.cc
----------------------------------------------------------------------
diff --git a/be/src/util/filesystem-util.cc b/be/src/util/filesystem-util.cc
index 95ecc53..a0cacdf 100644
--- a/be/src/util/filesystem-util.cc
+++ b/be/src/util/filesystem-util.cc
@@ -158,16 +158,4 @@ uint64_t FileSystemUtil::MaxNumFileHandles() {
   return 0ul;
 }
 
-Status FileSystemUtil::CopyFile(const string& from_path, const string& to_path) {
-  error_code errcode;
-  filesystem::copy_file(from_path, to_path, filesystem::copy_option::overwrite_if_exists,
-      errcode);
-  if (errcode != errc::success) {
-    return Status(ErrorMsg(TErrorCode::RUNTIME_ERROR, Substitute(
-        "Encountered exception while copying file from path $0 to $1: $2",
-        from_path, to_path, errcode.message())));
-  }
-  return Status::OK();
-}
-
 } // namespace impala

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/1ca3adf4/be/src/util/filesystem-util.h
----------------------------------------------------------------------
diff --git a/be/src/util/filesystem-util.h b/be/src/util/filesystem-util.h
index 9b78a4a..1d497c3 100644
--- a/be/src/util/filesystem-util.h
+++ b/be/src/util/filesystem-util.h
@@ -52,10 +52,6 @@ class FileSystemUtil {
   /// Returns the currently allowed maximum of possible file descriptors. In case of an
   /// error returns 0.
   static uint64_t MaxNumFileHandles();
-
-  /// Copy the specified file to the specified 'to_path'. Overwrite the file if it
-  /// already exists.
-  static Status CopyFile(const string& from_path, const string& to_path);
 };
 
 }