You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by ju...@apache.org on 2022/06/25 03:57:46 UTC

[tvm] branch main updated: [LLVM] Include LLVM headers in files that use them, not in llvm_common.h (#11888)

This is an automated email from the ASF dual-hosted git repository.

junrushao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 59fb4219ff [LLVM] Include LLVM headers in files that use them, not in llvm_common.h (#11888)
59fb4219ff is described below

commit 59fb4219ffce7b7cbcabebe6fdd40d850e4b016b
Author: Krzysztof Parzyszek <kp...@quicinc.com>
AuthorDate: Fri Jun 24 22:57:42 2022 -0500

    [LLVM] Include LLVM headers in files that use them, not in llvm_common.h (#11888)
    
    This is following the same principle we use everywhere else in TVM, that
    is, every source file includes headers that it depends on. While including
    unnecessary LLVM headers (which may happen by including llvm_common.h)
    is not actively harmful, it makes the header dependencies much less
    transparent.
---
 src/target/llvm/codegen_amdgpu.cc      | 41 +++++++++++++++++-----
 src/target/llvm/codegen_arm.cc         | 14 ++++++--
 src/target/llvm/codegen_blob.cc        | 26 ++++++++++++++
 src/target/llvm/codegen_blob.h         |  8 +++--
 src/target/llvm/codegen_cpu.cc         | 31 +++++++++++++++++
 src/target/llvm/codegen_cpu.h          | 22 ++++++++++++
 src/target/llvm/codegen_hexagon.cc     | 32 +++++++++++++----
 src/target/llvm/codegen_llvm.cc        | 63 +++++++++++++++++++++++++++++++++-
 src/target/llvm/codegen_llvm.h         | 48 ++++++++++++++++++++++++--
 src/target/llvm/codegen_nvptx.cc       | 45 +++++++++++++++++++-----
 src/target/llvm/codegen_params.cc      |  8 ++++-
 src/target/llvm/codegen_params.h       |  7 ++--
 src/target/llvm/codegen_x86_64.cc      | 17 +++++++--
 src/target/llvm/intrin_rule_hexagon.cc |  1 +
 src/target/llvm/intrin_rule_llvm.cc    |  1 +
 src/target/llvm/intrin_rule_llvm.h     |  5 +--
 src/target/llvm/llvm_common.cc         | 17 +++++++++
 src/target/llvm/llvm_common.h          | 58 ++++---------------------------
 src/target/llvm/llvm_module.cc         | 21 ++++++++++++
 19 files changed, 374 insertions(+), 91 deletions(-)

diff --git a/src/target/llvm/codegen_amdgpu.cc b/src/target/llvm/codegen_amdgpu.cc
index 321a3ad1fc..2e5a4bc23b 100644
--- a/src/target/llvm/codegen_amdgpu.cc
+++ b/src/target/llvm/codegen_amdgpu.cc
@@ -23,6 +23,27 @@
  */
 #ifdef TVM_LLVM_VERSION
 
+#include <llvm/ADT/SmallString.h>
+#include <llvm/IR/Attributes.h>
+#include <llvm/IR/CallingConv.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/GlobalValue.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Intrinsics.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/IR/IntrinsicsAMDGPU.h>
+#endif
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IRReader/IRReader.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/Support/Alignment.h>
+#endif
+#include <llvm/Support/CodeGen.h>
+#include <llvm/Support/SourceMgr.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Transforms/IPO/PassManagerBuilder.h>
+#include <llvm/Transforms/Utils/Cloning.h>
 #include <tvm/runtime/c_runtime_api.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/runtime/registry.h>
@@ -30,6 +51,7 @@
 #include "../../runtime/rocm/rocm_module.h"
 #include "../build_common.h"
 #include "codegen_llvm.h"
+#include "llvm_common.h"
 
 namespace tvm {
 namespace codegen {
@@ -60,6 +82,9 @@ static inline int DetectROCMmaxThreadsPerBlock() {
 // AMDGPU code generator.
 class CodeGenAMDGPU : public CodeGenLLVM {
  public:
+  CodeGenAMDGPU() = default;
+  virtual ~CodeGenAMDGPU() = default;
+
   void AddFunction(const PrimFunc& f) final {
     // add function as void return value
     CodeGenLLVM::AddFunctionInternal(f, true);
@@ -128,17 +153,17 @@ class CodeGenAMDGPU : public CodeGenLLVM {
   // Return the thread index via intrinsics.
   llvm::Value* GetThreadIndex(const IterVar& iv) final {
     runtime::ThreadScope ts = runtime::ThreadScope::Create(iv->thread_tag);
-    llvm::Intrinsic::ID intrin_id = ::llvm::Intrinsic::amdgcn_workitem_id_x;
+    llvm::Intrinsic::ID intrin_id = llvm::Intrinsic::amdgcn_workitem_id_x;
     if (ts.rank == 1) {
       switch (ts.dim_index) {
         case 0:
-          intrin_id = ::llvm::Intrinsic::amdgcn_workitem_id_x;
+          intrin_id = llvm::Intrinsic::amdgcn_workitem_id_x;
           break;
         case 1:
-          intrin_id = ::llvm::Intrinsic::amdgcn_workitem_id_y;
+          intrin_id = llvm::Intrinsic::amdgcn_workitem_id_y;
           break;
         case 2:
-          intrin_id = ::llvm::Intrinsic::amdgcn_workitem_id_z;
+          intrin_id = llvm::Intrinsic::amdgcn_workitem_id_z;
           break;
         default:
           LOG(FATAL) << "unknown workitem idx";
@@ -147,13 +172,13 @@ class CodeGenAMDGPU : public CodeGenLLVM {
       ICHECK_EQ(ts.rank, 0);
       switch (ts.dim_index) {
         case 0:
-          intrin_id = ::llvm::Intrinsic::amdgcn_workgroup_id_x;
+          intrin_id = llvm::Intrinsic::amdgcn_workgroup_id_x;
           break;
         case 1:
-          intrin_id = ::llvm::Intrinsic::amdgcn_workgroup_id_y;
+          intrin_id = llvm::Intrinsic::amdgcn_workgroup_id_y;
           break;
         case 2:
-          intrin_id = ::llvm::Intrinsic::amdgcn_workgroup_id_z;
+          intrin_id = llvm::Intrinsic::amdgcn_workgroup_id_z;
           break;
         default:
           LOG(FATAL) << "unknown workgroup idx";
@@ -169,7 +194,7 @@ class CodeGenAMDGPU : public CodeGenLLVM {
       return nullptr;
     } else if (sync == "shared") {
       llvm::Function* f =
-          llvm::Intrinsic::getDeclaration(module_.get(), ::llvm::Intrinsic::amdgcn_s_barrier);
+          llvm::Intrinsic::getDeclaration(module_.get(), llvm::Intrinsic::amdgcn_s_barrier);
       return builder_->CreateCall(f, {});
     } else {
       LOG(FATAL) << "Do not support sync " << sync;
diff --git a/src/target/llvm/codegen_arm.cc b/src/target/llvm/codegen_arm.cc
index 7b87dd5bde..f5ce0d550b 100644
--- a/src/target/llvm/codegen_arm.cc
+++ b/src/target/llvm/codegen_arm.cc
@@ -23,7 +23,12 @@
  */
 #ifdef TVM_LLVM_VERSION
 
+#include <llvm/IR/Intrinsics.h>
 #include <tvm/runtime/registry.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/IR/IntrinsicsARM.h>
+#endif
+#include <llvm/Target/TargetMachine.h>
 
 #include "codegen_cpu.h"
 
@@ -34,6 +39,9 @@ namespace codegen {
 // how to override behavior llvm code generator for specific target
 class CodeGenARM final : public CodeGenCPU {
  public:
+  CodeGenARM() = default;
+  virtual ~CodeGenARM() = default;
+
   void InitTarget(llvm::TargetMachine* tm) final {
     // set native vector bits.
     native_vector_bits_ = 16 * 8;
@@ -48,7 +56,7 @@ class CodeGenARM final : public CodeGenCPU {
 llvm::Value* CodeGenARM::CreateIntrinsic(const CallNode* op) {
   if (op->op.same_as(builtin_call_llvm_intrin_) || op->op.same_as(builtin_call_llvm_pure_intrin_)) {
     llvm::Intrinsic::ID id = static_cast<llvm::Intrinsic::ID>(Downcast<IntImm>(op->args[0])->value);
-    if (id == ::llvm::Intrinsic::ctpop) {
+    if (id == llvm::Intrinsic::ctpop) {
       PrimExpr e = ARMPopcount(op);
       return CodeGenCPU::CreateIntrinsic(e.as<CallNode>());
     }
@@ -59,8 +67,8 @@ llvm::Value* CodeGenARM::CreateIntrinsic(const CallNode* op) {
 PrimExpr CodeGenARM::ARMPopcount(const CallNode* call) {
   using namespace tir;
   const PrimExpr& e = call->args[2];
-  ::llvm::Intrinsic::ID ctpop_id = ::llvm::Intrinsic::ctpop;
-  ::llvm::Intrinsic::ID vpaddlu_id = ::llvm::Intrinsic::arm_neon_vpaddlu;
+  llvm::Intrinsic::ID ctpop_id = llvm::Intrinsic::ctpop;
+  llvm::Intrinsic::ID vpaddlu_id = llvm::Intrinsic::arm_neon_vpaddlu;
 
   // Fallback to default llvm lowering rule if input type not a full vector or half vector length
   int total_size = call->dtype.bits() * call->dtype.lanes();
diff --git a/src/target/llvm/codegen_blob.cc b/src/target/llvm/codegen_blob.cc
index dc9760f21f..8e6041b4c9 100644
--- a/src/target/llvm/codegen_blob.cc
+++ b/src/target/llvm/codegen_blob.cc
@@ -21,12 +21,38 @@
  * \file codegen_blob.cc
  */
 #ifdef TVM_LLVM_VERSION
+
 #include "codegen_blob.h"
 
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/Triple.h>
+#include <llvm/ADT/Twine.h>
+#include <llvm/IR/BasicBlock.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/DerivedTypes.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/GlobalVariable.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Type.h>
+#include <llvm/IR/Value.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/Support/Alignment.h>
+#endif
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <tvm/runtime/module.h>
 #include <tvm/target/target.h>
 
 #include <cstring>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "llvm_common.h"
 
 namespace tvm {
 namespace codegen {
diff --git a/src/target/llvm/codegen_blob.h b/src/target/llvm/codegen_blob.h
index bc238543e6..46c037a30a 100644
--- a/src/target/llvm/codegen_blob.h
+++ b/src/target/llvm/codegen_blob.h
@@ -23,13 +23,16 @@
  */
 #ifndef TVM_TARGET_LLVM_CODEGEN_BLOB_H_
 #define TVM_TARGET_LLVM_CODEGEN_BLOB_H_
+
 #ifdef TVM_LLVM_VERSION
+
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
+
 #include <memory>
 #include <string>
 #include <utility>
 
-#include "llvm_common.h"
-
 namespace tvm {
 namespace codegen {
 /**
@@ -46,5 +49,6 @@ std::pair<std::unique_ptr<llvm::Module>, std::shared_ptr<llvm::LLVMContext>> Cod
 
 }  // namespace codegen
 }  // namespace tvm
+
 #endif  // LLVM_VERSION
 #endif  // TVM_TARGET_LLVM_CODEGEN_BLOB_H_
diff --git a/src/target/llvm/codegen_cpu.cc b/src/target/llvm/codegen_cpu.cc
index 50551049d3..bf0fe1502b 100644
--- a/src/target/llvm/codegen_cpu.cc
+++ b/src/target/llvm/codegen_cpu.cc
@@ -24,6 +24,31 @@
 
 #include "codegen_cpu.h"
 
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/IR/Argument.h>
+#include <llvm/IR/Attributes.h>
+#include <llvm/IR/BasicBlock.h>
+#include <llvm/IR/CallingConv.h>
+#include <llvm/IR/Comdat.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/DIBuilder.h>
+#include <llvm/IR/DebugInfoMetadata.h>
+#include <llvm/IR/DebugLoc.h>
+#include <llvm/IR/DerivedTypes.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/GlobalVariable.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/MDBuilder.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/Support/Alignment.h>
+#endif
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <tvm/runtime/c_runtime_api.h>
 #include <tvm/runtime/module.h>
 #include <tvm/tir/analysis.h>
@@ -35,9 +60,15 @@
 
 #include "../func_registry_generator.h"
 #include "../metadata_utils.h"
+
 namespace tvm {
 namespace codegen {
 
+// Make these non-inline because of std::unique_ptr. See comment in
+// codegen_llvm.cc for more information.
+CodeGenCPU::CodeGenCPU() = default;
+CodeGenCPU::~CodeGenCPU() = default;
+
 void CodeGenCPU::Init(const std::string& module_name, llvm::TargetMachine* tm,
                       llvm::LLVMContext* ctx, bool system_lib, bool dynamic_lookup,
                       bool target_c_runtime) {
diff --git a/src/target/llvm/codegen_cpu.h b/src/target/llvm/codegen_cpu.h
index a491d539a6..e2c23f2011 100644
--- a/src/target/llvm/codegen_cpu.h
+++ b/src/target/llvm/codegen_cpu.h
@@ -32,12 +32,34 @@
 
 #include "codegen_llvm.h"
 
+namespace llvm {
+class BasicBlock;
+class Constant;
+class DIBuilder;
+class DIType;
+class Function;
+class FunctionType;
+class GlobalVariable;
+class LLVMContext;
+class MDNode;
+class StructType;
+class TargetMachine;
+class Type;
+class Value;
+
+// Used in std::unique_ptr
+class Module;
+}  // namespace llvm
+
 namespace tvm {
 namespace codegen {
 
 // CPU host code generation
 class CodeGenCPU : public CodeGenLLVM {
  public:
+  CodeGenCPU();
+  virtual ~CodeGenCPU();
+
   void Init(const std::string& module_name, llvm::TargetMachine* tm, llvm::LLVMContext* ctx,
             bool system_lib, bool dynamic_lookup, bool target_c_runtime) override;
   void AddFunction(const PrimFunc& f) override;
diff --git a/src/target/llvm/codegen_hexagon.cc b/src/target/llvm/codegen_hexagon.cc
index 33c2104b1e..a195c9f054 100644
--- a/src/target/llvm/codegen_hexagon.cc
+++ b/src/target/llvm/codegen_hexagon.cc
@@ -19,13 +19,33 @@
 
 #if defined(TVM_LLVM_VERSION) && TVM_LLVM_VERSION >= 70
 
+#include <llvm/ADT/ArrayRef.h>
+#include <llvm/ADT/SmallString.h>
+#include <llvm/ADT/StringRef.h>
 #include <llvm/Bitcode/BitcodeWriter.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/DerivedTypes.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/GlobalVariable.h>
+#include <llvm/IR/Instructions.h>
 #if TVM_LLVM_VERSION <= 90
 #include <llvm/IR/Intrinsics.h>
 #else
 #include <llvm/IR/IntrinsicsHexagon.h>
 #endif
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/MDBuilder.h>
+#include <llvm/IR/Module.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/Support/Alignment.h>
+#endif
+#include <llvm/Support/CodeGen.h>
 #include <llvm/Support/CommandLine.h>
+#include <llvm/Support/FileSystem.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Transforms/Utils/Cloning.h>
 #include <tvm/runtime/module.h>
 #include <tvm/target/codegen.h>
 #include <tvm/tir/analysis.h>
@@ -42,6 +62,7 @@
 #include "../../runtime/hexagon/hexagon_module.h"
 #include "../build_common.h"
 #include "codegen_cpu.h"
+#include "llvm_common.h"
 
 namespace tvm {
 namespace codegen {
@@ -369,18 +390,17 @@ runtime::Module BuildHexagon(IRModule mod, Target target) {
       else
         llvm::WriteBitcodeToFile(m, os);
     } else if (cgft == Asm || cgft == Obj) {
-      using namespace llvm;
 #if TVM_LLVM_VERSION <= 90
-      auto ft = cgft == Asm ? TargetMachine::CodeGenFileType::CGFT_AssemblyFile
-                            : TargetMachine::CodeGenFileType::CGFT_ObjectFile;
+      auto ft = cgft == Asm ? llvm::TargetMachine::CodeGenFileType::CGFT_AssemblyFile
+                            : llvm::TargetMachine::CodeGenFileType::CGFT_ObjectFile;
 #else
       auto ft = cgft == Asm ? llvm::CGFT_AssemblyFile : llvm::CGFT_ObjectFile;
 #endif
 
-      SmallString<16384> ss;  // Will grow on demand.
+      llvm::SmallString<16384> ss;  // Will grow on demand.
       llvm::raw_svector_ostream os(ss);
-      std::unique_ptr<llvm::Module> cm = CloneModule(m);
-      legacy::PassManager pass;
+      std::unique_ptr<llvm::Module> cm = llvm::CloneModule(m);
+      llvm::legacy::PassManager pass;
       ICHECK(tm->addPassesToEmitFile(pass, os, nullptr, ft) == 0) << "Cannot emit target code";
       pass.run(*cm.get());
       out.assign(ss.c_str(), ss.size());
diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc
index 28dc8652e3..f1d891e2c3 100644
--- a/src/target/llvm/codegen_llvm.cc
+++ b/src/target/llvm/codegen_llvm.cc
@@ -24,23 +24,84 @@
 // Part of the code are adapted from Halide's CodeGen_LLVM
 #include "codegen_llvm.h"
 
+#include <llvm/ADT/ArrayRef.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/Triple.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
+#if TVM_LLVM_VERSION >= 50
+#include <llvm/BinaryFormat/Dwarf.h>
+#else
+#include <llvm/Support/Dwarf.h>
+#endif
+#include <llvm/IR/Argument.h>
+#include <llvm/IR/Attributes.h>
+#include <llvm/IR/BasicBlock.h>
+#include <llvm/IR/CallingConv.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/DIBuilder.h>
+#include <llvm/IR/DataLayout.h>
+#include <llvm/IR/DebugInfoMetadata.h>
+#include <llvm/IR/DerivedTypes.h>
+#if TVM_LLVM_VERSION >= 150
+#include <llvm/IR/FMF.h>
+#else
+#include <llvm/IR/Operator.h>
+#endif
+#include <llvm/IR/Function.h>
+#include <llvm/IR/GlobalVariable.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Intrinsics.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/MDBuilder.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Type.h>
+#include <llvm/IRReader/IRReader.h>
+#include <llvm/Linker/Linker.h>
+#include <llvm/Pass.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/Support/Alignment.h>
+#include <llvm/Support/TypeSize.h>
+#endif
+#include <llvm/Support/CodeGen.h>
+#include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/SourceMgr.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Transforms/IPO.h>
+#include <llvm/Transforms/IPO/PassManagerBuilder.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <tvm/runtime/c_runtime_api.h>
 #include <tvm/runtime/crt/error_codes.h>
 #include <tvm/runtime/device_api.h>
 #include <tvm/tir/op.h>
 
 #include <algorithm>
+#include <functional>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
 
 #include "../../arith/pattern_match.h"
 #include "../build_common.h"
 #include "../func_registry_generator.h"
 #include "codegen_params.h"
-#include "llvm/Support/raw_os_ostream.h"
 #include "llvm_common.h"
 
 namespace tvm {
 namespace codegen {
 
+// CodeGenLLVM has members of type std::unique_ptr<T>. These members will be
+// instantiated in the constructor, which will require that the type T is
+// complete at that point. Put the constructor (and destructor) here, since
+// all types should be complete here.
+CodeGenLLVM::CodeGenLLVM() = default;
+CodeGenLLVM::~CodeGenLLVM() = default;
+CodeGenLLVM::DebugInfo::~DebugInfo() = default;
+
 std::unique_ptr<CodeGenLLVM> CodeGenLLVM::Create(llvm::TargetMachine* tm) {
   std::string target = tm->getTarget().getName();
   std::string factory_template = "tvm.codegen.llvm.target_";
diff --git a/src/target/llvm/codegen_llvm.h b/src/target/llvm/codegen_llvm.h
index 5656eb5b98..c6129c238c 100644
--- a/src/target/llvm/codegen_llvm.h
+++ b/src/target/llvm/codegen_llvm.h
@@ -23,9 +23,31 @@
  */
 #ifndef TVM_TARGET_LLVM_CODEGEN_LLVM_H_
 #define TVM_TARGET_LLVM_CODEGEN_LLVM_H_
-#include <llvm/IR/GlobalValue.h>
 #ifdef TVM_LLVM_VERSION
 
+#include <llvm/ADT/ArrayRef.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/IR/BasicBlock.h>
+#include <llvm/IR/ConstantFolder.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/DerivedTypes.h>
+#if TVM_LLVM_VERSION >= 150
+#include <llvm/IR/FMF.h>
+#else
+#include <llvm/IR/Operator.h>
+#endif
+#include <llvm/IR/GlobalValue.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Intrinsics.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/Support/Casting.h>
+#if TVM_LLVM_VERSION >= 140
+#include <llvm/MC/TargetRegistry.h>
+#else
+#include <llvm/Support/TargetRegistry.h>
+#endif
+
 #include <tvm/arith/analyzer.h>
 #include <tvm/ir/module.h>
 #include <tvm/target/codegen.h>
@@ -48,7 +70,25 @@
 #include "../../runtime/thread_storage_scope.h"
 #include "../../tir/transforms/ir_utils.h"
 #include "codegen_params.h"
-#include "llvm_common.h"
+
+namespace llvm {
+class Argument;
+class CallInst;
+class Function;
+class GlobalVariable;
+class Instruction;
+class PassManagerBuilder;
+class TargetMachine;
+class DIFile;
+class DICompileUnit;
+class MDNode;
+
+// Used in std::unique_ptr
+class Module;
+class DataLayout;
+class DIBuilder;
+class MDBuilder;
+}  // namespace llvm
 
 namespace tvm {
 namespace codegen {
@@ -61,6 +101,9 @@ using namespace tir;
 class CodeGenLLVM : public ExprFunctor<llvm::Value*(const PrimExpr&)>,
                     public StmtFunctor<void(const Stmt&)> {
  public:
+  CodeGenLLVM();           // Do not make it default here.
+  virtual ~CodeGenLLVM();  // Do not make it default here.
+
   /*!
    * \brief Create new code generator based on target machine.
    * \param tm The target machine
@@ -485,6 +528,7 @@ class CodeGenLLVM : public ExprFunctor<llvm::Value*(const PrimExpr&)>,
 
   /*! \brief Helper struct for debug infos. */
   struct DebugInfo {
+    ~DebugInfo();  // Because of the std::unique_ptr.
     std::unique_ptr<llvm::DIBuilder> di_builder_;
     llvm::DICompileUnit* compilation_unit_{nullptr};
     llvm::DIFile* file_{nullptr};
diff --git a/src/target/llvm/codegen_nvptx.cc b/src/target/llvm/codegen_nvptx.cc
index 6ad9fddd77..a74274009c 100644
--- a/src/target/llvm/codegen_nvptx.cc
+++ b/src/target/llvm/codegen_nvptx.cc
@@ -23,11 +23,40 @@
  */
 #ifdef TVM_LLVM_VERSION
 
+#include <llvm/ADT/SmallString.h>
+#include <llvm/IR/Attributes.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/GlobalValue.h>
+#include <llvm/IR/InlineAsm.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/Intrinsics.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/IR/IntrinsicsNVPTX.h>
+#endif
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Type.h>
+#include <llvm/IRReader/IRReader.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/Support/Alignment.h>
+#endif
+#include <llvm/Support/CodeGen.h>
+#include <llvm/Support/SourceMgr.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Transforms/IPO/PassManagerBuilder.h>
 #include <tvm/runtime/device_api.h>
 
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
 #include "../../runtime/cuda/cuda_module.h"
 #include "../build_common.h"
 #include "codegen_llvm.h"
+#include "llvm_common.h"
 
 namespace tvm {
 namespace codegen {
@@ -103,17 +132,17 @@ class CodeGenNVPTX : public CodeGenLLVM {
   // Return the thread index via intrinsics.
   llvm::Value* GetThreadIndex(const IterVar& iv) final {
     runtime::ThreadScope ts = runtime::ThreadScope::Create(iv->thread_tag);
-    llvm::Intrinsic::ID intrin_id = ::llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x;
+    llvm::Intrinsic::ID intrin_id = llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x;
     if (ts.rank == 1) {
       switch (ts.dim_index) {
         case 0:
-          intrin_id = ::llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x;
+          intrin_id = llvm::Intrinsic::nvvm_read_ptx_sreg_tid_x;
           break;
         case 1:
-          intrin_id = ::llvm::Intrinsic::nvvm_read_ptx_sreg_tid_y;
+          intrin_id = llvm::Intrinsic::nvvm_read_ptx_sreg_tid_y;
           break;
         case 2:
-          intrin_id = ::llvm::Intrinsic::nvvm_read_ptx_sreg_tid_z;
+          intrin_id = llvm::Intrinsic::nvvm_read_ptx_sreg_tid_z;
           break;
         default:
           LOG(FATAL) << "unknown thread idx";
@@ -122,13 +151,13 @@ class CodeGenNVPTX : public CodeGenLLVM {
       ICHECK_EQ(ts.rank, 0);
       switch (ts.dim_index) {
         case 0:
-          intrin_id = ::llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x;
+          intrin_id = llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x;
           break;
         case 1:
-          intrin_id = ::llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_y;
+          intrin_id = llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_y;
           break;
         case 2:
-          intrin_id = ::llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_z;
+          intrin_id = llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_z;
           break;
         default:
           LOG(FATAL) << "unknown thread idx";
@@ -145,7 +174,7 @@ class CodeGenNVPTX : public CodeGenLLVM {
       return nullptr;
     } else if (sync == "shared" || sync == "shared.dyn") {
       llvm::Function* f =
-          llvm::Intrinsic::getDeclaration(module_.get(), ::llvm::Intrinsic::nvvm_barrier0);
+          llvm::Intrinsic::getDeclaration(module_.get(), llvm::Intrinsic::nvvm_barrier0);
       return builder_->CreateCall(f, {});
     } else {
       LOG(FATAL) << "Do not support sync " << sync;
diff --git a/src/target/llvm/codegen_params.cc b/src/target/llvm/codegen_params.cc
index 3b4cae9197..81ed446231 100644
--- a/src/target/llvm/codegen_params.cc
+++ b/src/target/llvm/codegen_params.cc
@@ -24,8 +24,14 @@
 
 #include "codegen_params.h"
 
+#include <llvm/ADT/ArrayRef.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/DerivedTypes.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/Support/Casting.h>
+
 #include <algorithm>
-#include <memory>
+#include <type_traits>
 #include <vector>
 
 namespace tvm {
diff --git a/src/target/llvm/codegen_params.h b/src/target/llvm/codegen_params.h
index f5fd21ff32..9d05621469 100644
--- a/src/target/llvm/codegen_params.h
+++ b/src/target/llvm/codegen_params.h
@@ -26,7 +26,10 @@
 
 #include <tvm/runtime/ndarray.h>
 
-#include "llvm_common.h"
+namespace llvm {
+class ConstantArray;
+class LLVMContext;
+}  // namespace llvm
 
 namespace tvm {
 namespace codegen {
@@ -40,7 +43,7 @@ namespace codegen {
  * \param arr NDArray to convert.
  * \return LLVM array containing the array data.
  */
-llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, ::tvm::runtime::NDArray arr);
+llvm::ConstantArray* NDArrayToLLVMArray(llvm::LLVMContext* ctx, tvm::runtime::NDArray arr);
 
 }  // namespace codegen
 }  // namespace tvm
diff --git a/src/target/llvm/codegen_x86_64.cc b/src/target/llvm/codegen_x86_64.cc
index 4ab4c064ae..2d36e0b022 100644
--- a/src/target/llvm/codegen_x86_64.cc
+++ b/src/target/llvm/codegen_x86_64.cc
@@ -23,10 +23,21 @@
  */
 #ifdef TVM_LLVM_VERSION
 
+#include <llvm/IR/DerivedTypes.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Intrinsics.h>
+#if TVM_LLVM_VERSION >= 100
+#include <llvm/IR/IntrinsicsX86.h>
+#endif
+#include <llvm/MC/MCSubtargetInfo.h>
+#include <llvm/Support/Casting.h>
+#include <llvm/Target/TargetMachine.h>
 #include <tvm/runtime/registry.h>
 
+#include <string>
+#include <vector>
+
 #include "codegen_cpu.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 
 namespace tvm {
 namespace codegen {
@@ -86,7 +97,7 @@ llvm::Value* CodeGenX86_64::VisitExpr_(const CastNode* op) {
 
     if (from.lanes() >= 16 && has_avx512) {
       return CallVectorIntrin(
-          ::llvm::Intrinsic::x86_avx512_mask_vcvtph2ps_512, 16,
+          llvm::Intrinsic::x86_avx512_mask_vcvtph2ps_512, 16,
           DTypeToLLVMType(DataType::Float(32, from.lanes())),
           {
               MakeValue(tir::Call(DataType::Int(16, from.lanes()), tir::builtin::reinterpret(),
@@ -102,7 +113,7 @@ llvm::Value* CodeGenX86_64::VisitExpr_(const CastNode* op) {
     const auto has_f16c = TargetHasFeature(*target_machine_, "f16c");
 
     if (from.lanes() >= 8 && has_f16c) {
-      return CallVectorIntrin(::llvm::Intrinsic::x86_vcvtph2ps_256, 8,
+      return CallVectorIntrin(llvm::Intrinsic::x86_vcvtph2ps_256, 8,
                               DTypeToLLVMType(DataType::Float(32, from.lanes())),
                               {MakeValue(tir::Call(DataType::Int(16, from.lanes()),
                                                    tir::builtin::reinterpret(), {op->value}))});
diff --git a/src/target/llvm/intrin_rule_hexagon.cc b/src/target/llvm/intrin_rule_hexagon.cc
index 82f7d50513..a6f5eae4a5 100644
--- a/src/target/llvm/intrin_rule_hexagon.cc
+++ b/src/target/llvm/intrin_rule_hexagon.cc
@@ -19,6 +19,7 @@
 
 #ifdef TVM_LLVM_VERSION
 
+#include <llvm/IR/Intrinsics.h>
 #include <tvm/tir/op_attr_types.h>
 
 #include "intrin_rule_llvm.h"
diff --git a/src/target/llvm/intrin_rule_llvm.cc b/src/target/llvm/intrin_rule_llvm.cc
index adbd1056d9..9ef494fd2a 100644
--- a/src/target/llvm/intrin_rule_llvm.cc
+++ b/src/target/llvm/intrin_rule_llvm.cc
@@ -24,6 +24,7 @@
 
 #include "intrin_rule_llvm.h"
 
+#include <llvm/IR/Intrinsics.h>
 #include <tvm/tir/op.h>
 #include <tvm/tir/op_attr_types.h>
 
diff --git a/src/target/llvm/intrin_rule_llvm.h b/src/target/llvm/intrin_rule_llvm.h
index a926d7b9be..a0e040a204 100644
--- a/src/target/llvm/intrin_rule_llvm.h
+++ b/src/target/llvm/intrin_rule_llvm.h
@@ -23,6 +23,7 @@
  */
 #ifndef TVM_TARGET_LLVM_INTRIN_RULE_LLVM_H_
 #define TVM_TARGET_LLVM_INTRIN_RULE_LLVM_H_
+
 #ifdef TVM_LLVM_VERSION
 
 #include <tvm/runtime/registry.h>
@@ -30,10 +31,6 @@
 #include <tvm/tir/builtin.h>
 #include <tvm/tir/expr.h>
 
-#include <string>
-
-#include "llvm_common.h"
-
 namespace tvm {
 namespace codegen {
 // num_signature means number of arguments used to query signature
diff --git a/src/target/llvm/llvm_common.cc b/src/target/llvm/llvm_common.cc
index 06b2be2d9f..3d9ac835dc 100644
--- a/src/target/llvm/llvm_common.cc
+++ b/src/target/llvm/llvm_common.cc
@@ -24,12 +24,29 @@
 
 #include "llvm_common.h"
 
+#if TVM_LLVM_VERSION >= 140
+#include <llvm/MC/TargetRegistry.h>
+#else
+#include <llvm/Support/TargetRegistry.h>
+#endif
+#include <llvm/Support/CodeGen.h>
+#include <llvm/Support/Host.h>
+#include <llvm/Support/TargetSelect.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Target/TargetOptions.h>
+#include <tvm/ir/expr.h>
+#include <tvm/runtime/container/array.h>
+#include <tvm/runtime/container/optional.h>
+#include <tvm/runtime/container/string.h>
 #include <tvm/runtime/logging.h>
 #include <tvm/target/target.h>
 
 #include <atomic>
 #include <memory>
 #include <mutex>
+#include <sstream>
+#include <string>
 
 namespace tvm {
 namespace codegen {
diff --git a/src/target/llvm/llvm_common.h b/src/target/llvm/llvm_common.h
index e11392be29..c127b77c03 100644
--- a/src/target/llvm/llvm_common.h
+++ b/src/target/llvm/llvm_common.h
@@ -29,63 +29,19 @@
 #endif
 #ifdef TVM_LLVM_VERSION
 
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/Bitcode/BitcodeWriter.h>
-#include <llvm/ExecutionEngine/MCJIT.h>
-#include <llvm/IR/InlineAsm.h>
-#include <llvm/IR/Intrinsics.h>
-#include <llvm/IR/Value.h>
-#include <llvm/Support/SourceMgr.h>
-#if TVM_LLVM_VERSION >= 100
-#include <llvm/IR/IntrinsicsAMDGPU.h>
-#include <llvm/IR/IntrinsicsARM.h>
-#include <llvm/IR/IntrinsicsNVPTX.h>
-#include <llvm/IR/IntrinsicsX86.h>
-#endif
-#include <llvm/IR/Argument.h>
-#include <llvm/IR/BasicBlock.h>
-#include <llvm/IR/Constants.h>
-#include <llvm/IR/DIBuilder.h>
-#include <llvm/IR/DerivedTypes.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Instructions.h>
-#include <llvm/IR/LLVMContext.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/MDBuilder.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/Type.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/IPO/PassManagerBuilder.h>
-#include <llvm/Transforms/Utils/Cloning.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#if TVM_LLVM_VERSION >= 100
-#include <llvm/Support/Alignment.h>
-#endif
-#include <llvm/CodeGen/TargetLoweringObjectFileImpl.h>
-#include <llvm/IRReader/IRReader.h>
-#include <llvm/Linker/Linker.h>
-#include <llvm/Support/Casting.h>
-#include <llvm/Support/FileSystem.h>
-#include <llvm/Support/Host.h>
-#include <llvm/Support/MemoryBuffer.h>
-#if TVM_LLVM_VERSION >= 140
-#include <llvm/MC/TargetRegistry.h>
-#else
-#include <llvm/Support/TargetRegistry.h>
-#endif
-#include <llvm/Support/TargetSelect.h>
-#include <llvm/Support/raw_ostream.h>
-#include <llvm/Target/TargetMachine.h>
-#include <llvm/Target/TargetOptions.h>
 #include <tvm/runtime/container/string.h>
 
 #include <memory>
 #include <string>
 #include <utility>
 
+namespace llvm {
+class Module;
+class Target;
+class TargetMachine;
+class TargetOptions;
+}  // namespace llvm
+
 namespace tvm {
 
 // The TVM target
diff --git a/src/target/llvm/llvm_module.cc b/src/target/llvm/llvm_module.cc
index 30a1b39872..8e87229155 100644
--- a/src/target/llvm/llvm_module.cc
+++ b/src/target/llvm/llvm_module.cc
@@ -23,6 +23,27 @@
  */
 #ifdef TVM_LLVM_VERSION
 
+#include <llvm/ADT/SmallString.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/Bitcode/BitcodeWriter.h>
+#include <llvm/ExecutionEngine/ExecutionEngine.h>
+#include <llvm/ExecutionEngine/MCJIT.h>  // Force linking of MCJIT
+#include <llvm/IR/DataLayout.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Intrinsics.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/MDBuilder.h>
+#include <llvm/IR/Metadata.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/IRReader/IRReader.h>
+#include <llvm/Support/FileSystem.h>
+#include <llvm/Support/SourceMgr.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Target/TargetOptions.h>
+#include <llvm/Transforms/Utils/Cloning.h>
 #include <tvm/ir/module.h>
 #include <tvm/relay/runtime.h>
 #include <tvm/runtime/packed_func.h>