You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2022/06/10 16:37:45 UTC

[GitHub] [tvm] tqchen commented on a diff in pull request #11505: Fix max concurrency number in runtime for Intel processors

tqchen commented on code in PR #11505:
URL: https://github.com/apache/tvm/pull/11505#discussion_r894711056


##########
src/runtime/threading_backend.cc:
##########
@@ -40,11 +40,158 @@
 #define HEXAGON_STACK_ALIGNMENT 32
 #endif
 #include <algorithm>
+#include <string>
 #include <thread>
 #define CURRENT_THREAD_HANDLE (static_cast<std::thread::native_handle_type>(0))
 namespace tvm {
 namespace runtime {
 namespace threading {
+#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
+// Compiler shim: make __cpuid (and, where possible, __cpuid_count / __cpuidex) available.
+//   MSVC      : __cpuid(int data[4], int eaxIn)           -- intrinsic or hand-written fallback
+//   otherwise : __cpuid(eaxIn, a, b, c, d) and __cpuid_count(eaxIn, ecxIn, a, b, c, d)
+#ifdef _MSC_VER
+#if (_MSC_VER < 1400)
+// Hand-written __cpuid for old MSVC (32-bit inline asm, naked function: no prologue/epilogue).
+// NOTE(review): no __cpuidex is defined on this path.
+static inline __declspec(naked) void __cpuid(int[4], int) {
+  __asm {
+        push  ebx
+        push  esi
+        mov   eax, dword ptr [esp + 4 * 2 + 8]  // eaxIn
+        cpuid
+        mov   esi, dword ptr [esp + 4 * 2 + 4]  // data
+        mov   dword ptr [esi], eax
+        mov   dword ptr [esi + 4], ebx
+        mov   dword ptr [esi + 8], ecx
+        mov   dword ptr [esi + 12], edx
+        pop   esi
+        pop   ebx
+        ret
+  }
+}
+#else
+#include <intrin.h>  // for __cpuid
+#endif  // _MSC_VER < 1400
+#else   // !_MSC_VER
+#ifndef __GNUC_PREREQ
+#define __GNUC_PREREQ(major, minor) \
+  ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor)))
+#endif
+#if __GNUC_PREREQ(4, 3) && !defined(__APPLE__)
+#include <cpuid.h>
+#else
+// avoid err on Apple: can't find a register in class `BREG' while reloading `asm'
+// EBX is saved around cpuid and its result is handed back through ESI ("=S") instead of "=b".
+// NOTE(review): _M_IX86 is normally an MSVC-defined macro; confirm this branch is reachable
+// with Apple toolchains.
+#if defined(__APPLE__) && defined(_M_IX86)
+#define __cpuid(eaxIn, a, b, c, d)                                         \
+  __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" \
+                       : "=a"(a), "=S"(b), "=c"(c), "=d"(d)                \
+                       : "0"(eaxIn))
+#define __cpuid_count(eaxIn, ecxIn, a, b, c, d)                            \
+  __asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebx, %%esi\npopl %%ebx" \
+                       : "=a"(a), "=S"(b), "=c"(c), "=d"(d)                \
+                       : "0"(eaxIn), "2"(ecxIn))
+#else
+#define __cpuid(eaxIn, a, b, c, d) \
+  __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn))
+#define __cpuid_count(eaxIn, ecxIn, a, b, c, d) \
+  __asm__ __volatile__("cpuid\n" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(eaxIn), "2"(ecxIn))
+#endif  // __APPLE__ && _M_IX86
+#endif  // __GNUC_PREREQ(4, 3) && !__APPLE__
+#endif  // _MSC_VER
+
+/*
+  Returns the count stored in num_cores_ for the requested topology level.
+    SmtLevel  : num_cores_[SmtLevel - 1] as recorded.
+    CoreLevel : num_cores_[CoreLevel - 1] divided by num_cores_[SmtLevel - 1].
+  Throws std::string when x2apic_supported_ is false, or when level is neither
+  SmtLevel nor CoreLevel.
+*/
+unsigned int Cpu::get_num_cores(IntelCpuTopologyLevel level) const {
+  if (!x2apic_supported_) throw std::string("x2apic_supported_ is not supported");
+  switch (level) {
+    case SmtLevel:
+      return num_cores_[level - 1];
+    case CoreLevel:
+      return num_cores_[level - 1] / num_cores_[SmtLevel - 1];
+    default:
+      // Report the real cause instead of repeating the x2APIC message.
+      throw std::string("unsupported topology level: expected SmtLevel or CoreLevel");
+  }
+}
+
+/*
+  Runs CPUID for leaf eaxIn and stores the four result registers in
+  data[] = { eax, ebx, ecx, edx }
+  The MSVC form of __cpuid takes (int buffer[4], leaf); the other form takes
+  (leaf, a, b, c, d), so the two branches pass the arguments differently.
+*/
+void Cpu::get_cpuid(unsigned int eaxIn, unsigned int data[4]) {
+#ifdef _MSC_VER
+  __cpuid(reinterpret_cast<int*>(data), eaxIn);  // buffer is unsigned here; __cpuid wants int*
+#else
+  __cpuid(eaxIn, data[0], data[1], data[2], data[3]);
+#endif
+}
+void Cpu::get_cpuid_ex(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4]) {  // CPUID leaf eaxIn with ECX = ecxIn; data[] = { eax, ebx, ecx, edx }
+#ifdef _MSC_VER
+  __cpuidex(reinterpret_cast<int*>(data), eaxIn, ecxIn);  // NOTE(review): the _MSC_VER < 1400 fallback above defines only __cpuid, not __cpuidex
+#else
+  __cpuid_count(eaxIn, ecxIn, data[0], data[1], data[2], data[3]);  // same as __cpuid but also loads ECX
+#endif
+}
+
+// Vendor flags stored in Cpu::type_.
+using Type = uint64_t;
+static const Type NONE = 0;
+static const Type tINTEL = Type(1) << 24;
+static const Type tAMD = Type(1) << 25;
+
+/*
+  Detects the CPU vendor and then collects the core counts.
+  ECX of CPUID leaf 0 is compared against the packed strings "cAMD" and "ntel";
+  a match sets the corresponding flag in type_. set_num_cores() runs afterwards.
+*/
+Cpu::Cpu() : type_(NONE), x2apic_supported_(false), num_cores_() {
+  unsigned int regs[4] = {};
+  get_cpuid(0, regs);
+  const unsigned int vendor_ecx = regs[2];
+
+  if (vendor_ecx == get32bit_ss_be("cAMD")) type_ |= tAMD;
+  if (vendor_ecx == get32bit_ss_be("ntel")) type_ |= tINTEL;
+
+  set_num_cores();
+}
+bool Cpu::is_intel() { return (type_ & tINTEL) != 0; }  // true when the tINTEL flag is set in type_
+
+bool Cpu::is_amd() { return (type_ & tAMD) != 0; }  // true when the tAMD flag is set in type_
+
+/*
+  Packs the first four chars of x into one 32-bit value:
+    x[0] -> bits 7:0, x[1] -> bits 15:8, x[2] -> bits 23:16, x[3] -> bits 31:24
+  (least-significant byte first, despite the "be" in the name).
+  x must point to at least four readable chars.
+  Each char goes through unsigned char so that values >= 0x80 are not
+  sign-extended and no signed shift can overflow.
+*/
+unsigned int Cpu::get32bit_ss_be(const char* x) const {
+  unsigned int packed = 0;
+  for (int i = 3; i >= 0; --i) {
+    packed = (packed << 8) | static_cast<unsigned char>(x[i]);
+  }
+  return packed;
+}
+
+/*
+  Returns the bit field val[end:base] (both bit positions included), shifted
+  down so that bit `base` lands at bit 0.
+  Callers in this file pass inclusive ranges such as (8, 15) and (0, 15), so
+  `end` is treated as the last bit of the field rather than one past it.
+  Returns 0 for an empty or out-of-range request (end < base or base >= 32).
+*/
+unsigned int Cpu::extract_bit(unsigned int val, unsigned int base, unsigned int end) {
+  const unsigned int kRegisterBits = 32;  // CPUID registers are 32 bits wide
+  if (base >= kRegisterBits || end < base) return 0;
+  const unsigned int width = end - base + 1;
+  const unsigned int shifted = val >> base;
+  // A full-width mask cannot be built with 1u << 32 (undefined), so skip masking.
+  if (width >= kRegisterBits) return shifted;
+  return shifted & ((1u << width) - 1);
+}
+
+void Cpu::set_num_cores() {
+  if ((type_ & tINTEL) == 0) return;
+
+  unsigned int data[4] = {};
+
+  /* CAUTION: These numbers are configuration as shipped by Intel. */
+  get_cpuid_ex(0x0, 0, data);
+  if (data[0] >= 0xB) {
+    /*
+      if leaf 11 exists(x2APIC is supported),
+      we use it to get the number of smt cores and cores on socket
+
+      leaf 0xB can be zeroed-out by a hypervisor
+    */
+    x2apic_supported_ = true;
+    for (unsigned int i = 0; i < max_topology_levels; i++) {
+      get_cpuid_ex(0xB, i, data);
+      IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extract_bit(data[2], 8, 15);
+      if (level == SmtLevel || level == CoreLevel) {
+        num_cores_[level - 1] = extract_bit(data[1], 0, 15);
+      }
+    }
+    /*
+      Fallback values in case a hypervisor has 0xB leaf zeroed-out.
+    */
+    num_cores_[SmtLevel - 1] = (std::max)(1u, num_cores_[SmtLevel - 1]);

Review Comment:
   Unless the code is written from scratch, it would still be good to put it into 3rdparty to be extra careful, as long as there are no problems with that mechanism. The LICENSE seems to be fine, and we can add it to https://github.com/apache/tvm/tree/main/licenses



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org