You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/05/02 04:18:25 UTC

[GitHub] anirudh2290 closed pull request #10760: [MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in make

anirudh2290 closed pull request #10760: [MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in make
URL: https://github.com/apache/incubator-mxnet/pull/10760
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git a/3rdparty/mshadow b/3rdparty/mshadow
index 317fad64cc2..a8c650ce8a7 160000
--- a/3rdparty/mshadow
+++ b/3rdparty/mshadow
@@ -1 +1 @@
-Subproject commit 317fad64cc234c458e3f01ff47fffe3b8b3e5f63
+Subproject commit a8c650ce8a708608a282c4d1e251c57873a8db25
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 363237f909e..05d8021c367 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,7 @@ mxnet_option(USE_OPENCV           "Build with OpenCV support" ON)
 mxnet_option(USE_OPENMP           "Build with Openmp support" ON)
 mxnet_option(USE_CUDNN            "Build with cudnn support"  ON) # one could set CUDNN_ROOT for search path
 mxnet_option(USE_SSE              "Build with x86 SSE instruction support" ON)
+mxnet_option(USE_F16C             "Build with x86 F16C instruction support" ON) # autodetects support if ON
 mxnet_option(USE_LAPACK           "Build with lapack support" ON IF NOT MSVC)
 mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
 mxnet_option(USE_MKLML_MKL        "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE))
@@ -86,6 +87,10 @@ if(MSVC)
   add_definitions(-DNNVM_EXPORTS)
   add_definitions(-DDMLC_STRICT_CXX11)
   add_definitions(-DNOMINMAX)
+  set(SUPPORT_F16C FALSE)
+  if(USE_F16C)
+    message("F16C instruction set is not yet supported for MSVC")
+  endif()
   set(CMAKE_C_FLAGS "/MP")
   set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /bigobj")
 else(MSVC)
@@ -102,6 +107,29 @@ else(MSVC)
   else()
     set(SUPPORT_MSSE2 FALSE)
   endif()
+  # For cross compilation, turn off flag if target device does not support it
+  if(USE_F16C)
+    check_cxx_compiler_flag("-mf16c"     COMPILER_SUPPORT_MF16C)
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+      execute_process(COMMAND cat /proc/cpuinfo
+              COMMAND grep flags
+              COMMAND grep f16c
+              OUTPUT_VARIABLE CPU_SUPPORT_F16C)
+    elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+      execute_process(COMMAND sysctl -a
+              COMMAND grep machdep.cpu.features
+              COMMAND grep F16C
+              OUTPUT_VARIABLE CPU_SUPPORT_F16C)
+    endif()
+    if(NOT CPU_SUPPORT_F16C)
+      message("CPU does not support F16C instructions")
+    endif()
+    if(CPU_SUPPORT_F16C AND COMPILER_SUPPORT_MF16C)
+      set(SUPPORT_F16C TRUE)
+    endif()
+  else()
+    set(SUPPORT_F16C FALSE)
+  endif()
   set(CMAKE_C_FLAGS "-Wall -Wno-unknown-pragmas -fPIC -Wno-sign-compare")
   if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$")
     set(CMAKE_C_FLAGS "-Wno-braced-scalar-init")
diff --git a/Makefile b/Makefile
index 043dcb9795e..951b29b41cf 100644
--- a/Makefile
+++ b/Makefile
@@ -187,6 +187,30 @@ ifeq ($(USE_CUDNN), 1)
 	LDFLAGS += -lcudnn
 endif
 
+# whether to use F16C instruction set extension for fast fp16 compute on CPU
+# if cross compiling you may want to explicitly turn it off if target system does not support it
+ifndef USE_F16C
+    ifneq ($(OS),Windows_NT)
+        detected_OS := $(shell uname -s)
+        ifeq ($(detected_OS),Darwin)
+            F16C_SUPP = $(shell sysctl -a | grep machdep.cpu.features | grep F16C)
+        endif
+        ifeq ($(detected_OS),Linux)
+            F16C_SUPP = $(shell cat /proc/cpuinfo | grep flags | grep f16c)
+        endif
+	ifneq ($(strip $(F16C_SUPP)),)
+                USE_F16C=1
+        else
+                USE_F16C=0
+        endif
+    endif
+    # if OS is Windows, check if your processor and compiler support F16C architecture.
+    # One way to check if processor supports it is to download the tool
+    # https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
+    # If coreinfo -c shows F16C and compiler supports it,
+    # then you can set USE_F16C=1 explicitly to leverage that capability
+endif
+
 # gperftools malloc library (tcmalloc)
 ifeq ($(USE_GPERFTOOLS), 1)
 #	FIND_LIBNAME=tcmalloc_and_profiler
diff --git a/amalgamation/Makefile b/amalgamation/Makefile
index f7f3c001e19..f03a2b97fc4 100644
--- a/amalgamation/Makefile
+++ b/amalgamation/Makefile
@@ -55,7 +55,6 @@ CFLAGS=-std=c++11 -Wno-unknown-pragmas -Wall $(DEFS)
 # if architecture of the CPU supports F16C instruction set, enable USE_F16C for fast fp16 computation on CPU
 ifeq ($(USE_F16C), 1)
 	CFLAGS+=-mf16c
-	DEFS+=-DMSHADOW_USE_F16C=1
 else
 	DEFS+=-DMSHADOW_USE_F16C=0
 endif
diff --git a/make/config.mk b/make/config.mk
index 9eded6f5080..dd67c33cc9e 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -132,10 +132,19 @@ endif
 ARCH := $(shell uname -a)
 ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64))
 	USE_SSE=0
+	USE_F16C=0
 else
 	USE_SSE=1
 endif
 
+#----------------------------
+# F16C instruction support for faster arithmetic of fp16 on CPU
+#----------------------------
+# For distributed training with fp16, this helps even if training on GPUs
+# If left empty, checks CPU support and turns it on.
+# For cross compilation, please check support for F16C on target device and turn off if necessary.
+USE_F16C =
+
 #----------------------------
 # distributed computing
 #----------------------------
diff --git a/make/crosscompile.jetson.mk b/make/crosscompile.jetson.mk
index 31a1398c1b7..acc9c4a5a8a 100644
--- a/make/crosscompile.jetson.mk
+++ b/make/crosscompile.jetson.mk
@@ -132,7 +132,10 @@ endif
 # Settings for power and arm arch
 #----------------------------
 USE_SSE=0
+
+# Turn off F16C instruction set support
 USE_F16C=0
+
 #----------------------------
 # distributed computing
 #----------------------------


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services