You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by an...@apache.org on 2018/05/02 04:32:25 UTC

[incubator-mxnet] branch v1.2.0 updated: [MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in make (#10771)

This is an automated email from the ASF dual-hosted git repository.

anirudh2290 pushed a commit to branch v1.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.2.0 by this push:
     new 9fb3835  [MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in make (#10771)
9fb3835 is described below

commit 9fb3835de52ea04d013ebabe028168208a7f0700
Author: Rahul Huilgol <ra...@gmail.com>
AuthorDate: Tue May 1 21:32:21 2018 -0700

    [MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in make (#10771)
    
    * f16c build
    
    * typo
    
    * only print message for MSVC if USE_F16C
    
    * improve build logic
    
    * update mshadow
    
    * remove def
---
 3rdparty/mshadow            |  2 +-
 CMakeLists.txt              | 28 ++++++++++++++++++++++++++++
 Makefile                    | 24 ++++++++++++++++++++++++
 amalgamation/Makefile       |  1 -
 make/config.mk              |  9 +++++++++
 make/crosscompile.jetson.mk |  3 +++
 6 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/3rdparty/mshadow b/3rdparty/mshadow
index e051c2c..a8c650c 160000
--- a/3rdparty/mshadow
+++ b/3rdparty/mshadow
@@ -1 +1 @@
-Subproject commit e051c2c91c1ac4a3a8fed3e90ef2fb549220d972
+Subproject commit a8c650ce8a708608a282c4d1e251c57873a8db25
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 363237f..05d8021 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,7 @@ mxnet_option(USE_OPENCV           "Build with OpenCV support" ON)
 mxnet_option(USE_OPENMP           "Build with Openmp support" ON)
 mxnet_option(USE_CUDNN            "Build with cudnn support"  ON) # one could set CUDNN_ROOT for search path
 mxnet_option(USE_SSE              "Build with x86 SSE instruction support" ON)
+mxnet_option(USE_F16C             "Build with x86 F16C instruction support" ON) # autodetects support if ON
 mxnet_option(USE_LAPACK           "Build with lapack support" ON IF NOT MSVC)
 mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
 mxnet_option(USE_MKLML_MKL        "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE))
@@ -86,6 +87,10 @@ if(MSVC)
   add_definitions(-DNNVM_EXPORTS)
   add_definitions(-DDMLC_STRICT_CXX11)
   add_definitions(-DNOMINMAX)
+  set(SUPPORT_F16C FALSE)
+  if(USE_F16C)
+    message("F16C instruction set is not yet supported for MSVC")
+  endif()
   set(CMAKE_C_FLAGS "/MP")
   set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /bigobj")
 else(MSVC)
@@ -102,6 +107,29 @@ else(MSVC)
   else()
     set(SUPPORT_MSSE2 FALSE)
   endif()
+  # For cross compilation, turn off this flag if the target device does not support it
+  if(USE_F16C)
+    check_cxx_compiler_flag("-mf16c"     COMPILER_SUPPORT_MF16C)
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+      execute_process(COMMAND cat /proc/cpuinfo
+              COMMAND grep flags
+              COMMAND grep f16c
+              OUTPUT_VARIABLE CPU_SUPPORT_F16C)
+    elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+      execute_process(COMMAND sysctl -a
+              COMMAND grep machdep.cpu.features
+              COMMAND grep F16C
+              OUTPUT_VARIABLE CPU_SUPPORT_F16C)
+    endif()
+    if(NOT CPU_SUPPORT_F16C)
+      message("CPU does not support F16C instructions")
+    endif()
+    if(CPU_SUPPORT_F16C AND COMPILER_SUPPORT_MF16C)
+      set(SUPPORT_F16C TRUE)
+    endif()
+  else()
+    set(SUPPORT_F16C FALSE)
+  endif()
   set(CMAKE_C_FLAGS "-Wall -Wno-unknown-pragmas -fPIC -Wno-sign-compare")
   if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$")
     set(CMAKE_C_FLAGS "-Wno-braced-scalar-init")
diff --git a/Makefile b/Makefile
index 043dcb9..951b29b 100644
--- a/Makefile
+++ b/Makefile
@@ -187,6 +187,30 @@ ifeq ($(USE_CUDNN), 1)
 	LDFLAGS += -lcudnn
 endif
 
+# whether to use F16C instruction set extension for fast fp16 compute on CPU
+# if cross compiling you may want to explicitly turn it off if target system does not support it
+ifndef USE_F16C
+    ifneq ($(OS),Windows_NT)
+        detected_OS := $(shell uname -s)
+        ifeq ($(detected_OS),Darwin)
+            F16C_SUPP = $(shell sysctl -a | grep machdep.cpu.features | grep F16C)
+        endif
+        ifeq ($(detected_OS),Linux)
+            F16C_SUPP = $(shell cat /proc/cpuinfo | grep flags | grep f16c)
+        endif
+	ifneq ($(strip $(F16C_SUPP)),)
+                USE_F16C=1
+        else
+                USE_F16C=0
+        endif
+    endif
+    # if OS is Windows, check if your processor and compiler support F16C architecture.
+    # One way to check if processor supports it is to download the tool
+    # https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
+    # If coreinfo -c shows F16C and compiler supports it,
+    # then you can set USE_F16C=1 explicitly to leverage that capability.
+endif
+
 # gperftools malloc library (tcmalloc)
 ifeq ($(USE_GPERFTOOLS), 1)
 #	FIND_LIBNAME=tcmalloc_and_profiler
diff --git a/amalgamation/Makefile b/amalgamation/Makefile
index f7f3c00..f03a2b9 100644
--- a/amalgamation/Makefile
+++ b/amalgamation/Makefile
@@ -55,7 +55,6 @@ CFLAGS=-std=c++11 -Wno-unknown-pragmas -Wall $(DEFS)
 # if architecture of the CPU supports F16C instruction set, enable USE_F16C for fast fp16 computation on CPU
 ifeq ($(USE_F16C), 1)
 	CFLAGS+=-mf16c
-	DEFS+=-DMSHADOW_USE_F16C=1
 else
 	DEFS+=-DMSHADOW_USE_F16C=0
 endif
diff --git a/make/config.mk b/make/config.mk
index 9eded6f..dd67c33 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -132,11 +132,20 @@ endif
 ARCH := $(shell uname -a)
 ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64))
 	USE_SSE=0
+	USE_F16C=0
 else
 	USE_SSE=1
 endif
 
 #----------------------------
+# F16C instruction support for faster arithmetic of fp16 on CPU
+#----------------------------
+# For distributed training with fp16, this helps even if training on GPUs
+# If left empty, checks CPU support and turns it on.
+# For cross compilation, please check support for F16C on target device and turn off if necessary.
+USE_F16C =
+
+#----------------------------
 # distributed computing
 #----------------------------
 
diff --git a/make/crosscompile.jetson.mk b/make/crosscompile.jetson.mk
index 31a1398..acc9c4a 100644
--- a/make/crosscompile.jetson.mk
+++ b/make/crosscompile.jetson.mk
@@ -132,7 +132,10 @@ endif
 # Settings for power and arm arch
 #----------------------------
 USE_SSE=0
+
+# Turn off F16C instruction set support
 USE_F16C=0
+
 #----------------------------
 # distributed computing
 #----------------------------

-- 
To stop receiving notification emails like this one, please contact
anirudh2290@apache.org.