You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by an...@apache.org on 2018/05/02 04:32:25 UTC
[incubator-mxnet] branch v1.2.0 updated: [MXNET-372] Add build flag
for USE_F16C in CMake and clarify flag in make (#10771)
This is an automated email from the ASF dual-hosted git repository.
anirudh2290 pushed a commit to branch v1.2.0
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v1.2.0 by this push:
new 9fb3835 [MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in make (#10771)
9fb3835 is described below
commit 9fb3835de52ea04d013ebabe028168208a7f0700
Author: Rahul Huilgol <ra...@gmail.com>
AuthorDate: Tue May 1 21:32:21 2018 -0700
[MXNET-372] Add build flag for USE_F16C in CMake and clarify flag in make (#10771)
* f16c build
* typo
* only print message for MSVC if USE_F16C
* improve build logic
* update mshadow
* remove def
---
3rdparty/mshadow | 2 +-
CMakeLists.txt | 28 ++++++++++++++++++++++++++++
Makefile | 24 ++++++++++++++++++++++++
amalgamation/Makefile | 1 -
make/config.mk | 9 +++++++++
make/crosscompile.jetson.mk | 3 +++
6 files changed, 65 insertions(+), 2 deletions(-)
diff --git a/3rdparty/mshadow b/3rdparty/mshadow
index e051c2c..a8c650c 160000
--- a/3rdparty/mshadow
+++ b/3rdparty/mshadow
@@ -1 +1 @@
-Subproject commit e051c2c91c1ac4a3a8fed3e90ef2fb549220d972
+Subproject commit a8c650ce8a708608a282c4d1e251c57873a8db25
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 363237f..05d8021 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -16,6 +16,7 @@ mxnet_option(USE_OPENCV "Build with OpenCV support" ON)
mxnet_option(USE_OPENMP "Build with Openmp support" ON)
mxnet_option(USE_CUDNN "Build with cudnn support" ON) # one could set CUDNN_ROOT for search path
mxnet_option(USE_SSE "Build with x86 SSE instruction support" ON)
+mxnet_option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
mxnet_option(USE_LAPACK "Build with lapack support" ON IF NOT MSVC)
mxnet_option(USE_MKL_IF_AVAILABLE "Use MKL if found" ON)
mxnet_option(USE_MKLML_MKL "Use MKLDNN variant of MKL (if MKL found)" ON IF USE_MKL_IF_AVAILABLE AND UNIX AND (NOT APPLE))
@@ -86,6 +87,10 @@ if(MSVC)
add_definitions(-DNNVM_EXPORTS)
add_definitions(-DDMLC_STRICT_CXX11)
add_definitions(-DNOMINMAX)
+ set(SUPPORT_F16C FALSE)
+ if(USE_F16C)
+ message("F16C instruction set is not yet supported for MSVC")
+ endif()
set(CMAKE_C_FLAGS "/MP")
set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} /bigobj")
else(MSVC)
@@ -102,6 +107,29 @@ else(MSVC)
else()
set(SUPPORT_MSSE2 FALSE)
endif()
+ # For cross compilation, turn off flag if target device does not support it
+ if(USE_F16C)
+ check_cxx_compiler_flag("-mf16c" COMPILER_SUPPORT_MF16C)
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ execute_process(COMMAND cat /proc/cpuinfo
+ COMMAND grep flags
+ COMMAND grep f16c
+ OUTPUT_VARIABLE CPU_SUPPORT_F16C)
+ elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ execute_process(COMMAND sysctl -a
+ COMMAND grep machdep.cpu.features
+ COMMAND grep F16C
+ OUTPUT_VARIABLE CPU_SUPPORT_F16C)
+ endif()
+ if(NOT CPU_SUPPORT_F16C)
+ message("CPU does not support F16C instructions")
+ endif()
+ if(CPU_SUPPORT_F16C AND COMPILER_SUPPORT_MF16C)
+ set(SUPPORT_F16C TRUE)
+ endif()
+ else()
+ set(SUPPORT_F16C FALSE)
+ endif()
set(CMAKE_C_FLAGS "-Wall -Wno-unknown-pragmas -fPIC -Wno-sign-compare")
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES ".*Clang$")
set(CMAKE_C_FLAGS "-Wno-braced-scalar-init")
diff --git a/Makefile b/Makefile
index 043dcb9..951b29b 100644
--- a/Makefile
+++ b/Makefile
@@ -187,6 +187,30 @@ ifeq ($(USE_CUDNN), 1)
LDFLAGS += -lcudnn
endif
+# whether to use F16C instruction set extension for fast fp16 compute on CPU
+# if cross compiling you may want to explicitly turn it off if target system does not support it
+ifndef USE_F16C
+ ifneq ($(OS),Windows_NT)
+ detected_OS := $(shell uname -s)
+ ifeq ($(detected_OS),Darwin)
+ F16C_SUPP = $(shell sysctl -a | grep machdep.cpu.features | grep F16C)
+ endif
+ ifeq ($(detected_OS),Linux)
+ F16C_SUPP = $(shell cat /proc/cpuinfo | grep flags | grep f16c)
+ endif
+ ifneq ($(strip $(F16C_SUPP)),)
+ USE_F16C=1
+ else
+ USE_F16C=0
+ endif
+ endif
+ # if OS is Windows, check if your processor and compiler support F16C architecture.
+ # One way to check if processor supports it is to download the tool
+ # https://docs.microsoft.com/en-us/sysinternals/downloads/coreinfo.
+ # If coreinfo -c shows F16C and compiler supports it,
+ # then you can set USE_F16C=1 explicitly to leverage that capability.
+endif
+
# gperftools malloc library (tcmalloc)
ifeq ($(USE_GPERFTOOLS), 1)
# FIND_LIBNAME=tcmalloc_and_profiler
diff --git a/amalgamation/Makefile b/amalgamation/Makefile
index f7f3c00..f03a2b9 100644
--- a/amalgamation/Makefile
+++ b/amalgamation/Makefile
@@ -55,7 +55,6 @@ CFLAGS=-std=c++11 -Wno-unknown-pragmas -Wall $(DEFS)
# if architecture of the CPU supports F16C instruction set, enable USE_F16C for fast fp16 computation on CPU
ifeq ($(USE_F16C), 1)
CFLAGS+=-mf16c
- DEFS+=-DMSHADOW_USE_F16C=1
else
DEFS+=-DMSHADOW_USE_F16C=0
endif
diff --git a/make/config.mk b/make/config.mk
index 9eded6f..dd67c33 100644
--- a/make/config.mk
+++ b/make/config.mk
@@ -132,11 +132,20 @@ endif
ARCH := $(shell uname -a)
ifneq (,$(filter $(ARCH), armv6l armv7l powerpc64le ppc64le aarch64))
USE_SSE=0
+ USE_F16C=0
else
USE_SSE=1
endif
#----------------------------
+# F16C instruction support for faster arithmetic of fp16 on CPU
+#----------------------------
+# For distributed training with fp16, this helps even if training on GPUs
+# If left empty, the build checks the CPU for F16C support and turns it on when available.
+# For cross compilation, please check support for F16C on target device and turn off if necessary.
+USE_F16C =
+
+#----------------------------
# distributed computing
#----------------------------
diff --git a/make/crosscompile.jetson.mk b/make/crosscompile.jetson.mk
index 31a1398..acc9c4a 100644
--- a/make/crosscompile.jetson.mk
+++ b/make/crosscompile.jetson.mk
@@ -132,7 +132,10 @@ endif
# Settings for power and arm arch
#----------------------------
USE_SSE=0
+
+# Turn off F16C instruction set support
USE_F16C=0
+
#----------------------------
# distributed computing
#----------------------------
--
To stop receiving notification emails like this one, please contact
anirudh2290@apache.org.