You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by pt...@apache.org on 2019/12/12 02:35:06 UTC

[incubator-mxnet] branch v1.6.x updated: Omp fork numthreads fix 1.6 (#17000)

This is an automated email from the ASF dual-hosted git repository.

ptrendx pushed a commit to branch v1.6.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.6.x by this push:
     new 5b9f79a  Omp fork numthreads fix 1.6 (#17000)
5b9f79a is described below

commit 5b9f79aa757a0e330e9a1831e3f0a2bccabea310
Author: Pedro Larroy <pe...@gmail.com>
AuthorDate: Wed Dec 11 18:34:19 2019 -0800

    Omp fork numthreads fix 1.6 (#17000)
    
    * Fix test_gluon.py:test_sync_batchnorm when number of GPUS > 4
    
    * Prevent after-fork number of OMP threads being bigger than 1.
    This could happen if it was set in the environment. As we are setting engine::OpenMP::Get()->set_enabled(false) in initialize.cc in the child after forking, the behaviour goes back to what it was before #15762 was introduced.
    
    Regions using omp get the threads count from GetRecommendedOMPThreadCount, so if omp is disabled they will get 1 thread and run serially
    
    * add C++ unit test
    
    * Add comment
---
 src/engine/openmp.cc                 | 10 +++++---
 tests/cpp/engine/omp_test.cc         | 50 ++++++++++++++++++++++++++++++++++++
 tests/python/unittest/test_engine.py | 41 +++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 4 deletions(-)

diff --git a/src/engine/openmp.cc b/src/engine/openmp.cc
index 8fe3939..8946d91 100644
--- a/src/engine/openmp.cc
+++ b/src/engine/openmp.cc
@@ -83,10 +83,11 @@ void OpenMP::set_reserve_cores(int cores) {
 
 int OpenMP::GetRecommendedOMPThreadCount(bool exclude_reserved) const {
 #ifdef _OPENMP
-  if (omp_num_threads_set_in_environment_) {
-    return omp_get_max_threads();
-  }
   if (enabled_) {
+    // OMP_NUM_THREADS was set in the environment at the time of static initialization
+    if (omp_num_threads_set_in_environment_) {
+      return omp_get_max_threads();
+    }
     int thread_count = omp_get_max_threads();
     if (exclude_reserved) {
       if (reserve_cores_ >= thread_count) {
@@ -100,8 +101,9 @@ int OpenMP::GetRecommendedOMPThreadCount(bool exclude_reserved) const {
       return thread_count;
     }
     return omp_thread_max_;
+  } else {
+    return 1;
   }
-  return 1;
 #else
   return 1;
 #endif
diff --git a/tests/cpp/engine/omp_test.cc b/tests/cpp/engine/omp_test.cc
new file mode 100644
index 0000000..2be7d9d
--- /dev/null
+++ b/tests/cpp/engine/omp_test.cc
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "../include/test_util.h"
+#include "../../src/engine/openmp.h"
+
+#if defined(unix) || defined(__unix__) || defined(__unix)
+#include <unistd.h>
+#include <sys/types.h>
+#include <dmlc/logging.h>
+
+
+TEST(OMPBehaviour, after_fork) {
+    /* 
+     * Check that after fork, OMP is disabled, and the recommended thread count is 1 to prevent 
+     * process fanout.
+     */
+    using namespace mxnet::engine;
+    auto openmp = OpenMP::Get();
+    pid_t pid = fork();
+    if (pid == 0) {
+        EXPECT_FALSE(openmp->enabled());
+        EXPECT_EQ(openmp->GetRecommendedOMPThreadCount(), 1);
+    } else if (pid > 0) {
+        int status;
+        int ret = waitpid(pid, &status, 0);
+        CHECK_EQ(ret, pid) << "waitpid failed";
+    } else {
+        CHECK(false) << "fork failed";
+    }
+}
+#endif
diff --git a/tests/python/unittest/test_engine.py b/tests/python/unittest/test_engine.py
index 29b7b82..61d94dd 100644
--- a/tests/python/unittest/test_engine.py
+++ b/tests/python/unittest/test_engine.py
@@ -17,6 +17,9 @@
 
 import nose
 import mxnet as mx
+import os
+import unittest
+from mxnet.test_utils import EnvManager
 
 def test_bulk():
     with mx.engine.bulk(10):
@@ -30,6 +33,44 @@ def test_bulk():
             x += 1
     assert (x.asnumpy() == 104).all()
 
+@unittest.skip("OMP platform dependent")
+def test_engine_openmp_after_fork():
+    """
+    Test that the number of max threads in the child is 1. After forking we should not use a bigger
+    OMP thread pool.
+
+    With GOMP the child always has the same number when calling omp_get_max_threads, with LLVM OMP
+    the child respects the number of max threads set in the parent.
+    """
+    with EnvManager('OMP_NUM_THREADS', '42'):
+        r, w = os.pipe()
+        pid = os.fork()
+        if pid:
+            os.close(r)
+            wfd = os.fdopen(w, 'w')
+            wfd.write('a')
+            omp_max_threads = mx.base._LIB.omp_get_max_threads()
+            print("Parent omp max threads: {}".format(omp_max_threads))
+            try:
+                wfd.close()
+            except:
+                pass
+            try:
+                (cpid, status) = os.waitpid(pid, 0)
+                assert cpid == pid
+                exit_status = status >> 8
+                assert exit_status == 0
+            except:
+                pass
+        else:
+            os.close(w)
+            rfd = os.fdopen(r, 'r')
+            rfd.read(1)
+            omp_max_threads = mx.base._LIB.omp_get_max_threads()
+            print("Child omp max threads: {}".format(omp_max_threads))
+            assert omp_max_threads == 1
+
+
 
 if __name__ == '__main__':
     import nose