Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/06/29 23:47:27 UTC

[GitHub] ctcyang commented on a change in pull request #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce

ctcyang commented on a change in pull request #10696: [MXNET-366]Extend MXNet Distributed Training by AllReduce
URL: https://github.com/apache/incubator-mxnet/pull/10696#discussion_r199300072
 
 

 ##########
 File path: src/kvstore/collectives/src/collectives.cc
 ##########
 @@ -0,0 +1,779 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Copyright (c) 2018 by Contributors
+ */
+
+#if MXNET_USE_ALLREDUCE_DIST_KVSTORE
+
+#include <mpi.h>
+#include <unordered_map>
+#include <queue>
+#include <thread>
+#include <functional>
+#include <mutex>
+#include <condition_variable>
+#include <atomic>
+#include <iostream>
+
+#include "mxnet/base.h"
+#include "mxnet/ndarray.h"
+#include "mxnet/engine.h"
+#include "dmlc/logging.h"
+#include "mpi_message.pb.h"
+#include "collectives.h"
+#include "coll_wrapper.h"
+#include "coll_util.h"
+
+using namespace mxnet::kvstore;
+
+const char INT_PREFIX[] = "INT";
+const char STR_PREFIX[] = "STR";
+const char IDX_PREFIX[] = "IDX";
+const char OPS_PREFIX[] = "OPS";
+const char OPS_ALLREDUCE[] = "ALLREDUCE";
+const char OPS_BROADCAST[] = "BROADCAST";
+const char DELIMITER[] = ":";
+
+namespace {
+
+struct CollectiveOpRecord {
+  int rank;
+
+  std::string key;
+
+  MPIDataType dtype;
+
+  mxnet::NDArray *val_in;
+
+  mxnet::NDArray *val_out;
+
+  int root_rank;
+
+  mxnet::engine::CallbackOnComplete callback;
+};
+
+typedef std::unordered_map<std::string, CollectiveOpRecord> NDArrayTable;
+
+typedef std::unordered_map<std::string, std::vector<MPIRequest> > MessageTable;
+
+/*
+ *  The collective global state maintains a message table and a background
+ *  thread. On rank 0, the message table is used to coordinate the allreduce
+ *  order of ndarrays across the different nodes. The background thread
+ *  performs the collectives and handles the coordination between nodes
+ *  through MPI messages.
+ */
+struct CollectiveGlobalState {
+  std::atomic_flag initialized_flag = ATOMIC_FLAG_INIT;
+
+  std::condition_variable cv;
+
+  bool initialization_done = false;
+
+  int init_status;
+
+  std::mutex mu;
+
+  NDArrayTable ndarray_table;
+
+  std::queue<MPIRequest> message_queue;
+
+  std::thread background_thread;
+
+  bool shut_down = false;
+
+  std::unique_ptr<MessageTable> message_table;
+
+  int rank = 0;
+
+  int local_rank = 0;
+
+  int size = 1;
+
+  int device = -1;
+
+  mxnet::Context pinned_ctx;
+
+  Comm *local_comm = NULL;
+
+  ~CollectiveGlobalState() {
+    if (background_thread.joinable()) {
+      shut_down = true;
+      background_thread.join();
+    }
+  }
+};
+
+static CollectiveGlobalState coll_global;
+
+// static std::unordered_map<std::string, mxnet::NDArray> mpi_comm_buf;
+
+#define RANK_ZERO 0
+
+#define TAG_NOTIFY 1
+
+bool IncrementNDArrayCount(
+  const std::unique_ptr<MessageTable>& message_table,
+  const MPIRequest &msg, int mpi_size) {
+  auto name = msg.key_name();
+  auto table_iter = message_table->find(name);
+  if (table_iter == message_table->end()) {
+    message_table->emplace(name, std::vector<MPIRequest>({msg}));
+    MXCOLL_DEBUG(coll_global.rank, "Insert new message key [%s] reqeust type [%d] from "
+                "rank[%d] into message table!\n", name.c_str(), msg.request_type(),
+                msg.request_rank());
+    table_iter = message_table->find(name);
+  } else {
+    MXCOLL_DEBUG(coll_global.rank, "Insert existing message key [%s] request type [%d]"
+                "from rank[%d] into message table!\n",
+                name.c_str(), msg.request_type(), msg.request_rank());
+    table_iter->second.push_back(msg);
+  }
+
+  int count = table_iter->second.size();
+  MXCOLL_DEBUG(coll_global.rank, "Message Key [%s] count [%d]\n", name.c_str(), count);
+  return count == mpi_size;
+}
+
+int DataTypeToMPIType(int ndarray_dtype, MPIDataType *mpi_dtype) {
+  if (ndarray_dtype == mshadow::kFloat32) {
 
 Review comment:
  float16 is a very important datatype for GPU training, so it would be great if that were added here.
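  For illustration, a minimal sketch of how a float16 branch could be added to DataTypeToMPIType is shown below. Both enum names (MX_MPI_FLOAT32, MX_MPI_FLOAT16) and the 0/-1 return convention are assumptions rather than part of this patch, and since MPI has no built-in half-precision type, the MPI wrapper would typically have to transfer float16 data as 2-byte elements and supply a custom reduction op:

      // Sketch only: maps mshadow dtypes to the patch's MPIDataType enum.
      // MX_MPI_FLOAT16 is a hypothetical value; half precision has no native
      // MPI datatype, so the wrapper layer must handle it explicitly.
      int DataTypeToMPIType(int ndarray_dtype, MPIDataType *mpi_dtype) {
        if (ndarray_dtype == mshadow::kFloat32) {
          *mpi_dtype = MX_MPI_FLOAT32;   // assumed enum name
        } else if (ndarray_dtype == mshadow::kFloat16) {
          *mpi_dtype = MX_MPI_FLOAT16;   // hypothetical enum value
        } else {
          return -1;                     // unsupported dtype
        }
        return 0;
      }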
