Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/03 07:48:16 UTC
[11/60] incubator-singa git commit: SINGA-163 - Reorganize the project folder layout
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/cluster_rt.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/cluster_rt.h b/include/singa/utils/cluster_rt.h
deleted file mode 100644
index 4ab48bd..0000000
--- a/include/singa/utils/cluster_rt.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_CLUSTER_RT_H_
-#define SINGA_UTILS_CLUSTER_RT_H_
-
-#include <map>
-#include <mutex>
-#include <string>
-#include <vector>
-
-namespace singa {
-
-typedef void (*rt_callback)(void *context);
-
-struct RTCallback {
- rt_callback fn;
- void* ctx;
-};
-
-/**
- * ClusterRuntime is a runtime service that manages dynamic configuration
- * and status of the whole cluster. It mainly provides the following services:
- * 1) Provide running status of each server/worker
- * 2) Translate process id to (hostname:port)
- */
-class ClusterRuntime {
- public:
- // ClusterRuntime has different implementations, selected at compile time
- static ClusterRuntime* Create(const std::string& host, int job_id);
-
- virtual ~ClusterRuntime() {}
- /**
- * Initialize the runtime instance
- */
- virtual bool Init() = 0;
- /**
- * register the process, and get a unique process id
- *
- * \return the process id, -1 if failed
- */
- virtual int RegistProc(const std::string& host_addr, int pid) = 0;
- /**
- * translate the process id to host address
- *
- * \return the host and port, "" if no such proc id
- */
- virtual std::string GetProcHost(int proc_id) = 0;
- /**
- * Server: watch all workers in a server group,
- * will be notified when all workers have left
- */
- virtual bool WatchSGroup(int gid, int sid, rt_callback fn, void* ctx) = 0;
- /**
- * Worker: join a server group (i.e. start to read/update these servers)
- */
- virtual bool JoinSGroup(int gid, int wid, int s_group) = 0;
- /**
- * Worker: leave a server group (i.e. it has finished all its work)
- */
- virtual bool LeaveSGroup(int gid, int wid, int s_group) = 0;
-};
-
-/*
- * A ClusterRuntime implementation for single-process environment
- */
-class SPClusterRT : public ClusterRuntime {
- public:
- ~SPClusterRT();
-
- bool Init() override;
- int RegistProc(const std::string& host_addr, int pid) override;
- std::string GetProcHost(int proc_id) override;
- bool WatchSGroup(int gid, int sid, rt_callback fn, void* ctx) override;
- bool JoinSGroup(int gid, int wid, int s_group) override;
- bool LeaveSGroup(int gid, int wid, int s_group) override;
-
- private:
- std::vector<std::string> proc_list_;
- std::map<int, std::vector<RTCallback*>> grp_callbacks_;
- std::map<int, int> grp_count_;
- std::mutex lock_;
-};
-
-} // namespace singa
-
-#endif // SINGA_UTILS_CLUSTER_RT_H_
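
For reference, a minimal sketch of how the ClusterRuntime interface above is driven from a worker process; the host string, port, and ids are hypothetical, and error handling is reduced to early returns:

    #include <string>
    #include "singa/utils/cluster_rt.h"

    int main() {
      // Create() returns the implementation chosen at compile time,
      // e.g. SPClusterRT for a single-process setup.
      singa::ClusterRuntime* rt =
          singa::ClusterRuntime::Create("127.0.0.1:2181", /*job_id=*/0);
      if (!rt->Init()) return 1;  // must be called before any other method
      int proc_id = rt->RegistProc("127.0.0.1:5000", /*pid=*/0);
      if (proc_id < 0) return 1;  // -1 signals a failed registration
      std::string addr = rt->GetProcHost(proc_id);  // "host:port", "" if unknown
      rt->JoinSGroup(/*gid=*/0, /*wid=*/proc_id, /*s_group=*/0);
      // ... read/update the servers in the group ...
      rt->LeaveSGroup(/*gid=*/0, /*wid=*/proc_id, /*s_group=*/0);
      delete rt;
      return 0;
    }
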
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/common.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/common.h b/include/singa/utils/common.h
deleted file mode 100644
index 0bcec58..0000000
--- a/include/singa/utils/common.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_COMMON_H_
-#define SINGA_UTILS_COMMON_H_
-
-#include <cstdarg>
-#include <google/protobuf/message.h>
-#include <unordered_map>
-#include <sstream>
-#include <string>
-#include <vector>
-#include <utility>
-#include "singa/proto/common.pb.h"
-
-namespace singa {
-
-using std::vector;
-using std::string;
-std::string IntVecToString(const std::vector<int>& vec);
-std::string VStringPrintf(std::string fmt, va_list l);
-std::string StringPrintf(std::string fmt, ...);
-
-/**
- * Locate the position of the arg in arglist.
- *
- * @param argc total num of arguments
- * @param arglist all arguments
- * @param arg the searched argument
- * @return the position of arg in the arglist; -1 if not found.
- */
-int ArgPos(int argc, char** arglist, const char* arg);
-void CreateFolder(const std::string name);
-/**
- * Slice a set of large Params into small pieces such that they can be roughly
- * equally partitioned into a fixed number of boxes.
- *
- * @param num total number of boxes to store the small pieces
- * @param sizes size of all Params
- * @return all slices for each Param
- */
-const std::vector<std::vector<int>> Slice(int num,
- const std::vector<int>& sizes);
-/**
- * Partition slices into boxes.
- *
- * @param num number of boxes
- * @param slices slice sizes
- * @return box id for each slice
- */
-const std::vector<int> PartitionSlices(int num, const std::vector<int>& slices);
-/*
-inline void Sleep(int millisec=1){
- std::this_thread::sleep_for(std::chrono::milliseconds(millisec));
-}
-*/
-int gcd(int a, int b);
-int LeastCommonMultiple(int a, int b);
-/*
-inline float rand_real() {
- return static_cast<float>(rand_r())/(RAND_MAX+1.0f);
-}
-*/
-std::string GetHostIP();
-void SetupLog(const std::string& workspace, const std::string& model);
-
-/**
- * Performance metrics.
- */
-class Metric {
- public:
- Metric() {}
- explicit Metric(const std::string& str);
- /**
- * Add one metric.
- *
- * If the metric exists, aggregate the value. Otherwise create a new entry for it.
- *
- * @param name metric name, e.g., 'loss'
- * @param value metric value
- */
- void Add(const std::string& name, float value);
- void Add(const std::string& name, float value, int count);
- /**
- * Reset all metric counters and values to 0
- */
- void Reset();
- /**
- * Generate a one-line string for logging
- */
- std::string ToLogString() const;
- /**
- * Serialize the object into a string
- */
- std::string ToString() const;
- /**
- * Parse the metric from a string
- */
- void ParseFrom(const std::string& msg);
-
- private:
- std::unordered_map<std::string, std::pair<int, float>> entry_;
-};
-
-using google::protobuf::Message;
-void Im2col(const float* data_im, const int channels,
- const int height, const int width, const int kernel_h, const int kernel_w,
- const int pad_h, const int pad_w, const int stride_h, const int stride_w,
- float* data_col);
-void Col2im(const float* data_col, const int channels,
- const int height, const int width, const int patch_h, const int patch_w,
- const int pad_h, const int pad_w, const int stride_h, const int stride_w,
- float* data_im);
-void ForwardMaxPooling(const float* bottom, const int num, const int channels,
- const int height, const int width, const int kernel_h, const int kernel_w,
- const int pad_h, const int pad_w, const int stride_h, const int stride_w,
- float* top, float* mask);
-void BackwardMaxPooling(const float* top, const float* mask, const int num,
- const int channels, const int height, const int width,
- const int kernel_h, const int kernel_w, const int pad_h, const int pad_w,
- const int stride_h, const int stride_w,
- float* bottom);
-void ForwardAvgPooling(const float* bottom, const int num, const int channels,
- const int height, const int width, const int kernel_h, const int kernel_w,
- const int pad_h, const int pad_w, const int stride_h, const int stride_w,
- float* top);
-void BackwardAvgPooling(const float* top, const int num, const int channels,
- const int height, const int width, const int kernel_h, const int kernel_w,
- const int pad_h, const int pad_w, const int stride_h, const int stride_w,
- float* bottom);
-
-void ReadProtoFromTextFile(const char* filename, Message* proto);
-void WriteProtoToTextFile(const Message& proto, const char* filename);
-void ReadProtoFromBinaryFile(const char* filename, Message* proto);
-void WriteProtoToBinaryFile(const Message& proto, const char* filename);
-
-/**
- * Write a string (e.g., graph representation of a net) into a text file.
- */
-void WriteStringToTextFile(const string& filename, const string& context);
-
-/**
- * Parse metric pairs (key = value[, key = value]) from string
- */
-const vector<std::pair<string, float>> GetMetricFromString(const string& disp);
-} // namespace singa
-
-#endif // SINGA_UTILS_COMMON_H_
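
As a usage note for the Metric class above, a small example of the add/aggregate/log cycle (method semantics taken from the comments in this header):

    #include <cstdio>
    #include "singa/utils/common.h"

    int main() {
      singa::Metric perf;
      perf.Add("loss", 2.5f);      // creates the "loss" entry
      perf.Add("loss", 1.5f);      // aggregates into the existing entry
      perf.Add("accuracy", 0.8f);
      std::printf("%s\n", perf.ToLogString().c_str());  // one line for logging
      perf.Reset();                // counters and values back to 0
      return 0;
    }
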
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/context.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/context.h b/include/singa/utils/context.h
index 3490d29..55e783d 100644
--- a/include/singa/utils/context.h
+++ b/include/singa/utils/context.h
@@ -30,7 +30,17 @@
#include <vector>
#ifdef USE_GPU
-#include "singa/utils/cuda_utils.h"
+#include <cublas_v2.h>
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <curand.h>
+// CUDA: various checks for different function calls.
+#define CUDA_CHECK(condition) \
+/* Code block avoids redefinition of cudaError_t error */ \
+do { \
+cudaError_t error = condition; \
+CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
+} while (0)
#ifdef USE_CUDNN
#include <cudnn.h>
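
The CUDA_CHECK macro inlined above wraps any call returning cudaError_t and aborts with the error string on failure. A minimal sketch of its intended use, with a hypothetical helper (assumes glog's CHECK_EQ is visible at the point of use, as the macro requires):

    #ifdef USE_GPU
    #include "singa/utils/context.h"

    // Allocate and zero a device buffer of n floats; any CUDA failure
    // is logged and terminates the process through CHECK_EQ.
    void AllocAndClear(float** dev, size_t n) {
      CUDA_CHECK(cudaMalloc(dev, n * sizeof(float)));
      CUDA_CHECK(cudaMemset(*dev, 0, n * sizeof(float)));
    }
    #endif  // USE_GPU
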
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/cuda_utils.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/cuda_utils.h b/include/singa/utils/cuda_utils.h
deleted file mode 100644
index 1270e92..0000000
--- a/include/singa/utils/cuda_utils.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-/**
- * The code is adapted from Caffe, which is under the BSD 2-Clause License.
- *
- * COPYRIGHT
- * All contributions by the University of California:
- * Copyright (c) 2014, The Regents of the University of California (Regents)
- * All rights reserved.
- * All other contributions:
- * Copyright (c) 2014, the respective contributors
- * All rights reserved.
- */
-#ifndef SINGA_UTILS_CUDA_UTILS_H_
-#define SINGA_UTILS_CUDA_UTILS_H_
-#include <cublas_v2.h>
-#include <cuda.h>
-#include <cuda_runtime.h>
-#include <curand.h>
-
-// CUDA: various checks for different function calls.
-#define CUDA_CHECK(condition) \
- /* Code block avoids redefinition of cudaError_t error */ \
- do { \
- cudaError_t error = condition; \
- CHECK_EQ(error, cudaSuccess) << " " << cudaGetErrorString(error); \
- } while (0)
-
-#endif // SINGA_UTILS_CUDA_UTILS_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/graph.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/graph.h b/include/singa/utils/graph.h
deleted file mode 100644
index 2462808..0000000
--- a/include/singa/utils/graph.h
+++ /dev/null
@@ -1,196 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_GRAPH_H_
-#define SINGA_UTILS_GRAPH_H_
-
-#include <stack>
-#include <string>
-#include <map>
-#include <vector>
-namespace singa {
-using std::string;
-using std::map;
-
-/**
- * Node class representing a layer in a neural net.
- *
- * TODO: remove layer-dependent fields, like origin and partition_id, to make
- * it an independent and simple class.
- */
-class Node {
- public:
- /**
- * Node constructor.
- *
- * @param name identifier of the node, e.g., layer name.
- */
- explicit Node(string name);
- /**
- * Construct a node with specified attributes.
- * @param name node identifier
- * @param attrs node attributes for printing, including "shape", "color", etc.
- * The accepted attributes depend on the visualization engine; for graphviz,
- * the attribute list is at http://www.graphviz.org/content/attrs.
- */
- Node(string name, const std::map<string, string>& attrs);
- /**
- * @deprecated {to make the Graph class an independent class.}
- *
- * Node constructor used for model partitioning.
- *
- * This node is a partition of some node.
- * @param name node name
- * @param origin name of the original node
- * @param id partition id of this node
- * @param proto conf of the corresponding layer
- */
- Node(const string& name, const std::string& origin, int id, void* proto);
- ~Node() {} // the proto field is deleted outside by other functions
-
-
- void AddDstNode(Node* dst);
- void AddSrcNode(Node* src);
- void RemoveDstNode(Node* dst);
- void RemoveSrcNode(Node* src);
-
- string name = "";
- //! name of the origin node/layer from which this node is derived
- string origin = "";
- //! partition id
- int partition_id = 0;
- //! proto of the corresponding layer
- void* proto = nullptr;
- std::vector<Node*> srcnodes;
- std::vector<Node*> dstnodes;
- //!< node attributes, including shape, color, etc.
- std::map<string, string> attrs;
-};
-
-/**
- * A neural net is constructed by first creating a graph with each node
- * representing one layer. After a topological sort of the graph nodes, layers
- * are created and connected.
- */
-class Graph {
- public:
- Graph() {}
- ~Graph();
- const Graph Reverse() const;
- /**
- * @return all nodes of the graph
- */
- inline const std::vector<Node*>& nodes() const {
- return nodes_;
- }
- /**
- * @param name node name
- * @return the node with the given name
- */
- inline Node* node(const string& name) const {
- return name2node_.at(name);
- }
- /**
- * Add an existing node into this graph.
- */
- void AddNode(Node* node);
- /**
- * Create a node with the given name and add it into the graph.
- * @return the newly created node.
- */
- Node* AddNode(const string& name);
- /**
- * Create a node with the given name and attributes.
- */
- Node* AddNode(const string& name, const std::map<string, string>& attrs);
- /**
- * @deprecated {remove layer related info from node attrs}
- * Add a node with given name and other info.
- */
- Node* AddNode(const std::string& name, const std::string& origin, int id,
- void* proto);
- /**
- * Add an edge connecting the two given nodes.
- */
- void AddEdge(Node* srcnode, Node* dstnode);
- /**
- * Add an edge connecting the two nodes with the given name.
- */
- void AddEdge(const string& src, const std::string& dst);
- /**
- * Add an edge connecting the two given nodes, the edge attributes are also
- * given.
- */
- void AddEdge(Node* srcnode, Node* dstnode,
- const std::map<string, string>& attrs);
- /**
- * Add an edge connecting the two nodes with the given names, the edge
- * attributes are also given, which are used for printing.
- * http://www.graphviz.org/content/attrs
- */
- void AddEdge(const string& src, const std::string& dst,
- const std::map<string, string>& attrs);
-
- /**
- * Remove the edge connecting the two given nodes.
- */
- void RemoveEdge(Node* src, Node* dst);
- /**
- * Remove the edge connecting two nodes with the given names.
- */
- void RemoveEdge(const string &src, const std::string& dst);
- /**
- * Dump the graph into json string which can be used to draw a picture by
- * graphviz.
- *
- * It calls ToJson(const std::map<std::string, std::string>& label) with
- * empty label mapping.
- */
- string ToJson() const;
- /**
- * \copybrief ToJson()
- *
- * @param label information to be displayed as label for each node
- */
- string ToJson(const map<std::string, std::string>& label) const;
- /**
- * Topologically sort all nodes of the graph.
- */
- void Sort();
-
- private:
- /**
- *
- * @return the name of the edge connecting src to dst
- */
- const string GetEdgeName(const string& src, const string& dst) const {
- return src + "-->" + dst;
- }
-
- private:
- std::vector<Node*> nodes_;
- std::map<string, Node*> name2node_;
- std::map<string, std::map<string, string>> edge_attrs_;
-};
-
-} // namespace singa
-
-#endif // SINGA_UTILS_GRAPH_H_
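
A brief sketch of the Graph API above: build a three-node chain, sort it topologically, and dump the JSON used for graphviz rendering (node names and attributes are illustrative):

    #include <cstdio>
    #include "singa/utils/graph.h"

    int main() {
      singa::Graph g;
      g.AddNode("data");
      g.AddNode("hidden", {{"shape", "box"}, {"color", "blue"}});
      g.AddNode("loss");
      g.AddEdge("data", "hidden");
      g.AddEdge("hidden", "loss", {{"label", "activations"}});
      g.Sort();  // topological order: data, hidden, loss
      std::printf("%s\n", g.ToJson().c_str());
      return 0;
    }
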
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/image_transform.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/image_transform.h b/include/singa/utils/image_transform.h
deleted file mode 100644
index 2867ad2..0000000
--- a/include/singa/utils/image_transform.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_IMAGE_TRANSFORM_H_
-#define SINGA_UTILS_IMAGE_TRANSFORM_H_
-
-#include <glog/logging.h>
-// TODO(wangwei) provide an image transformation API; the implementation can
-// be done with OpenCV, manual transforms, or mshadow.
-namespace singa {
-
-void ImageTransform(const float* in, const float* mean, bool mirror, int h_crop,
- int w_crop, int h_offset, int w_offset, int channel, int height, int width,
- float scale, float* out);
-} // namespace singa
-
-#endif // SINGA_UTILS_IMAGE_TRANSFORM_H_
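
A hedged example of calling ImageTransform with the argument order declared above: subtract the mean, crop a 24x24 window at offset (4, 4) from a 3x32x32 image, mirror it, and scale to [0, 1]; correctly sized buffers are the caller's responsibility:

    #include "singa/utils/image_transform.h"

    void Preprocess(const float* img, const float* mean, float* out) {
      singa::ImageTransform(img, mean, /*mirror=*/true,
                            /*h_crop=*/24, /*w_crop=*/24,
                            /*h_offset=*/4, /*w_offset=*/4,
                            /*channel=*/3, /*height=*/32, /*width=*/32,
                            /*scale=*/1.0f / 255, out);
    }
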
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/job_manager.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/job_manager.h b/include/singa/utils/job_manager.h
deleted file mode 100644
index 7f1b4f1..0000000
--- a/include/singa/utils/job_manager.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_JOB_MANAGER_H_
-#define SINGA_UTILS_JOB_MANAGER_H_
-
-#include <string>
-#include <vector>
-
-#ifdef USE_ZOOKEEPER
-#include "singa/utils/zk_service.h"
-#endif
-
-namespace singa {
-
-struct JobInfo {
- int id;
- int procs;
- std::string name;
-};
-
-class JobManager {
- public:
- // host is a comma-separated list of host:port pairs, one per zk server,
- // e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002"
- explicit JobManager(const std::string& host);
-
- // NOTICE: Init must be called once, before using any other function
- bool Init();
- // generate a unique job id
- bool GenerateJobID(int* id);
- // generate a list of hosts for a job conf
- bool GenerateHostList(const char* host_file, const char* job_file,
- std::vector<std::string>* list);
- // list all jobs recorded in zk
- bool ListJobs(std::vector<JobInfo>* jobs);
- // list running processes for a job
- bool ListJobProcs(int job, std::vector<std::string>* procs);
- // remove a job path in zk
- bool Remove(int job);
- // remove all job paths in zk
- bool RemoveAllJobs();
- // remove all singa related paths in zk
- bool CleanUp();
-
- private:
- const int kJobsNotRemoved = 10;
-
- bool CleanPath(const std::string& path, bool remove);
- std::string ExtractClusterConf(const char* job_file);
-
- std::string host_ = "";
-#ifdef USE_ZOOKEEPER
- int timeout_ = 30000;
- ZKService zk_;
-#endif
-};
-
-} // namespace singa
-
-#endif // SINGA_UTILS_JOB_MANAGER_H_
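
A short sketch of the JobManager workflow above, listing the jobs recorded in ZooKeeper; the host string is a hypothetical instance of the comma-separated format documented on the constructor:

    #include <cstdio>
    #include <vector>
    #include "singa/utils/job_manager.h"

    int main() {
      singa::JobManager mgr("127.0.0.1:3000,127.0.0.1:3001");
      if (!mgr.Init()) return 1;  // Init() must precede all other calls
      std::vector<singa::JobInfo> jobs;
      if (mgr.ListJobs(&jobs))
        for (const auto& j : jobs)
          std::printf("job %d (%s): %d procs\n", j.id, j.name.c_str(), j.procs);
      return 0;
    }
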
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
deleted file mode 100644
index cf1d227..0000000
--- a/include/singa/utils/math_addr.h
+++ /dev/null
@@ -1,279 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_MATH_ADDR_H_
-#define SINGA_UTILS_MATH_ADDR_H_
-
-extern "C" {
-#include <cblas.h>
-}
-#ifdef USE_GPU
-#include <cuda_runtime.h>
-#include <cublas_v2.h>
-#endif
-
-#include "singa/utils/singa_op.h"
-
-namespace singa {
-template<typename Dtype>
-Dtype cpu_asum(int n, const Dtype* A, int inc) {
- return cblas_sasum(n, A, inc);
-}
-
-template<typename Dtype>
-void cpu_gemm(const Dtype * A, const Dtype * B,
- const int m, const int n, const int k, const Dtype alpha, const Dtype beta,
- const bool TranA, const bool TranB, Dtype * C) {
- int lda, ldb;
- CBLAS_TRANSPOSE tA, tB;
- lda = TranA ? m : k;
- ldb = TranB ? k : n;
- tA = TranA ? CblasTrans : CblasNoTrans;
- tB = TranB ? CblasTrans : CblasNoTrans;
- cblas_sgemm(CblasRowMajor, tA, tB, m, n, k, alpha, A, lda,
- B, ldb, beta, C, n);
-}
-
-// Be careful: m is the length of B, n is the length of C, and A is an n*m
-// matrix.
-template<typename Dtype>
-void cpu_gemv(const Dtype * A, const Dtype * B, const int m, const int n,
- const Dtype alpha, const Dtype beta, const bool TranA, Dtype * C) {
- CBLAS_TRANSPOSE tA;
- tA = TranA ? CblasTrans : CblasNoTrans;
- cblas_sgemv(CblasRowMajor, tA, m, n, alpha, A, n, B, 1, beta, C, 1);
-}
-
-template<typename Dtype>
-void cpu_axpy(const int n, const Dtype alpha, const Dtype * A, Dtype * B) {
- cblas_saxpy(n, alpha, A, 1, B, 1);
-}
-
-template<typename Dtype>
-void cpu_scale(const int n, const Dtype alpha, Dtype * A) {
- cblas_sscal(n, alpha, A, 1);
-}
-
-template<typename Dtype>
-void cpu_copy(const int n, const Dtype* A, Dtype *B) {
- cblas_scopy(n, A, 1, B, 1);
-}
-
-template<typename Dtype>
-Dtype cpu_dot(const int n, const Dtype * A, const Dtype * B) {
- Dtype sum = 0;
- for (int i = 0 ; i < n ; i++)
- sum += A[i] * B[i];
- return sum;
-}
-
-// element-wise
-template<typename Op, typename Dtype>
-void cpu_e_f(const int n, const Dtype * A, Dtype * B) {
- for (int i = 0 ; i < n ; i++) {
- Op::Map(A[i], &B[i]);
- }
-}
-
-template<typename Op, typename Dtype>
-void cpu_e_f(const int n, const Dtype * A, const Dtype * B, Dtype * C) {
- for (int i = 0 ; i < n ; i++) {
- Op::Map(A[i], B[i], &C[i]);
- }
-}
-template<typename Op, typename Dtype>
-void cpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) {
- for (int i = 0 ; i < n ; i++) {
- Op::Map(alpha, A[i], &B[i]);
- }
-}
-
-template<typename Op, typename Dtype>
-void cpu_e_f(const int n, const Dtype alpha, const Dtype * A, const Dtype * B,
- Dtype * C) {
- for (int i = 0 ; i < n ; i++) {
- Op::Map(alpha, A[i], B[i], &C[i]);
- }
-}
-// element-wise generalized operation defined in Op
-
-
-// matrix/vector expand/reduce
-
-template<typename Op, typename Dtype>
-void cpu_reduce_f(const Dtype * A, const int m, const int n, Dtype * B) {
- for (int i = 0 ; i < m ; i++) {
- Op::Map(A+i*n, n, B[i]);
- }
-}
-// reduce each row of A to an element of B e.g. the sum operation in softmax
-template<typename Op, typename Dtype>
-void cpu_expand_f(const Dtype * A, const int m, const int n, Dtype * B) {
- for (int i = 0 ; i < m ; i++) {
- Op::Map(A[i], n, B+i*n);
- }
-}
-
-
-template<typename Dtype>
-void cpu_softmax(int nb_rows, int nb_cols, const Dtype* A, Dtype* B) {
- // numerically stable softmax over each row of A, written into B
- for (int i = 0; i < nb_rows; i++) {
- const Dtype* aptr = A + i * nb_cols;
- Dtype* bptr = B + i * nb_cols;
- Dtype mmax = aptr[0];
- for (int x = 1; x < nb_cols; ++x)
- if (mmax < aptr[x]) mmax = aptr[x];
- Dtype sum = 0.0f;
- for (int x = 0; x < nb_cols; ++x) {
- bptr[x] = std::exp(aptr[x] - mmax);
- sum += bptr[x];
- }
- for (int x = 0; x < nb_cols; ++x) {
- bptr[x] /= sum;
- }
- }
-}
-
-
-
-template<typename Dtype, typename URNG>
-void cpu_sample_uniform(URNG& g, int n, Dtype low, Dtype high, Dtype* A) {
- std::uniform_real_distribution<Dtype> distribution(low, high);
- for (int i = 0; i < n; i++)
- A[i] = distribution(g);
-}
-
-template<typename Dtype, typename URNG>
-void cpu_sample_gaussian(URNG& g, int n, Dtype mean, Dtype std, Dtype* A) {
- std::normal_distribution<Dtype> distribution(mean, std);
- for (int i = 0; i < n; i++)
- A[i] = distribution(g);
-}
-
-#ifdef USE_GPU
-template<typename Dtype>
-Dtype gpu_asum(cublasHandle_t handle, int n, const Dtype* A, int inc) {
- Dtype result = 0.0;
- cublasSasum(handle, n, A, inc, &result);
- return result;
-}
-
-template<typename Dtype>
-void gpu_gemm(cublasHandle_t handle, const Dtype * A, const Dtype * B,
- const int m, const int n, const int k, const Dtype alpha, const Dtype beta,
- const bool TranA, const bool TranB, Dtype * C) {
- int lda = TranA ? m : k;
- int ldb = TranB ? k : n;
- int ldc = n;
- cublasOperation_t tA = (TranA == false) ? CUBLAS_OP_N : CUBLAS_OP_T;
- cublasOperation_t tB = (TranB == false) ? CUBLAS_OP_N : CUBLAS_OP_T;
- cublasSgemm(handle, tB, tA, n, m, k, &alpha, B, ldb,
- A, lda, &beta, C, ldc);
-}
-
-template<typename Dtype>
-void gpu_gemv(cublasHandle_t handle, const Dtype * A, const Dtype * B,
- const int m, const int n, const Dtype alpha, const Dtype beta,
- const bool TranA, Dtype * C) {
- int lda = n;
- cublasOperation_t tA = (TranA == true) ? CUBLAS_OP_N : CUBLAS_OP_T;
- cublasSgemv(handle, tA, n, m, &alpha, A, lda, B, 1, &beta, C, 1);
-}
-
-template<typename Dtype>
-void gpu_axpy(cublasHandle_t handle, const int n, const Dtype alpha,
- const Dtype * A, Dtype * B) {
- cublasSaxpy(handle, n, &alpha, A, 1, B, 1);
-}
-
-template<typename Dtype>
-void gpu_scale(cublasHandle_t handle, const int n, const Dtype alpha,
- Dtype * A) {
- cublasSscal(handle, n, &alpha, A, 1);
-}
-
-template<typename Dtype>
-Dtype gpu_dot(cublasHandle_t handle, const int n, const Dtype * A,
- const Dtype * B) {
- Dtype result = 0.0;
- cublasSdot(handle, n, A, 1, B, 1, &result);
- return result;
-}
-
-// element-wise
-template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype alpha, Dtype * A) {
- Op::CudaMap(alpha, A, n);
-}
-
-template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype * A, Dtype * B) {
- Op::CudaMap(A, B, n);
-}
-
-template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype * A, const Dtype * B, Dtype * C) {
- Op::CudaMap(A, B, C, n);
-}
-
-template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) {
- Op::CudaMap(alpha, A, B, n);
-}
-
-template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype alpha, const Dtype beta,
- const Dtype * A, const Dtype * B, Dtype * C) {
- Op::CudaMap(alpha, beta, A, B, C, n);
-}
-// element-wise generalized operation defined in Op
-
-// matrix/vector expand/reduce
-
-template<typename Op, typename Dtype>
-void gpu_reduce_f(const Dtype * A, const int m, const int n, Dtype * B) {
- for (int i = 0 ; i < m ; i++) {
- Op::CudaMap(A+i*n, n, B[i]);
- }
-}
-// reduce each row of A to an element of B e.g. the sum operation in softmax
-template<typename Op, typename Dtype>
-void gpu_expand_f(const Dtype * A, const int m, const int n, Dtype * B) {
- for (int i = 0 ; i < m ; i++) {
- Op::CudaMap(A[i], n, B+i*n);
- }
-}
-
-
-template<typename Dtype, typename URNG>
-void gpu_sample_uniform(URNG g, int n, Dtype low, Dtype high, Dtype* A) {
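- // note: curandGenerateUniform samples uniformly in (0, 1]; the low/high
- // arguments are unused here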
- curandGenerateUniform(g, A, n);
-}
-
-template<typename Dtype, typename URNG>
-void gpu_sample_gaussian(URNG g, int n, Dtype mean, Dtype std, Dtype* A) {
- curandGenerateNormal(g, A, n, mean, std);
-}
-
-// expand each element in A into a row of B
-#endif // USE_GPU
-
-} // namespace singa
-#endif // SINGA_UTILS_MATH_ADDR_H_
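
To make the row-major convention of cpu_gemm above concrete, a small worked example (float only, since the implementation dispatches to cblas_sgemm): C(2x2) = A(2x3) * B(3x2):

    #include "singa/utils/math_addr.h"

    void GemmExample() {
      const float A[6] = {1, 2, 3, 4, 5, 6};  // 2x3, row major
      const float B[6] = {1, 0, 0, 1, 1, 1};  // 3x2, row major
      float C[4] = {0};                       // 2x2 result
      singa::cpu_gemm(A, B, /*m=*/2, /*n=*/2, /*k=*/3,
                      /*alpha=*/1.0f, /*beta=*/0.0f,
                      /*TranA=*/false, /*TranB=*/false, C);
      // C is now {4, 5, 10, 11}
    }
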
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
deleted file mode 100644
index abe7722..0000000
--- a/include/singa/utils/math_blob.h
+++ /dev/null
@@ -1,762 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_MATH_BLOB_H_
-#define SINGA_UTILS_MATH_BLOB_H_
-
-#include <vector>
-#include <algorithm>
-#include <thread>
-#include "singa/utils/blob.h"
-#include "singa/utils/singa_op.h"
-#include "singa/utils/math_addr.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/context.h"
-
-namespace singa {
-
-#define NO_GPU LOG(FATAL) << "Not compiled with GPU";
-/**
- * \file math_blob.h is not thoroughly tested.
- * Only GEMM(), MMDot(), MVSumRow() and MVAddRow() are used now.
- */
-/************* BLAS level 1 *****************/
-/**
- * Scale each element of B by alpha in place:
- * Bi = alpha*Bi
- * Uses BLAS scal internally.
- */
-template<typename Dtype>
-void Scale(Dtype alpha, Blob<Dtype> * B) {
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_scale(B->count(), alpha, B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_scale(context->cublas_handle(device), B->count(), alpha,
- B->mutable_gpu_data());
-#else
- NO_GPU;
-#endif
- }
-}
-
-/**
- * Element-wise operation: Bi = alpha*Ai+Bi. A and B should have the same size
- */
-template<typename Dtype>
-void AXPY(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) {
- CHECK_EQ(A.count(), B->count());
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_axpy(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_axpy(context->cublas_handle(device), A.count(), alpha, A.gpu_data(),
- B->mutable_gpu_data());
-#else
- NO_GPU;
-#endif
- }
-}
-
-/************* BLAS level 2 *****************/
-/**
- * Matrix vector multiplication, C = alpha A(.T) * B + beta C.
- * Loose shape checking:
- * - dim of A >=2
- * - row of A is shape(0) (no transpose)
- * - column of A(.T) == B.count()
- * - rows of A(.T) == C.count()
- *
- * @param[in] alpha
- * @param[in] beta
- * @param[in] A, matrix
- * @param[in] B, vector
- * @param[in, out] C, vector
- */
-template<typename Dtype>
-void GEMV(Dtype alpha, Dtype beta, const Blob<Dtype>& A,
- const Blob<Dtype>& B, Blob<Dtype>* C) {
- CHECK_EQ(A.shape().size(), 2);
- int a1, a2, m, n;
- a1 = A.transpose() ? A.count() / A.shape(0) : A.shape(0);
- a2 = A.transpose() ? A.shape(0) : A.count() / A.shape(0);
- m = B.count();
- n = C->count();
- CHECK_EQ(a2, m) << "# columns of A(.T) must = length of B";
- CHECK_EQ(a1, n) << "# rows of A(.T) must = length of C";
-
- bool TranA = A.transpose();
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_gemv(A.cpu_data(), B.cpu_data(), m, n, alpha, beta, TranA,
- C->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_gemv(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), m, n,
- alpha, beta, TranA, C->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-/**
- * Matrix vector multiplication, C = A(.T) * B, transpose is considered.
- * Loose shape checking:
- * - dim of A >=2
- * - A.count() % B.count() == 0
- * - B.count() == C.count()
- *
- * @param[in] A input matrix
- * @param[in] B input vector
- * @param[out] C output vector
- */
-template <typename Dtype>
-void MVDot(const Blob<Dtype>& A, const Blob<Dtype>& B,
- Blob<Dtype>* C) {
- GEMV(Dtype(1), Dtype(0), A, B, C);
-}
-
-/************* BLAS level 3 *****************/
-/**
- * Matrix multiplication, C = alpha A*B + beta C, A, B and C are matrix.
- *
- * Transpose is considered for A and B.
- * Loose shape checking:
- * - the first dimension is row (no transpose) or col (with transpose) size
- * - shapes match for matrix multiplication
- *
- * @param[in] alpha
- * @param[in] beta
- * @param[in] A, matrix
- * @param[in] B, matrix
- * @param[in, out] C, matrix
- */
-template <typename Dtype>
-void GEMM(Dtype alpha, Dtype beta, const Blob<Dtype>& A, const Blob<Dtype>& B,
- Blob<Dtype> * C) {
- CHECK_GE(A.shape().size(), 2);
- CHECK_GE(B.shape().size(), 2);
- CHECK_GE(C->shape().size(), 2);
- int a1, a2, b1, b2, m, n;
- CHECK(!C->transpose());
- a1 = A.transpose() ? A.count() / A.shape(0) : A.shape(0);
- a2 = A.count() / a1;
- b1 = B.transpose() ? B.count() / B.shape(0) : B.shape(0);
- b2 = B.count() / b1;
- m = C->shape(0);
- n = C->count() / m;
- CHECK_EQ(a2, b1);
- CHECK_EQ(a1, m);
- CHECK_EQ(b2, n);
-
- int k = a2;
- bool TranA = A.transpose();
- bool TranB = B.transpose();
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, k, alpha, beta, TranA, TranB,
- C->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
- m, n, k, alpha, beta, TranA, TranB, C->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-/**
- * Matrix multiplication, C = A(.T) * B(.T), transpose is considered.
- * Strict shape checking:
- * - all are matrix
- * - shapes match for matrix multiplication
- *
- * @param[in] A input matrix
- * @param[in] B input matrix
- * @param[out] C output matrix
- */
-template <typename Dtype>
-void MMDot(const Blob<Dtype>& A, const Blob<Dtype>& B,
- Blob<Dtype>* C) {
- GEMM(Dtype(1), Dtype(0), A, B, C);
-}
-
-
-/*********************** Inner and Outer product****************************/
-/**
- * Inner product for two vectors.
- * Loose shape checking, A.count() == B.count().
- *
- * @param[in] A, input vector (shape checking using A.count()).
- * @param[in] B, input vector (shape checking using B.count()).
- * @return inner product value.
- */
-template <typename Dtype>
-Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) {
- Dtype res = 0;
- CHECK_EQ(A.count(), B.count());
- int n = A.count();
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- res = cpu_dot(n, A.cpu_data(), B.cpu_data());
- } else {
-#ifdef USE_GPU
- res = gpu_dot(context->cublas_handle(device), n, A.gpu_data(),
- B.gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
- return res;
-}
-
-/**
- * Outer product of two vectors, C = A * B^T; a transposed C is not supported.
- * Loose shape checking, A.count() * B.count() == C.count()
- *
- * @param[in] A, input vector
- * @param[in] B, input vector
- * @param[out] C, output matrix
- */
-template <typename Dtype>
-void OuterProduct(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype> * C) {
- CHECK(!C->transpose()); // do not support C.T now.
-
- int m = A.count();
- int n = B.count();
- CHECK_EQ(C->count(), m * n);
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, Dtype(1), Dtype(0), false,
- false, C->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
- m, n, 1, Dtype(1), Dtype(0), false, false, C->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-/*********************** Element-wise functions ***********************/
-/**
- * Apply the function from Op for each element in A and put the result into B,
- * i.e., Bi = Op(Ai).
- * Loose shape checking, A.count() == B.count().
- */
-template<typename Op, typename Dtype>
-void Map(const Blob<Dtype> & A, Blob<Dtype> * B) {
- CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size";
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_e_f<Op>(A.count(), A.cpu_data(), B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_e_f<Op>(A.count(), A.gpu_data(), B->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-
-/**
- * Apply the function from Op for each element in A and B, and put the result
- * into C, i.e., Ci = Op(Ai, Bi).
- * Loose shape checking, A, B and C are of the same size.
- */
-template<typename Op, typename Dtype>
-void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, Blob<Dtype> * C) {
- CHECK_EQ(A.count(), B.count()) << "Blobs must have the same size";
- CHECK_EQ(A.count(), C->count()) << "Blobs must have the same size";
- // cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_e_f<Op>(A.count(), A.gpu_data(), B.gpu_data(), C->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-
-/**
- * Bi = Op(alpha, Ai)
- * Loose shape checking, A.count() == B.count().
- */
-template<typename Op, typename Dtype>
-void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) {
- CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size";
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_e_f<Op>(A.count(), alpha, A.gpu_data(), B->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-/**
- * Ci = Op(alpha, Ai, Bi)
- * Loose shape checking, A, B and C are of the same size.
- */
-template<typename Op, typename Dtype>
-void Map(Dtype alpha, const Blob<Dtype>& A, const Blob<Dtype>& B,
- Blob<Dtype>* C) {
- CHECK_EQ(A.count(), B.count()) << "Blobs must have the same size";
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B.cpu_data(),
- C->mutable_cpu_data());
- } else {
- // TODO(wangwei) implement gpu version.
- NO_GPU;
- }
-}
-
-/**
- * Currently use std::copy which has shown better performance than memcpy.
- * http://stackoverflow.com/questions/4707012/c-memcpy-vs-stdcopy
- * TODO(wangwei) test blas copy vs std::copy.
- *
- * Loose shape checking, A.count() == B.count().
- */
-template<typename Dtype>
-void Copy(const Blob<Dtype>& A, Blob<Dtype>* B) {
- CHECK_EQ(A.count(), B->count()) << "Blobs must have the same size";
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- std::copy(A.cpu_data(), A.cpu_data() + A.count(), B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- CUDA_CHECK(cudaMemcpy(static_cast<Dtype*>(B->mutable_gpu_data()),
- A.gpu_data(), sizeof(Dtype) * A.count(), cudaMemcpyDefault));
-#else
- NO_GPU;
-#endif
- }
-}
-
-
-/**
- * B = alpha + A
- * Implemented using Map with op::Add.
- */
-template<typename Dtype>
-void Add(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) {
- Map<singa::op::Add<Dtype>, Dtype>(alpha, A, B);
-}
-
-/**
- * C = A + B
- * Implemented using Copy and AXPY.
- */
-template<typename Dtype>
-void Add(const Blob<Dtype> & A, const Blob<Dtype> & B,
- Blob<Dtype> * C) {
- Copy(A, C);
- AXPY(Dtype(1), B, C);
-}
-
-/**
- * B = alpha - A
- * Implemented using Map with op::Sub.
- */
-template<typename Dtype>
-void Sub(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype>* B) {
- Map<singa::op::Sub<Dtype>, Dtype>(alpha, A, B);
-}
-
-/**
- * C = A - B
- * Implemented using Copy and AXPY.
- */
-template<typename Dtype>
-void Sub(const Blob<Dtype> & A, const Blob<Dtype> & B,
- Blob<Dtype> * C) {
- Copy(A, C);
- AXPY(Dtype(-1), B, C);
-}
-
-/**
- * C = A * B, implemented using
- * Map(const Blob<Dtype>&, const Blob<Dtype>&, Blob<Dtype>*).
- */
-template<typename Dtype>
-void Mult(const Blob<Dtype> & A, const Blob<Dtype> & B,
- Blob<Dtype> * C) {
- Map<singa::op::Mult<Dtype>, Dtype>(A, B, C);
- // TODO(wangwei) use MKL's vector func
-}
-
-/**
- * C = A / B, implemented using
- * Map(const Blob<Dtype>&, const Blob<Dtype>&, Blob<Dtype>*).
- */
-template<typename Dtype>
-void Div(const Blob<Dtype> & A, const Blob<Dtype> & B,
- Blob<Dtype> * C) {
- Map<singa::op::Div<Dtype>, Dtype>(A, B, C);
- // TODO(wangwei) use MKL's vector func
-}
-/**
- * B = sqrt(A)
- */
-template<typename Dtype>
-void Sqrt(const Blob<Dtype> & A, Blob<Dtype>* B) {
- Map<singa::op::Sqrt<Dtype>, Dtype>(A, B);
-}
-/**
- * B = square(A)
- */
-template<typename Dtype>
-void Square(const Blob<Dtype> & A, Blob<Dtype>* B) {
- Map<singa::op::Square<Dtype>, Dtype>(A, B);
-}
-/**
- * B = exp(A)
- */
-template<typename Dtype>
-void Exp(const Blob<Dtype> & A, Blob<Dtype>* B) {
- Map<singa::op::Exp<Dtype>, Dtype>(A, B);
-}
-/**
- * B = log(A)
- */
-template<typename Dtype>
-void Log(const Blob<Dtype>& A, Blob<Dtype>* B) {
- Map<singa::op::Log<Dtype>, Dtype>(A, B);
-}
-/**
- * B = tanh(A)
- */
-template<typename Dtype>
-void Tanh(const Blob<Dtype>& A, Blob<Dtype>* B) {
- Map<singa::op::Tanh<Dtype>, Dtype>(A, B);
-}
-/*************************1D<-->2D op/transform***************************/
-/**
- * Add A to each column of B, i.e., Bij = alpha*Ai + beta*Bij
- * Loose shape checking, B.count() % A.count() == 0.
- * # columns of B = B.count() / A.count().
- */
-template<typename Dtype>
-void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
- if (B->transpose()) {
- B->set_transpose(false);
- MVAddRow(alpha, beta, A, B);
- B->set_transpose(true);
- } else {
- CHECK_EQ(B->count() % A.count(), 0) << "#rows of B must match length of A";
- int m = A.count(), n = B->count() / m;
- Blob<Dtype> one(n);
- one.SetValue(1);
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_gemm(A.cpu_data(), one.cpu_data(), m, n, 1, alpha, beta, false, false,
- B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m,
- n, 1, alpha, beta, false, false, B->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
- }
-}
-/**
- * Add A to each column of B, i.e., Bij = Ai + Bij
- * Loose shape checking, B.count() % A.count() == 0.
- * # columns of B = B.count() / A.count().
- */
-template<typename Dtype>
-void MVAddCol(const Blob<Dtype> & A, Blob<Dtype>* B) {
- MVAddCol(Dtype(1), Dtype(1), A, B);
-}
-
-/**
- * Add A to each row of B, i.e., Bij = alpha*Aj + beta*Bij
- * Loose shape checking, B.count() % A.count() == 0.
- * # rows of B = B.count() / A.count().
- */
-template<typename Dtype>
-void MVAddRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
- if (B->transpose()) {
- B->set_transpose(false);
- MVAddCol(alpha, beta, A, B);
- B->set_transpose(true);
- } else {
- CHECK_EQ(B->count() % A.count(), 0) << "#cols of B must match length of A";
- int n = A.count(), m = B->count() / n;
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- Blob<Dtype> one(m);
- one.SetValue(1);
- cpu_gemm(one.cpu_data(), A.cpu_data(), m, n, 1, alpha, beta,
- false, false, B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(),
- m, n, n);
-#else
- NO_GPU;
-#endif // USE_GPU
- }
- }
-}
-/**
- * Add A to each row of B, i.e., Bij = Aj + Bij
- * Loose shape checking, B.count() % A.count() == 0.
- * # rows of B = B.count() / A.count().
- */
-template<typename Dtype>
-void MVAddRow(const Blob<Dtype> & A, Blob<Dtype>* B) {
- MVAddRow(Dtype(1), Dtype(1), A, B);
-}
-
-/**
- * Copy A to each column of B, i.e., Bij = Ai
- * Loose shape checking, B.count() % A.count() == 0,
- * # columns of B = B.count() / A.count().
- */
-template<typename Dtype>
-void RepmatCol(const Blob<Dtype> & A, Blob<Dtype> * B) {
- MVAddCol(Dtype(1), Dtype(0), A, B);
-}
-
-/**
- * Copy A to each row of B, i.e., Bij = Aj
- * Loose shape checking, B.count() % A.count() == 0,
- * # rows of B = B.count() / A.count().
- */
-template<typename Dtype>
-void RepmatRow(const Blob<Dtype> & A, Blob<Dtype> * B) {
- MVAddRow(Dtype(1), Dtype(0), A, B);
-}
-
-/**
- * Sum all columns of matrix A to a column vector B,
- * i.e., Bi = \sum_j {alpha*Aij}+beta*Bi
- * Loose shape checking, A.count() % B.count() == 0.
- * # columns of A = A.count() / B.count().
- */
-template<typename Dtype>
-void MVSumCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
- CHECK_EQ(A.count() % B->count(), 0) << "length of B must = # of rows of A";
- int m = B->count(), n = A.count() / m;
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- Blob<Dtype> one(n);
- one.SetValue(1);
- cpu_gemm(A.cpu_data(), one.cpu_data(), m, 1, n, alpha, beta,
- A.transpose(), false, B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- singa_gpu_sum_col(A.gpu_data(), B->mutable_gpu_data(), m, n, n);
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-
-/**
- * Sum all rows of matrix A to a row vector B,
- * i.e., Bj = \sum_i {alpha*Aij}+beta*Bj
- * Loose shape checking, A.count() % B.count() == 0.
- * # rows of A = A.count() / B.count().
- */
-template<typename Dtype>
-void MVSumRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
- CHECK_EQ(A.count() % B->count(), 0) << "length of B must = # of cols of A";
- int n = B->count(), m = A.count() / n;
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- Blob<Dtype> one(m);
- one.SetValue(1);
- cpu_gemm(one.cpu_data(), A.cpu_data(), 1, n, m, alpha, beta, false,
- A.transpose(), B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- singa_gpu_sum_row(A.gpu_data(), B->mutable_gpu_data(), m, n, n);
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-
-/**
- * Reduce each row of A to an element of B.
- * Loose shape checking, A.count() % B.count() == 0.
- * # columns of A = A.count() / B.count().
- */
-template<typename Op, typename Dtype>
-void Reduce2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
- CHECK_EQ(A.count() % B->count(), 0) << "#rows of A must match length of B";
- int m = B->count(), n = A.count() / m;
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_reduce_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_reduce_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-/**
- * Duplicate each element of A into a row of B.
- * Loose shape checking, B.count() % A.count() == 0.
- * # columns of B = B.count() / A.count().
- */
-template<typename Op, typename Dtype>
-void Expand2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
- CHECK_EQ(B->count() % A.count(), 0) << "#rows of B must match length of A";
- int m = A.count(), n = B->count() / m;
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_expand_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_expand_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data());
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-
-/**
- * Average the absolute values.
- */
-template<typename Dtype>
-Dtype Asum(const Blob<Dtype>& A) {
- if (A.count() == 0) return Dtype(0);
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- Dtype ret = Dtype(0);
- if (device < 0) {
- ret = cpu_asum(A.count(), A.cpu_data(), 1) / A.count();
- } else {
-#ifdef USE_GPU
- ret = gpu_asum(context->cublas_handle(device), A.count(), A.gpu_data(), 1)
- / A.count();
-#else
- NO_GPU;
-#endif
- }
- return ret;
-}
-
-
-/*************Random Sample***************/
-template<typename Dtype>
-void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
- auto context = Singleton<Context>::Instance();
- const auto& thread = std::this_thread::get_id();
- int device = context->device_id(thread);
- if (device < 0) {
- cpu_sample_uniform(*context->rand_generator(thread), A->count(), low, high,
- A->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_sample_uniform(context->curand_generator(thread), A->count(), low, high,
- A->mutable_gpu_data());
-#else
- NO_GPU;
-#endif
- }
-}
-
-template<typename Dtype>
-void SampleGaussian(Dtype mean, Dtype std, Blob<Dtype>* A) {
- auto context = Singleton<Context>::Instance();
- const auto& thread = std::this_thread::get_id();
- int device = context->device_id(thread);
- if (device < 0) {
- cpu_sample_gaussian(*context->rand_generator(thread), A->count(), mean, std,
- A->mutable_cpu_data());
- } else {
-#ifdef USE_GPU
- gpu_sample_gaussian(context->curand_generator(thread), A->count(),
- mean, std, A->mutable_gpu_data());
-#else
- NO_GPU;
-#endif
- }
-}
-
-/************** Other functions ****************/
-template<typename Dtype>
-void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
- CHECK_GT(nb_rows, 0);
- CHECK_EQ(A.count() % nb_rows, 0);
- CHECK_EQ(A.count(), B->count());
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(),
- B->mutable_cpu_data());
- } else {
- // TODO(wangwei) implement the GPU version.
- NO_GPU;
- }
-}
-
-template<typename Dtype>
-void Zero(Blob<Dtype>* B) {
- auto context = Singleton<Context>::Instance();
- int device = context->device_id(std::this_thread::get_id());
- if (device < 0) {
- B->SetValue(0);
- } else {
-#ifdef USE_GPU
- cudaMemset(B->mutable_gpu_data(), 0, B->count() * sizeof(float));
-#else
- NO_GPU;
-#endif // USE_GPU
- }
-}
-} // end of namespace singa
-
-#endif // SINGA_UTILS_MATH_BLOB_H_
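
A hedged sketch of the Blob-level wrappers above, computing C = A * B with MMDot and then shifting it element-wise with Add. It assumes Blob can be constructed from a shape vector and that the calling thread is registered with the Context singleton (CPU path):

    #include <vector>
    #include "singa/utils/blob.h"
    #include "singa/utils/math_blob.h"

    void BlobMathExample() {
      singa::Blob<float> A(std::vector<int>{2, 3});
      singa::Blob<float> B(std::vector<int>{3, 4});
      singa::Blob<float> C(std::vector<int>{2, 4});
      singa::SampleGaussian(0.0f, 1.0f, &A);  // A ~ N(0, 1)
      singa::SampleUniform(0.0f, 1.0f, &B);   // B ~ U(0, 1)
      singa::MMDot(A, B, &C);                 // C = A * B
      singa::Add(1.0f, C, &C);                // Ci = 1 + Ci
    }
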
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/math_kernel.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_kernel.h b/include/singa/utils/math_kernel.h
deleted file mode 100644
index 0239d3d..0000000
--- a/include/singa/utils/math_kernel.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-#ifndef SINGA_UTILS_MATH_KERNEL_H_
-#define SINGA_UTILS_MATH_KERNEL_H_
-
-namespace singa {
-
-extern "C" {
- void singa_gpu_softmaxloss_forward(int n, int dim, const float *prob,
- const int *label, float *loss);
-
- void singa_gpu_softmaxloss_backward(int n, int dim, float scale,
- const int *label, float *grad);
-
- void singa_gpu_sum_vec(float *data, float *sum, int n);
-
- void singa_gpu_sum_col(const float *src_mat_data, float *dst_vec_data,
- int rows, int cols, int stride);
-
- void singa_gpu_sum_row(const float *src_mat_data, float *dst_vec_data,
- int rows, int cols, int stride);
-
- void singa_gpu_add_vec_row(const float *src_vec_data,
- const float *src_mat_data, float *des_mat_data,
- int rows, int cols, int stride);
-
- void singa_gpu_exp(const float *src_data, float *des_data, int n);
-
- void singa_gpu_log(const float *src_data, float *des_data, int n);
-
- void singa_gpu_sigmoid(const float *src_data, float *des_data, int n);
-
- void singa_gpu_sigmoid_grad(const float *src_data, float *des_data, int n);
-
- void singa_gpu_relu(const float *src_data, float *des_data, int n);
-
- void singa_gpu_relu_grad(const float *src_data, float *des_data, int n);
-
- void singa_gpu_tanh(const float *src_data, float *des_data, int n);
-
- void singa_gpu_tanh_grad(const float *src_data, float *des_data, int n);
-
- void singa_gpu_softplus(const float *src_data, float *des_data, int n);
-
- void singa_gpu_softplus_grad(const float *src_data, float *des_data, int n);
-
- void singa_gpu_square(const float *src_data, float *des_data, int n);
-
- void singa_gpu_square_grad(const float *src_data, float *des_data, int n);
-
- void singa_gpu_sqrt(const float *src_data, float *des_data, int n);
-
- void singa_gpu_pow(const float *src_data_a, const float *src_data_b,
- float *des_data, int n);
-
- void singa_gpu_mult(const float *src_data_a, const float *src_data_b,
- float *des_data, int n);
-
- void singa_gpu_div(const float *src_data_a, const float *src_data_b,
- float *des_data, int n);
-
- void singa_gpu_set_value(float *data, float value, int n);
-
- void singa_gpu_threshold(const float *src_data, float *des_data,
- float alpha, int n);
-};
-
-} // namespace singa
-
-#endif // SINGA_UTILS_MATH_KERNEL_H_
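
The declarations above are the host-visible entry points for CUDA kernels
(presumably defined in the matching math_kernel .cu source); each one applies
an element-wise or reduction operation to float arrays on the GPU. A CPU
reference sketch of the contract implied by singa_gpu_sigmoid (an assumption
about its semantics, not the shipped CUDA code):

    #include <cmath>
    #include <cstdio>

    // Reference semantics for singa_gpu_sigmoid(src_data, des_data, n):
    // des_data[i] = 1 / (1 + exp(-src_data[i])) for i in [0, n).
    void cpu_sigmoid_reference(const float* src_data, float* des_data, int n) {
      for (int i = 0; i < n; ++i)
        des_data[i] = 1.0f / (1.0f + std::exp(-src_data[i]));
    }

    int main() {
      const float in[3] = {-1.0f, 0.0f, 1.0f};
      float out[3];
      cpu_sigmoid_reference(in, out, 3);
      std::printf("%.3f %.3f %.3f\n", out[0], out[1], out[2]);  // 0.269 0.500 0.731
      return 0;
    }
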
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/param.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/param.h b/include/singa/utils/param.h
deleted file mode 100644
index 319f2b4..0000000
--- a/include/singa/utils/param.h
+++ /dev/null
@@ -1,407 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_PARAM_H_
-#define SINGA_UTILS_PARAM_H_
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#include "singa/comm/msg.h"
-#include "singa/proto/job.pb.h"
-#include "singa/utils/blob.h"
-
-namespace singa {
-using std::vector;
-/**
- * Base parameter generator which initializes parameter values.
- */
-class ParamGenerator {
- public:
- static ParamGenerator* Create(const ParamGenProto& proto);
-
- virtual ~ParamGenerator() {}
-
- virtual void Init(const ParamGenProto& proto) { proto_ = proto; }
- virtual void Fill(Blob<float>* data);
-
- protected:
- ParamGenProto proto_;
-};
-
-class GaussianGen : public ParamGenerator {
- public:
- void Fill(Blob<float>* data) override;
-};
-
-class GaussianSqrtFanInGen : public GaussianGen {
- public:
- void Fill(Blob<float>* data) override;
-};
-
-class UniformGen : public ParamGenerator {
- public:
- void Fill(Blob<float>* data) override;
-};
-
-class UniformSqrtFanInGen : public UniformGen {
- public:
- void Fill(Blob<float>* data) override;
-};
-
-class UniformSqrtFanInOutGen : public UniformGen {
- public:
- void Fill(Blob<float>* data) override;
-};
-
-/**
- * Base parameter class.
- *
- * The Param object is a set of parameters, e.g., the (sub) weight matrix or
- * (sub) bias vector.
- *
- * It has a gradient Blob and a data Blob for gradients and parameter values.
- * Since some layers (or neuralnets) share parameter values, the data Blob is a
- * shared pointer which can be assigned to many Param objects' data field.
- *
- * It provides access methods like data(), grad(). It also provides functions
- * for generating and parsing messages to transfer the Param objects among
- * worker-worker, worker-server and server-server.
- *
- * Param objects are of different sizes, which makes it hard to achieve
- * load-balance among servers. Hence, we slice large Param objects into small
- * pieces. At the server side, one slice is a Param object.
- */
-class Param {
- public:
- /**
- * Create an instance of (sub) Param class based on the type from the
- * configuration.
- *
- * @param[in] conf configuration
- * @return a pointer to the created instance
- */
- static Param* Create(const ParamProto& conf);
-
- /**
- * Try to slice the Param objects (from a neural net) into a given number of
- * servers (groups) evenly. This is to achieve load-balance among servers.
- *
- * It does not change the Param objects, but just computes the length of each
- * slice.
- *
- * @param num number of servers (groups) for maintaining the Param objects.
- * @param params all Param objects from a neural net.
- * @return the length of each slice.
- */
- static const vector<int> ComputeSlices(int num, const vector<Param*>& params);
- /**
- * It computes the length of each slice and slices the Param objects by adding
- * the slicing information into every Param object.
- *
- * @copydetails ComputeSlices()
- */
- static void SliceParams(int num, const vector<Param*>& params);
-
- Param() {}
- virtual ~Param() {}
- void Init(const ParamProto& proto) { proto_ = proto; }
- /**
- * Setup param object
- *
- * @param conf param configuration, include learning rate multiplier etc.
- * @param shape one value per dimension
- */
- virtual void Setup(const std::vector<int>& shape);
- /**
- * Fill the values according to the init method, e.g., Gaussian distribution.
- *
- * @param version initial version
- */
- virtual void InitValues();
- virtual void InitValues(int version);
- /**
- * Share the data blob from other Param objects.
- *
- * @param other the Param object whose owner owns the data blob
- * @param cpu_only if true, share only cpu memory (used for training with
- * multiple GPUs); else, share both cpu and gpu memory.
- */
- void ShareDataFrom(Param* other, bool cpu_only);
- /**
- * Share both data and grad from other param
- */
- void ShareFrom(Param* other);
- /**
- * Init param values from checkpoint blob.
- */
- void FromProto(const BlobProto& blob);
- void FromProto(const std::string str);
- /**
- * Dump param values to blob.
- */
- void ToProto(BlobProto* blob);
- /**
- * Add a slice
- *
- * @param slice_id ID of the slice to be added
- * @param size num of floats for this slice
- */
- void AddSlice(int slice_id, int size);
- /**
- * Scale the learning rate when updating parameters in the Param object
- */
- inline float lr_scale() const { return proto_.lr_scale(); }
- /**
- * Scale the weight decay when updating parameters in the Param object
- */
- inline float wd_scale() const { return proto_.wd_scale(); }
- /**
- * Parameter name used for Param re-use in other models or for sharing
- * between layers.
- */
- inline const std::string& name() const { return proto_.name(); }
- inline void set_name(const std::string& name) { proto_.set_name(name); }
- /**
- * If it shares data from others, then owner is the id of that Param,
- * otherwise it is its own id.
- */
- inline int owner() const { return proto_.owner(); }
- /**
- * IDs start from 0 and are ordered for all Params from the same neural net.
- */
- inline int id() const { return proto_.id(); }
- /**
- * Set ID
- */
- inline void set_id(int id) {
- proto_.set_id(id);
- proto_.set_owner(id);
- }
- inline int version() const { return version_; }
- inline void set_version(int v) { version_ = v; }
- /**
- * @return the version of the Param when the last Update request was issued.
- */
- inline int last_version() const { return last_version_; }
- inline void set_last_version(int v) { last_version_ = v; }
-
- /**
- * @return the sharing Param name which is configured by users in the conf file.
- */
- inline const std::string& share_from() const { return proto_.share_from(); }
- /**
- * @return num of parameters in this Param obj.
- */
- inline const std::vector<int>& shape() const { return data_.shape(); }
- inline int size() const { return data_.count(); }
- inline const Blob<float>& data() const { return data_; }
- inline Blob<float>* mutable_data() { return &data_; }
- inline const Blob<float> &grad() const { return grad_; }
- inline Blob<float> *mutable_grad() { return &grad_; }
- inline float* mutable_cpu_data() { return data_.mutable_cpu_data(); }
- inline float* mutable_cpu_grad() { return grad_.mutable_cpu_data(); }
- inline float* mutable_cpu_history() { return history_.mutable_cpu_data(); }
- inline float* mutable_cpu_update() { return update_.mutable_cpu_data(); }
- /**
- * @return slice start ID
- */
- inline int slice_start() const { return slice_start_; }
- inline int num_slices() const { return num_slices_; }
-
- /**
- * Below are message/request related functions.
- * The basic communication workflows are as follow:
- *------------------------------------------------------------------------
- * |Put |Get |Update |Sync
- *------------------------------------------------------------------------
- * Generate|(stub) |(stub) |(stub) |(server)
- * Message |GenPutMsg |GenGetMsg |GenUpdateMsg |GenSyncMsg
- *------------------------------------------------------------------------
- * Handle |(server) |(server) |(server) |(server)
- * Message |HandlePutMsg|HandleGetMsg |ParseUpdateMsg |HandleSyncMsg
- * | | |GenUpdateResMsg |
- *------------------------------------------------------------------------
- * Handle | |(stub) |(stub) |(server)
- * Response| |ParseGetResMsg|ParseUpdateResMsg|ParseSyncResMsg
- *------------------------------------------------------------------------
- */
-
- /**
- * Generate the message for a put request, i.e., put parameters to a server
- *
- * This function is called at worker/stub side.
- * @param copy decides whether to copy the parameter values from the server.
- * @param slice_idx index of the slice from which the message is generated.
- * @return generated message without setting src, dst, target fields.
- */
- virtual Msg* GenPutMsg(bool copy, int slice_idx);
- /**
- * Generate the message for a get request, i.e., get parameters from a server
- * \copydetails GenPutMsg(bool, int);
- */
- virtual Msg* GenGetMsg(bool copy, int slice_idx);
- /**
- * Generate the message for an update request, i.e., pass info to server for
- * parameter update.
- * \copydetails GenPutMsg(bool, int);
- */
- virtual Msg* GenUpdateMsg(bool copy, int slice_idx);
- /**
- * Generate the message for a synchronization request between server groups.
- *
- * This function is called at server side where the Param is actually a slice
- * of an original Param object.
- */
- virtual Msg* GenSyncMsg(int offset, int size);
- /**
- * Server handling function for put request.
- *
- * @param msg request
- * @param reserve if true reserve the msg space for the calling function;
- * otherwise the msg should be freed inside the function.
- * @return response message
- */
- virtual Msg* HandlePutMsg(Msg** msg, bool reserve);
- /**
- * Server handling function for get request.
- *
- * \copydetails HandlePutMsg(Msg**, bool reserve)
- */
- virtual Msg* HandleGetMsg(Msg** msg, bool reserve);
- /**
- * Server parses update requests.
- * \copydetails GenUpdateResponseMsgs(const std::vector<Msg*>& msgs);
- */
- virtual void ParseUpdateMsgs(const std::vector<Msg*>& msgs);
- /**
- * Generate the messages to response the update requests.
- *
- * This function is called at the server side, where the Param is actually a
- * slice of an original Param object.
- *
- * @param msgs for synchronous training, there may be multiple procs whose
- * workers share the same Param (slice) objects; their update requests are
- * buffered and handled together. For asynchronous training, there is only
- * one request in msgs.
- * @return response messages
- */
- virtual const std::vector<Msg*>
- GenUpdateResponseMsgs(std::vector<Msg*>* msgs, bool reserve);
- /**
- * Server handling function for synchronization message
- *
- * \copydetails HandleGetMsg(Msg**, bool reserve)
- */
- virtual Msg* HandleSyncMsg(Msg** msg, bool reserve);
- /**
- * Worker/Stub parsing function for get response.
- *
- * @param msg the get response message
- * @param slice_idx index for the slice
- */
- virtual int ParseGetResponseMsg(Msg* msg, int slice_idx);
- /**
- * Worker/Server parsing function for update response
- *
- * \copydetails ParseGetResponseMsg(Msg*, int);
- */
- virtual int ParseUpdateResponseMsg(Msg* msg, int slice_idx);
- /**
- * Server parsing function for synchronization response.
- *
- * \copydetails ParseGetResponseMsg(Msg*, int);
- */
- virtual int ParseSyncResponseMsg(Msg* msg, int slice_idx);
-
- protected:
- /**
- * Implement the common code of ParseGetResponseMsg and ParseUpdateResponseMsg
- * \copydetails ParseSyncResponseMsg(Msg* msg, int slice_idx);
- */
- void ParseResponseMsg(Msg* msg, int slice_idx);
-
- protected:
- //!< param version updated by the Update/Sync/Get response
- //!< only the owner param is initialized.
- int version_ = -1;
- //!< param version before last Update/Sync/Get request, set from version_
- int last_version_ = -1;
- //!< the global ID of the first slice
- int slice_start_ = 0;
- //!< total num of slices for this Param obj
- int num_slices_ = 0;
- // offset and size of each slice
- std::vector<int> slice_offset_;
- std::vector<int> slice_size_;
- // For debugging. Put requests have no feedback, so we do not track their pending status.
- std::vector<bool> pending_get_;
- std::vector<bool> pending_update_;
- int num_pending_requests_ = 0;
- // data, gradient, history gradient of this parameter
- Blob<float> data_, grad_, history_, update_;
- ParamProto proto_;
-};
-
-/**
- * ParamEntry is used for aggregating gradients of Params shared by workers from
- * the same group.
- *
- * For each worker group, every unique Param object has a ParamEntry object.
- * Param objects sharing the same values are associated with the same
- * ParamEntry.
- */
-class ParamEntry {
- public:
- ParamEntry() {}
- ParamEntry(int total, Param* p);
- /**
- * Associate the counter to a Param object.
- *
- * @param p
- * @param local true if it is used by workers in this proc, false otherwise
- */
- void AddParam(bool local, Param* p);
- int next_version = -1; // next_version & num_update are directly used by stub
- int num_update = 0;
- int num_local = 0; //!< # local workers using the shared parameter
- int num_total = 0; //!< # total workers using the shared parameter
- //!< Shares are deleted by neuralnet's destructor
- std::vector<Param*> shares;
-};
-
-inline int ParamTrgt(int param_id, int slice_id) {
- return (param_id << 16) | slice_id;
-}
-
-inline int ParamID(int param_trgt) {
- return param_trgt >> 16;
-}
-
-inline int SliceID(int param_trgt) {
- static const int mask = (1 << 16) -1;
- return param_trgt & mask;
-}
-
-} // namespace singa
-
-#endif // SINGA_UTILS_PARAM_H_
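
The ParamTrgt/ParamID/SliceID helpers at the end of this header pack a
parameter id and a slice id into a single int message target, with the
parameter id in the high 16 bits and the slice id in the low 16 bits (so each
id must fit in 16 bits). A self-contained round-trip check, with the helpers
copied verbatim:

    #include <cassert>
    #include <cstdio>

    inline int ParamTrgt(int param_id, int slice_id) {
      return (param_id << 16) | slice_id;  // high 16 bits | low 16 bits
    }
    inline int ParamID(int param_trgt) { return param_trgt >> 16; }
    inline int SliceID(int param_trgt) {
      static const int mask = (1 << 16) - 1;  // 0xFFFF
      return param_trgt & mask;
    }

    int main() {
      int trgt = ParamTrgt(3, 42);
      assert(ParamID(trgt) == 3 && SliceID(trgt) == 42);  // round-trip holds
      std::printf("trgt=%d -> param=%d slice=%d\n",
                  trgt, ParamID(trgt), SliceID(trgt));
      return 0;
    }
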
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/singa_op.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/singa_op.h b/include/singa/utils/singa_op.h
deleted file mode 100644
index 7499eb1..0000000
--- a/include/singa/utils/singa_op.h
+++ /dev/null
@@ -1,299 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_SINGA_OP_H_
-#define SINGA_UTILS_SINGA_OP_H_
-
-#include <cmath>
-#include <algorithm>
-
-#ifdef USE_GPU
-#include <cuda_runtime.h>
-#include <cublas_v2.h>
-#include "singa/utils/math_kernel.h"
-#endif // USE_GPU
-
-namespace singa {
-
-namespace op {
-
-/**
- * b = e^a
- */
-template<typename Dtype>
-struct Exp {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = exp(a);
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_exp(a, b, n);
- }
-#endif // USE_GPU
-};
-/**
- * b = log(a), base is e
- */
-template<typename Dtype>
-struct Log {
- inline static void Map(const Dtype & a, Dtype *b) {
- *b = log(a);
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_log(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Sigmoid {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = 1.0f / (1.0f + expf(-a));
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_sigmoid(a, b, n);
- }
-#endif // USE_GPU
-};
-template<typename Dtype>
-struct SigmoidGrad {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = a * (1.0f - a);
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_sigmoid_grad(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Relu {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = std::max(a, static_cast<Dtype>(0));
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_relu(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct ReluGrad {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = a > 0 ? 1 : 0;
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_relu_grad(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Tanh {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = tanhf(a);
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_tanh(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct TanhGrad {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = 1 - a * a;
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_tanh_grad(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Softplus {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = logf(1 + expf(a));
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_softplus(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct SoftplusGrad {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = 1.0f / (1.0f + expf(-a));
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_softplus_grad(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Square {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = a * a;
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_square(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct SquareGrad {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = 2 * sqrt(a);
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_square_grad(a, b, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Sqrt {
- inline static void Map(const Dtype & a, Dtype * b) {
- *b = sqrt(a);
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a, Dtype * b, int n) {
- singa::singa_gpu_sqrt(a, b, n);
- }
-#endif // USE_GPU
-};
-
-/*********************************************************************/
-/**
- * c = pow(a, b), i.e., c = a^b
- */
-template<typename Dtype>
-struct Pow {
- inline static void Map(const Dtype & a, const Dtype &b, Dtype * c) {
- *c = pow(a, b);
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a,
- const Dtype * b, Dtype * c, int n) {
- singa::singa_gpu_pow(a, b, c, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Add {
- inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) {
- *c = a + b;
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a,
- const Dtype * b, Dtype * c, int n) {
-// singa::singa_gpu_add(a, b, c, n); // TODO(haibo)
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Sub {
- inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) {
- *c = a - b;
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a,
- const Dtype * b, Dtype * c, int n) {
-// singa::singa_gpu_sub(a, b, c, n); // TODO(haibo)
- }
-#endif // USE_GPU
-};
-
-
-template<typename Dtype>
-struct Mult {
- inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) {
- *c = a * b;
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a,
- const Dtype * b, Dtype * c, int n) {
- singa::singa_gpu_mult(a, b, c, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Div {
- inline static void Map(const Dtype & a, const Dtype & b, Dtype * c) {
- *c = a / b;
- }
-#ifdef USE_GPU
- inline static void CudaMap(const Dtype * a,
- const Dtype * b, Dtype * c, int n) {
- singa::singa_gpu_div(a, b, c, n);
- }
-#endif // USE_GPU
-};
-
-
-/*********************************************************************/
-template<typename Dtype>
-struct Set {
- inline static void Map(Dtype alpha, Dtype * a) {
- *a = alpha;
- }
-#ifdef USE_GPU
- inline static void CudaMap(Dtype alpha, Dtype * a, int n) {
- singa::singa_gpu_set_value(a, alpha, n);
- }
-#endif // USE_GPU
-};
-
-template<typename Dtype>
-struct Threshold {
- inline static void Map(Dtype alpha, const Dtype & a, Dtype * b) {
- *b = a < alpha ? 1.0f : 0.0f;
- }
-#ifdef USE_GPU
- inline static void CudaMap(Dtype alpha, const Dtype * a,
- Dtype * b, int n) {
- singa::singa_gpu_threshold(a, b, alpha, n);
- }
-#endif // USE_GPU
-};
-
-} // namespace op
-
-} // namespace singa
-
-#endif // SINGA_UTILS_SINGA_OP_H_
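
Each functor above exposes a per-element Map() for the CPU path and, under
USE_GPU, a CudaMap() that hands a whole array to the matching kernel from
math_kernel.h. On the CPU, a caller applies Map() in a plain loop; a minimal
sketch (the generic cpu_map driver is an assumption, only the functor shape
mirrors this header):

    #include <cmath>
    #include <cstdio>

    template<typename Dtype>
    struct Sigmoid {  // same shape as the functors above, CPU path only
      inline static void Map(const Dtype& a, Dtype* b) {
        *b = 1.0f / (1.0f + std::exp(-a));
      }
    };

    // Hypothetical driver: apply Op::Map to each of the n elements.
    template<typename Op, typename Dtype>
    void cpu_map(const Dtype* src, Dtype* dst, int n) {
      for (int i = 0; i < n; ++i)
        Op::Map(src[i], &dst[i]);
    }

    int main() {
      const float in[3] = {-2.0f, 0.0f, 2.0f};
      float out[3];
      cpu_map<Sigmoid<float> >(in, out, 3);
      std::printf("%.3f %.3f %.3f\n", out[0], out[1], out[2]);  // 0.119 0.500 0.881
      return 0;
    }
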
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/updater.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/updater.h b/include/singa/utils/updater.h
deleted file mode 100644
index 33ad8a7..0000000
--- a/include/singa/utils/updater.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_UPDATER_H_
-#define SINGA_UTILS_UPDATER_H_
-
-#include <string>
-#include "singa/proto/job.pb.h"
-#include "singa/utils/param.h"
-#include "singa/neuralnet/layer.h"
-
-namespace singa {
-using std::string;
-/**
- * Base learning rate generator.
- *
- * Generate the learning rate for a given training step/iteration.
- * There are many different ways to change the learning rate through time/step.
- * Users can inherit this class to implement their own change method.
- */
-class LRGenerator {
- public:
- static LRGenerator* Create(const LRGenProto& proto);
-
- virtual ~LRGenerator() {}
-
- virtual void Init(const LRGenProto& proto) { proto_ = proto; }
- /**
- * @param step training step/iteration.
- * @return base learning rate regardless of step
- */
- virtual float Get(int step) { return proto_.base_lr(); }
-
- protected:
- LRGenProto proto_;
-};
-
-class FixedStepLRGen : public LRGenerator {
- public:
- float Get(int step) override;
- private:
- int last_idx_ = 0;
-};
-
-class StepLRGen : public LRGenerator {
- public:
- float Get(int step) override;
-};
-
-class LinearLRGen : public LRGenerator {
- public:
- float Get(int step) override;
-};
-
-class ExpLRGen : public LRGenerator {
- public:
- float Get(int step) override;
-};
-
-class InvLRGen : public LRGenerator {
- public:
- float Get(int step) override;
-};
-
-class InvTLRGen : public LRGenerator {
- public:
- float Get(int step) override;
-};
-
-/**
- * Updater for Param.
- */
-class Updater {
- public:
-
- /* added for python binding */
- static Updater* CreateUpdater(const string str);
- /* ------------------------ */
-
- static Updater* Create(const UpdaterProto& proto);
-
- virtual ~Updater() {}
-
- virtual void Init(const UpdaterProto &proto);
- virtual void Update(int step, Param* param, float grad_scale) = 0;
- void Clip(const float low, const float high, Param* param);
- protected:
- UpdaterProto proto_;
- LRGenerator* lr_gen_;
- float weight_decay_;
- float momentum_;
- float clip_low_, clip_high_;
-};
-
-class SGDUpdater : public Updater {
- public:
- void Update(int step, Param* param, float grad_scale) override;
-};
-
-class AdaGradUpdater : public Updater {
- public:
- void Update(int step, Param* param, float grad_scale) override;
-};
-
-
-class NesterovUpdater : public Updater {
- public:
- void Update(int step, Param* param, float grad_scale) override;
-};
-
-class RMSPropUpdater : public Updater {
- public:
- void Init(const UpdaterProto &proto) override;
- void Update(int step, Param* param, float grad_scale) override;
-
- protected:
- float rho_;
- float delta_;
-};
-
-class AdaDeltaUpdater : public Updater {
- public:
- void Init(const UpdaterProto &proto) override;
- void Update(int step, Param* param, float grad_scale) override;
-
- protected:
- float rho_;
- float delta_;
-};
-
-class AdamUpdater : public Updater {
- public:
- void Init(const UpdaterProto &proto) override;
- void Update(int step, Param* param, float grad_scale) override;
-
- protected:
- float beta1_;
- float beta2_;
- float delta_;
-};
-
-class AdamMaxUpdater : public Updater {
- public:
- void Init(const UpdaterProto &proto) override;
- void Update(int step, Param* param, float grad_scale) override;
-
- protected:
- float beta1_;
- float beta2_;
- float delta_;
-};
-
-} // namespace singa
-
-#endif // SINGA_UTILS_UPDATER_H_
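
Every concrete updater above implements Update(step, param, grad_scale) by
combining the learning rate from its LRGenerator with the Param's gradient,
history and the configured weight decay/momentum. As a standalone sketch, the
textbook SGD-with-momentum rule on plain arrays (an illustration of the
general shape, not code lifted from the deleted implementation):

    #include <cstdio>

    // v = momentum * v + lr * (grad + weight_decay * w);  w -= v
    void sgd_update(float lr, float momentum, float weight_decay,
                    float* data, float* history, const float* grad, int n) {
      for (int i = 0; i < n; ++i) {
        history[i] = momentum * history[i]
                   + lr * (grad[i] + weight_decay * data[i]);
        data[i] -= history[i];
      }
    }

    int main() {
      float w[2] = {1.0f, -1.0f}, v[2] = {0.0f, 0.0f};
      const float g[2] = {0.5f, -0.5f};
      sgd_update(0.1f, 0.9f, 0.0f, w, v, g, 2);
      std::printf("%.2f %.2f\n", w[0], w[1]);  // prints 0.95 -0.95
      return 0;
    }
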