You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/05/05 23:32:26 UTC
incubator-systemml git commit: [MINOR] Added SGDNesterovUpdate UDF support to Caffe2DML
Repository: incubator-systemml
Updated Branches:
refs/heads/master b9814ccf0 -> 44d7a8857
[MINOR] Added SGDNesterovUpdate UDF support to Caffe2DML
This will allow us to estimate the performance gains we can expect from codegen.
The UDF is disabled by default and is enabled only by setting the environment
variable, e.g. `export USE_NESTEROV_UDF="true"`. We will remove this flag, along with
the class org.apache.sysml.udf.lib.SGDNesterovUpdate, once codegen is stable.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/44d7a885
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/44d7a885
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/44d7a885
Branch: refs/heads/master
Commit: 44d7a88576dccce0bc2262588eb25cb521ca739d
Parents: b9814cc
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Fri May 5 15:32:12 2017 -0800
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Fri May 5 16:32:12 2017 -0700
----------------------------------------------------------------------
.../apache/sysml/udf/lib/SGDNesterovUpdate.java | 52 +++++++++++++++++---
.../org/apache/sysml/api/dl/Caffe2DML.scala | 11 +++++
.../org/apache/sysml/api/dl/CaffeSolver.scala | 5 +-
3 files changed, 59 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/44d7a885/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
index fa3bd0e..9c027d3 100644
--- a/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
+++ b/src/main/java/org/apache/sysml/udf/lib/SGDNesterovUpdate.java
@@ -67,6 +67,10 @@ public class SGDNesterovUpdate extends PackageFunction {
throw new RuntimeException("Invalid function output being requested");
}
+
+ boolean isDense(MatrixBlock X) {
+ return !X.isInSparseFormat() && X.getDenseBlock() != null;
+ }
@Override
public void execute() {
@@ -81,19 +85,53 @@ public class SGDNesterovUpdate extends PackageFunction {
updatedV = new Matrix( "tmp_" + rand.nextLong(), v.getNumRows(), v.getNumColumns(), ValueType.Double );
MatrixBlock updatedVMB = allocateDenseMatrixBlock(updatedV);
double [] updatedVData = updatedVMB.getDenseBlock();
- multiplyByConstant(v, mu, updatedVData);
- multiplyByConstant(dX, -lr, updatedVData);
- updatedVMB.setNonZeros(-1); // rather than updatedVMB.recomputeNonZeros();
+ if(isDense(v) && isDense(dX)) {
+ double [] vArr = v.getDenseBlock();
+ double [] dXArr = dX.getDenseBlock();
+ int nnz = 0;
+ for(int i = 0; i < updatedVData.length; i++) {
+ updatedVData[i] = mu*vArr[i] - lr*dXArr[i];
+ nnz += (updatedVData[i]!=0) ? 1 : 0;
+ }
+ updatedVMB.setNonZeros(nnz);
+ }
+ else {
+ multiplyByConstant(v, mu, updatedVData);
+ multiplyByConstant(dX, -lr, updatedVData);
+ updatedVMB.recomputeNonZeros();
+ }
updatedV.setMatrixDoubleArray(updatedVMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
// X = X - mu * v_prev + (1 + mu) * v
updatedX = new Matrix( "tmp_" + rand.nextLong(), X.getNumRows(), X.getNumColumns(), ValueType.Double );
MatrixBlock updatedXMB = allocateDenseMatrixBlock(updatedX);
double [] updatedXData = updatedXMB.getDenseBlock();
- copy(X, updatedXData);
- multiplyByConstant(v, -mu, updatedXData);
- multiplyByConstant(updatedVData, 1+mu, updatedXData);
- updatedXMB.setNonZeros(-1); // rather than updatedXMB.recomputeNonZeros();
+ if(isDense(X) && isDense(v)) {
+ double [] XArr = X.getDenseBlock();
+ double [] vPrevArr = v.getDenseBlock();
+ int nnz = 0; double muPlus1 = mu+1;
+ for(int i = 0; i < updatedXData.length; i++) {
+ updatedXData[i] = XArr[i] - mu*vPrevArr[i] + muPlus1*updatedVData[i];
+ nnz += (updatedXData[i]!=0) ? 1 : 0;
+ }
+ updatedXMB.setNonZeros(nnz);
+ }
+ else if(isDense(v)) {
+ copy(X, updatedXData);
+ double [] vPrevArr = v.getDenseBlock();
+ int nnz = 0; double muPlus1 = mu+1;
+ for(int i = 0; i < updatedXData.length; i++) {
+ updatedXData[i] += - mu*vPrevArr[i] + muPlus1*updatedVData[i];
+ nnz += (updatedXData[i]!=0) ? 1 : 0;
+ }
+ updatedXMB.setNonZeros(nnz);
+ }
+ else {
+ copy(X, updatedXData);
+ multiplyByConstant(v, -mu, updatedXData);
+ multiplyByConstant(updatedVData, 1+mu, updatedXData);
+ updatedXMB.recomputeNonZeros();
+ }
updatedX.setMatrixDoubleArray(updatedXMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
((Matrix) getFunctionInput(0)).getMatrixObject().release();
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/44d7a885/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index 22de112..377ebf3 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -61,6 +61,13 @@ object Caffe2DML {
// Naming conventions:
val X = "X"; val y = "y"; val batchSize = "BATCH_SIZE"; val numImages = "num_images"; val numValidationImages = "num_validation"
val XVal = "X_val"; val yVal = "y_val"
+
+ var USE_NESTEROV_UDF = {
+ // Developer environment variable flag 'USE_NESTEROV_UDF' until codegen starts working.
+ // Then, we will remove this flag and also the class org.apache.sysml.udf.lib.SGDNesterovUpdate
+ val envFlagNesterovUDF = System.getenv("USE_NESTEROV_UDF")
+ envFlagNesterovUDF != null && envFlagNesterovUDF.toBoolean
+ }
}
class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
@@ -283,6 +290,10 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
source(net, solver, Array[String]("l2_reg"))
appendVisualizationHeaders(dmlScript, numTabs)
+ if(Caffe2DML.USE_NESTEROV_UDF) {
+ tabDMLScript(dmlScript, numTabs).append("update_nesterov = externalFunction(matrix[double] X, matrix[double] dX, double lr, double mu, matrix[double] v) return (matrix[double] X, matrix[double] v) implemented in (classname=\"org.apache.sysml.udf.lib.SGDNesterovUpdate\",exectype=\"mem\"); \n")
+ }
+
// Read and convert to one-hote encoding
assign(tabDMLScript, "X_full", "read(\" \", format=\"csv\")")
assign(tabDMLScript, "y_full", "read(\" \", format=\"csv\")")
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/44d7a885/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
index 755949d..ae3d21d 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeSolver.scala
@@ -145,10 +145,11 @@ class AdaGrad(lambda:Double=5e-04, epsilon:Double=1e-6) extends CaffeSolver {
class Nesterov(lambda:Double=5e-04, momentum:Double=0.9) extends CaffeSolver {
def update(dmlScript:StringBuilder, layer:CaffeLayer):Unit = {
l2reg_update(lambda, dmlScript, layer)
+ val fn = if(Caffe2DML.USE_NESTEROV_UDF) "update_nesterov" else "sgd_nesterov::update"
if(layer.shouldUpdateWeight) dmlScript.append("\t").append("["+ commaSep(layer.weight, layer.weight+"_v") + "] " +
- "= sgd_nesterov::update(" + commaSep(layer.weight, layer.dWeight, getWeightLr(layer), momentum.toString, layer.weight+"_v") + ")\n")
+ "= " + fn + "(" + commaSep(layer.weight, layer.dWeight, getWeightLr(layer), momentum.toString, layer.weight+"_v") + ")\n")
if(layer.shouldUpdateBias) dmlScript.append("\t").append("["+ commaSep(layer.bias, layer.bias+"_v") + "] " +
- "= sgd_nesterov::update(" + commaSep(layer.bias, layer.dBias, getBiasLr(layer), momentum.toString, layer.bias+"_v") + ")\n")
+ "= " + fn + "(" + commaSep(layer.bias, layer.dBias, getBiasLr(layer), momentum.toString, layer.bias+"_v") + ")\n")
}
def init(dmlScript:StringBuilder, layer:CaffeLayer):Unit = {
if(layer.shouldUpdateWeight) dmlScript.append(layer.weight+"_v = sgd_nesterov::init(" + layer.weight + ")\n")