Posted to commits@systemml.apache.org by ni...@apache.org on 2019/03/23 00:58:05 UTC

[systemml] branch master updated: [SYSTEMML-540] Added zero padding layer in Caffe2DML, Keras2DML and nn library

This is an automated email from the ASF dual-hosted git repository.

niketanpansare pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new f48235f  [SYSTEMML-540] Added zero padding layer in Caffe2DML, Keras2DML and nn library
f48235f is described below

commit f48235f3b4ffd254e37570747d019d4c1f312a2d
Author: Niketan Pansare <np...@us.ibm.com>
AuthorDate: Fri Mar 22 17:57:51 2019 -0700

    [SYSTEMML-540] Added zero padding layer in Caffe2DML, Keras2DML and nn library
    
    - Updated the tests and the documentation.
    - This layer is required for the ResNet-50 demo with Keras2DML.
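    
    Example usage (a minimal sketch, not part of this commit: `spark` is an
    existing SparkSession and `features`/`labels` are placeholder NumPy data):
    
        import keras
        from keras.models import Sequential
        from keras.layers import ZeroPadding2D, Flatten, Dense
        from systemml.mllearn import Keras2DML
    
        # Pad a (3, 64, 32) channels-first input by
        # ((top, bottom), (left, right)) = ((1, 1), (2, 2)).
        keras_model = Sequential()
        keras_model.add(ZeroPadding2D(padding=((1, 1), (2, 2)),
                                      input_shape=(3, 64, 32)))
        keras_model.add(Flatten())
        keras_model.add(Dense(10, activation='softmax'))
        keras_model.compile(loss='categorical_crossentropy',
                            optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.95,
                                                           nesterov=True))
    
        sysml_model = Keras2DML(spark, keras_model, input_shape=(3, 64, 32))
        # sysml_model.fit(features, labels)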
---
 docs/beginners-guide-keras2dml.md                  |  6 +-
 docs/reference-guide-caffe2dml.md                  | 30 +++++++
 scripts/nn/layers/zero_pad2d.dml                   | 93 ++++++++++++++++++++++
 scripts/nn/test/grad_check.dml                     | 52 ++++++++++++
 scripts/nn/test/run_tests.dml                      |  2 +
 .../org/apache/sysml/parser/ParserWrapper.java     |  3 +
 src/main/proto/caffe/caffe.proto                   | 10 +++
 src/main/python/systemml/mllearn/estimators.py     |  4 +-
 src/main/python/systemml/mllearn/keras2caffe.py    | 38 ++++++++-
 src/main/python/tests/test_nn_numpy.py             | 20 ++++-
 .../scala/org/apache/sysml/api/dl/CaffeLayer.scala | 37 +++++++++
 .../org/apache/sysml/api/dl/CaffeNetwork.scala     |  1 +
 .../org/apache/sysml/api/dl/DMLGenerator.scala     |  4 +-
 .../sysml/test/integration/scripts/nn/NNTest.java  |  2 +
 14 files changed, 292 insertions(+), 10 deletions(-)

diff --git a/docs/beginners-guide-keras2dml.md b/docs/beginners-guide-keras2dml.md
index 60de360..4517be5 100644
--- a/docs/beginners-guide-keras2dml.md
+++ b/docs/beginners-guide-keras2dml.md
@@ -161,12 +161,16 @@ sysml_model.fit(features, labels)
 
 #### What optimizer and loss does Keras2DML use by default if `keras_model` is not compiled?
 
-If the user does not `compile` the keras model, then we use cross entropy loss and SGD optimizer with nesterov momentum:
+If the user does not `compile` the Keras model, we throw an error.
+
+For classification applications, consider using cross-entropy loss and the SGD optimizer with Nesterov momentum:
 
 ```python 
 keras_model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.SGD(lr=0.01, momentum=0.95, decay=5e-4, nesterov=True))
 ```
 
+Please refer to [Keras's documentation](https://keras.io/losses/) for more detail.
+
 #### What is the learning rate schedule used?
 
 Keras2DML does not support the `LearningRateScheduler` callback. 
diff --git a/docs/reference-guide-caffe2dml.md b/docs/reference-guide-caffe2dml.md
index 8e2ed1f..381b96d 100644
--- a/docs/reference-guide-caffe2dml.md
+++ b/docs/reference-guide-caffe2dml.md
@@ -139,6 +139,36 @@ layer {
 }
 ```
 
+### Padding Layer
+
+Invokes the [nn/layers/zero_pad2d.dml](https://github.com/apache/systemml/blob/master/scripts/nn/layers/zero_pad2d.dml) layer.
+ 
+**Optional Parameters:**
+
+- top_pad: Padding for the top side (default: 0).
+- bottom_pad: Padding for the bottom side (default: 0).
+- left_pad: Padding for the left side (default: 0).
+- right_pad: Padding for the right side (default: 0).
+- pad_value: Value to use for padding (default: 0). Only zero padding is supported for now.
+
+**Sample Usage:**
+```
+layer {
+  name: "padding1"
+  type: "Padding"
+  bottom: "pool1"
+  top: "padding1"
+  padding_param {
+    top_pad: 1
+    bottom_pad: 1
+    left_pad: 1
+    right_pad: 1
+    pad_value: 0
+  }
+}
+```
+
 
 ### Deconvolution Layer
 
diff --git a/scripts/nn/layers/zero_pad2d.dml b/scripts/nn/layers/zero_pad2d.dml
new file mode 100644
index 0000000..ac3eedf
--- /dev/null
+++ b/scripts/nn/layers/zero_pad2d.dml
@@ -0,0 +1,93 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+/*
+ * Zero-padding layer for 2D input.
+ */
+
+forward = function(matrix[double] img, int C, int Hin, int Win, int top_pad, int bottom_pad, int left_pad, int right_pad)
+    return (matrix[double] img_padded) {
+  /*
+   * Computes the forward pass for a zero-padding layer.
+   *
+   * Inputs:
+   *  - img: Input images, of shape (N, C*Hin*Win)
+   *  - C: Number of input channels
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - top_pad: Padding for top side.
+   *  - bottom_pad: Padding for bottom side.
+   *  - left_pad: Padding for left side.
+   *  - right_pad: Padding for right side.
+   *
+   * Outputs:
+   *  - img_padded: The input images padded along the height and width
+   *      dimensions, of shape (N, C*(Hin+top_pad+bottom_pad)*(Win+left_pad+right_pad)).
+   */
+  N = nrow(img)
+  img_padded = matrix(0, rows=N, cols=C*(Hin+top_pad+bottom_pad)*(Win+left_pad+right_pad))  # zeros
+  img_index = 1
+  img_padded_index = 1
+  for(c in 1:C) {
+    img_padded_index = img_padded_index + top_pad*(Win+left_pad+right_pad)
+    for(h in 1:Hin) {
+      img_padded_index = img_padded_index + left_pad
+      img_padded[,img_padded_index:(img_padded_index+Win-1)] = img[,img_index:(img_index+Win-1)]  # vectorized over all images
+      img_padded_index = img_padded_index + Win + right_pad
+      img_index = img_index + Win
+    }
+    img_padded_index = img_padded_index + bottom_pad*(Win+left_pad+right_pad)
+  }
+}
+
+backward = function(matrix[double] dout, int C, int Hin, int Win, int top_pad, int bottom_pad, int left_pad, int right_pad)
+    return (matrix[double] dX) {
+  /*
+   * Computes the backward pass for a zero-padding layer.
+   *
+   * Inputs:
+   *  - dout: Gradient wrt `out` from upstream, of shape (N, C*(Hin+top_pad+bottom_pad)*(Win+left_pad+right_pad)).
+   *  - C: Number of input channels
+   *  - Hin: Input height.
+   *  - Win: Input width.
+   *  - top_pad: Padding for top side.
+   *  - bottom_pad: Padding for bottom side.
+   *  - left_pad: Padding for left side.
+   *  - right_pad: Padding for right side.
+   *
+   * Outputs:
+   *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
+   */
+  N = nrow(dout)
+  dX = matrix(0, rows=N, cols=C*Hin*Win)  # zeros
+  img_index = 1
+  img_padded_index = 1
+  for(c in 1:C) {
+    img_padded_index = img_padded_index + top_pad*(Win+left_pad+right_pad)
+    for(h in 1:Hin) {
+      img_padded_index = img_padded_index + left_pad
+      dX[,img_index:(img_index+Win-1)] = dout[,img_padded_index:(img_padded_index+Win-1)]  # vectorized over all images
+      img_padded_index = img_padded_index + Win + right_pad
+      img_index = img_index + Win
+    }
+    img_padded_index = img_padded_index + bottom_pad*(Win+left_pad+right_pad)
+  }
+}
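
For intuition, here is a NumPy sketch (not part of the commit) of the same
forward/backward computation on the flattened (N, C*Hin*Win) row-major layout
used by the nn library:

```python
import numpy as np

def zero_pad2d_forward(img, C, Hin, Win, top_pad, bottom_pad, left_pad, right_pad):
    # img: (N, C*Hin*Win) row-major; returns (N, C*Hout*Wout).
    N = img.shape[0]
    x = img.reshape(N, C, Hin, Win)
    out = np.pad(x, ((0, 0), (0, 0), (top_pad, bottom_pad), (left_pad, right_pad)),
                 mode='constant')
    return out.reshape(N, -1)

def zero_pad2d_backward(dout, C, Hin, Win, top_pad, bottom_pad, left_pad, right_pad):
    # The padded border is constant, so the gradient simply drops it.
    N = dout.shape[0]
    Hout = Hin + top_pad + bottom_pad
    Wout = Win + left_pad + right_pad
    d = dout.reshape(N, C, Hout, Wout)
    return d[:, :, top_pad:top_pad + Hin, left_pad:left_pad + Win].reshape(N, -1)
```
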
diff --git a/scripts/nn/test/grad_check.dml b/scripts/nn/test/grad_check.dml
index a5da859..bb93db5 100644
--- a/scripts/nn/test/grad_check.dml
+++ b/scripts/nn/test/grad_check.dml
@@ -58,6 +58,7 @@ source("nn/test/max_pool2d_simple.dml") as max_pool2d_simple
 source("nn/test/util.dml") as test_util
 source("nn/util.dml") as util
 source("nn/layers/elu.dml") as elu
+source("nn/layers/zero_pad2d.dml") as zero_pad2d
 
 affine = function() {
   /*
@@ -1827,6 +1828,57 @@ relu = function() {
   }
 }
 
+zero_pad2d = function() {
+  /*
+   * Gradient check for the zero-padding layer for 2D input.
+   *
+   * NOTE: Zero-padding is a linear operation with no kinks, so this
+   * check is not subject to the false-negatives that affect layers
+   * such as relu, where f(x-h) and f(x+h) can land on opposite sides
+   * of the zero threshold of max(0, x).
+   */
+  print("Grad checking the Zero-padding layer for 2D input with L2 loss.")
+
+  # Generate data
+  N = 48 # number of images
+  C = 3 # number of channels
+  H = 32 # height
+  W = 64 # width
+  top_pad = 1
+  bottom_pad = 3
+  left_pad = 4
+  right_pad = 2
+  X = rand(rows=N, cols=C*H*W, min=-5, max=5)
+  y = rand(rows=N, cols=C*(H+top_pad+bottom_pad)*(W+left_pad+right_pad))
+
+  # Compute analytical gradients of loss wrt parameters
+  out = zero_pad2d::forward(X, C, H, W, top_pad, bottom_pad, left_pad, right_pad)
+  dout = l2_loss::backward(out, y)
+  dX = zero_pad2d::backward(dout, C, H, W, top_pad, bottom_pad, left_pad, right_pad)
+
+  # Grad check
+  h = 1e-5
+  for (i in 1:nrow(X)) {
+    for (j in 1:ncol(X)) {
+      # Compute numerical derivative
+      old = as.scalar(X[i,j])
+      X[i,j] = old - h
+      outmh = zero_pad2d::forward(X, C, H, W, top_pad, bottom_pad, left_pad, right_pad)
+      lossmh = l2_loss::forward(outmh, y)
+      X[i,j] = old + h
+      outph = zero_pad2d::forward(X, C, H, W, top_pad, bottom_pad, left_pad, right_pad)
+      lossph = l2_loss::forward(outph, y)
+      X[i,j] = old  # reset
+      dX_num = (lossph-lossmh) / (2*h)  # numerical derivative
+
+      # Check error
+      rel_error = test_util::check_rel_grad_error(as.scalar(dX[i,j]), dX_num, lossph, lossmh)
+    }
+  }
+}
+
 rnn = function() {
   /*
    * Gradient check for the simple RNN layer.
diff --git a/scripts/nn/test/run_tests.dml b/scripts/nn/test/run_tests.dml
index 36f1583..5fc74ec 100644
--- a/scripts/nn/test/run_tests.dml
+++ b/scripts/nn/test/run_tests.dml
@@ -67,6 +67,8 @@ grad_check::sigmoid()
 grad_check::softmax()
 grad_check::softmax2d()
 grad_check::tanh()
+# TODO: Enable once a built-in padding function is added. Until then, the layer is validated by comparing its results with TensorFlow (see test_nn_numpy.py).
+# grad_check::zero_pad2d()
 print("")
 
 # Example model
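
The disabled grad check above follows the standard central-difference recipe; a
Python sketch of the same check (simplified to a few spot-checked coordinates,
and assuming the `zero_pad2d_forward`/`zero_pad2d_backward` NumPy sketch shown
earlier):

```python
import numpy as np

def grad_check_zero_pad2d(C=3, H=8, W=6, pads=(1, 3, 4, 2), h=1e-5, tol=1e-4):
    N = 4
    X = np.random.uniform(-5, 5, (N, C * H * W))
    y = np.random.uniform(size=(N, C * (H + pads[0] + pads[1]) * (W + pads[2] + pads[3])))
    fwd = lambda X: zero_pad2d_forward(X, C, H, W, *pads)
    # Analytical gradient of the L2 loss 0.5*||out - y||^2 wrt X.
    dX = zero_pad2d_backward(fwd(X) - y, C, H, W, *pads)
    for i, j in [(0, 0), (1, 5), (2, 17)]:        # spot-check a few coordinates
        old = X[i, j]
        X[i, j] = old - h; loss_mh = 0.5 * np.sum((fwd(X) - y) ** 2)
        X[i, j] = old + h; loss_ph = 0.5 * np.sum((fwd(X) - y) ** 2)
        X[i, j] = old                             # reset
        dX_num = (loss_ph - loss_mh) / (2 * h)    # numerical derivative
        rel = abs(dX[i, j] - dX_num) / max(abs(dX[i, j]) + abs(dX_num), 1e-12)
        assert rel < tol, (i, j, rel)
```
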
diff --git a/src/main/java/org/apache/sysml/parser/ParserWrapper.java b/src/main/java/org/apache/sysml/parser/ParserWrapper.java
index 8dc9712..dfcfa65 100644
--- a/src/main/java/org/apache/sysml/parser/ParserWrapper.java
+++ b/src/main/java/org/apache/sysml/parser/ParserWrapper.java
@@ -53,6 +53,9 @@ public abstract class ParserWrapper {
 	 * @return corresponding statement block
 	 */
 	public static StatementBlock getStatementBlock(Statement current) {
+		if(current == null) {
+			throw new LanguageException("Error occurred while parsing the script");
+		}
 		StatementBlock blk = null;
 		if(current instanceof ParForStatement) {
 			blk = new ParForStatementBlock();
diff --git a/src/main/proto/caffe/caffe.proto b/src/main/proto/caffe/caffe.proto
index 8d1d796..d8671d6 100644
--- a/src/main/proto/caffe/caffe.proto
+++ b/src/main/proto/caffe/caffe.proto
@@ -408,6 +408,7 @@ message LayerParameter {
   
   // Nike:
   optional UpsampleParameter upsample_param = 147;
+  optional PaddingParameter padding_param = 148;
 }
 
 // Message that stores parameters used to apply transformation
@@ -623,6 +624,15 @@ message ConvolutionParameter {
   optional bool force_nd_im2col = 17 [default = false];
 }
 
+// Nike:
+message PaddingParameter {
+  optional uint32 top_pad = 1 [default = 0]; // The top padding height (2D only)
+  optional uint32 bottom_pad = 2 [default = 0]; // The bottom padding height (2D only)
+  optional uint32 left_pad = 3 [default = 0]; // The left padding width (2D only)
+  optional uint32 right_pad = 4 [default = 0]; // The right padding width (2D only)
+  optional float pad_value = 5 [default = 0]; // only zero supported for now
+}
+
 message CropParameter {
   // To crop, elements of the first bottom are selected to fit the dimensions
   // of the second, reference bottom. The crop is configured by
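
As a quick sanity check of the new message, a text-format layer definition can
be parsed from Python (a sketch; assumes a `caffe_pb2` module generated from
this proto via `protoc --python_out`):

```python
from google.protobuf import text_format
import caffe_pb2  # assumed: generated from src/main/proto/caffe/caffe.proto

layer = caffe_pb2.LayerParameter()
text_format.Parse("""
  name: "padding1"
  type: "Padding"
  padding_param { top_pad: 1 bottom_pad: 1 left_pad: 1 right_pad: 1 }
""", layer)
print(layer.padding_param.right_pad)  # 1
print(layer.padding_param.pad_value)  # 0.0 -- the declared default
```
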
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index d6aa8e8..8d1e164 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -36,9 +36,7 @@ from sklearn.metrics import accuracy_score, r2_score
 from py4j.protocol import Py4JError
 import traceback
 from sklearn.preprocessing import LabelEncoder
-import threading
-import time
-import math
+import threading, time, math, os
 
 from ..converters import *
 from ..classloader import *
diff --git a/src/main/python/systemml/mllearn/keras2caffe.py b/src/main/python/systemml/mllearn/keras2caffe.py
index 892deb2..2b97560 100755
--- a/src/main/python/systemml/mllearn/keras2caffe.py
+++ b/src/main/python/systemml/mllearn/keras2caffe.py
@@ -56,6 +56,12 @@ except ImportError:
 # - To add an activation, simply add the keras type to caffe type in supportedCaffeActivations.
 # - To add a layer, add the corresponding caffe layer type in supportedLayers. If the layer accepts parameters then update layerParamMapping too.
 # - The above logic is implemented in the function convertKerasToCaffeNetwork
+#
+#
+# Example guide to add a new layer that does not have a weight and bias (e.g. UpSampling2D or ZeroPadding2D):
+# - Add a mapping of the Keras class to the Caffe layer in the supportedLayers map below
+# - Define a helper method that returns Caffe's layer parameter in a JSON-like data structure. See getConvParam, getUpSamplingParam, getPaddingParam, etc.
+# - Add a mapping of the Keras class to the Caffe layer parameter in the layerParamMapping map below
 # --------------------------------------------------------------------------------------
 
 supportedCaffeActivations = {
@@ -78,7 +84,8 @@ supportedLayers = {
     keras.layers.LSTM: 'LSTM',
     keras.layers.Flatten: 'Flatten',
     keras.layers.BatchNormalization: 'None',
-    keras.layers.Activation: 'None'
+    keras.layers.Activation: 'None',
+    keras.layers.ZeroPadding2D: 'Padding'
 }
 
 
@@ -199,6 +206,7 @@ specialLayers = {
     keras.layers.BatchNormalization: _parseBatchNorm
 }
 
+# Used by convolution and max-pooling to return the padding value as an integer based on the padding type, 'same' or 'valid'
 def getPadding(kernel_size, padding):
     if padding.lower() == 'same':
         return int(kernel_size/2)
@@ -207,6 +215,7 @@ def getPadding(kernel_size, padding):
     else:
         raise ValueError('Unsupported padding:' + str(padding))
 
+# Helper method to return Caffe's ConvolutionParameter in a JSON-like data structure
 def getConvParam(layer):
     stride = (1, 1) if layer.strides is None else layer.strides
     config = layer.get_config()
@@ -215,17 +224,37 @@ def getConvParam(layer):
             'pad_h': getPadding(layer.kernel_size[0], layer.padding), 'pad_w': getPadding(layer.kernel_size[1], layer.padding)}
 
 
+# Helper method to return the newly added UpsampleParameter
+# (search for UpsampleParameter in the file src/main/proto/caffe/caffe.proto) in a JSON-like data structure
 def getUpSamplingParam(layer):
     return {'size_h': layer.size[0], 'size_w': layer.size[1]}
 
+# Used to normalize the possible types of padding accepted by ZeroPadding2D:
+# int: the same symmetric padding is applied to height and width.
+# tuple of 2 ints: interpreted as two different symmetric padding values for height and width: (symmetric_height_pad, symmetric_width_pad).
+# tuple of 2 tuples of 2 ints: interpreted as ((top_pad, bottom_pad), (left_pad, right_pad)).
+def getPaddingTuple(padding):
+    return [padding, padding] if isinstance(padding, int) else [padding[0], padding[1]]
+
+# Helper method to return the newly added PaddingParameter
+# (search for PaddingParameter in the file src/main/proto/caffe/caffe.proto) in a JSON-like data structure
+def getPaddingParam(layer):
+    if isinstance(layer.padding, int):
+        padding = getPaddingTuple(layer.padding) + getPaddingTuple(layer.padding)
+    elif hasattr(layer.padding, '__len__') and len(layer.padding) == 2:
+        padding = getPaddingTuple(layer.padding[0]) + getPaddingTuple(layer.padding[1])
+    else:
+        raise ValueError('padding should be either an int, a tuple of 2 ints, or a tuple of 2 tuples of 2 ints. Found: ' + str(layer.padding))
+    return {'top_pad': padding[0], 'bottom_pad': padding[1], 'left_pad': padding[2], 'right_pad': padding[3], 'pad_value':0}
 
+# Helper method to return Caffe's PoolingParameter in a JSON-like data structure
 def getPoolingParam(layer, pool='MAX'):
     stride = (1, 1) if layer.strides is None else layer.strides
     return {'pool': pool, 'kernel_h': layer.pool_size[0], 'kernel_w': layer.pool_size[1],
             'stride_h': stride[0], 'stride_w': stride[1], 'pad_h': getPadding(layer.pool_size[0], layer.padding),
             'pad_w': getPadding(layer.pool_size[1], layer.padding)}
 
-
+# Helper method to return Caffe's RecurrentParameter in a JSON-like data structure
 def getRecurrentParam(layer):
     if (not layer.use_bias):
         raise Exception('Only use_bias=True supported for recurrent layers')
@@ -236,14 +265,13 @@ def getRecurrentParam(layer):
     return {'num_output': layer.units, 'return_sequences': str(
         layer.return_sequences).lower()}
 
-
+# Helper method to return Caffe's InnerProductParameter in a JSON-like data structure
 def getInnerProductParam(layer):
     if len(layer.output_shape) != 2:
         raise Exception('Only 2-D input is supported for the Dense layer in the current implementation, but found '
                         + str(layer.input_shape) + '. Consider adding a Flatten before ' + str(layer.name))
     return {'num_output': layer.units}
 
-# TODO: Update AveragePooling2D when we add maxpooling support
 layerParamMapping = {
     keras.layers.InputLayer: lambda l:
     {'data_param': {'batch_size': l.batch_size}},
@@ -259,6 +287,8 @@ layerParamMapping = {
     {'convolution_param': getConvParam(l)},
     keras.layers.UpSampling2D: lambda l:
     {'upsample_param': getUpSamplingParam(l)},
+    keras.layers.ZeroPadding2D: lambda l:
+    {'padding_param': getPaddingParam(l)},
     keras.layers.Conv2D: lambda l:
     {'convolution_param': getConvParam(l)},
     keras.layers.MaxPooling2D: lambda l:
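
The three padding forms accepted by ZeroPadding2D normalize as follows (an
illustrative check using the getPaddingParam helper added above; note that
Keras itself already canonicalizes `layer.padding` to a tuple of 2 tuples):

```python
from keras.layers import ZeroPadding2D
from systemml.mllearn.keras2caffe import getPaddingParam

for padding in [1, (1, 2), ((3, 2), (1, 3))]:
    print(padding, '->', getPaddingParam(ZeroPadding2D(padding=padding)))
# 1                -> {'top_pad': 1, 'bottom_pad': 1, 'left_pad': 1, 'right_pad': 1, 'pad_value': 0}
# (1, 2)           -> {'top_pad': 1, 'bottom_pad': 1, 'left_pad': 2, 'right_pad': 2, 'pad_value': 0}
# ((3, 2), (1, 3)) -> {'top_pad': 3, 'bottom_pad': 2, 'left_pad': 1, 'right_pad': 3, 'pad_value': 0}
```
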
diff --git a/src/main/python/tests/test_nn_numpy.py b/src/main/python/tests/test_nn_numpy.py
index 43e3303..d30c692 100644
--- a/src/main/python/tests/test_nn_numpy.py
+++ b/src/main/python/tests/test_nn_numpy.py
@@ -44,7 +44,7 @@ import unittest
 
 import numpy as np
 from keras.models import Sequential
-from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, LSTM, UpSampling2D, SimpleRNN, Activation
+from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten, LSTM, UpSampling2D, SimpleRNN, Activation, ZeroPadding2D
 from keras.optimizers import SGD
 from keras import backend as K
 from keras.models import Model
@@ -276,5 +276,23 @@ class TestNNLibrary(unittest.TestCase):
     def test_upsampling_backward(self):
         self.failUnless(test_backward(UpSampling2D(size=(2, 2), input_shape=(3, 64, 32))))
 
+    def test_zeropadding_forward(self):
+        self.failUnless(test_forward(ZeroPadding2D(padding=1, input_shape=(3, 64, 32))))
+
+    def test_zeropadding_backward(self):
+        self.failUnless(test_backward(ZeroPadding2D(padding=1, input_shape=(3, 64, 32))))
+
+    def test_zeropadding_forward1(self):
+        self.failUnless(test_forward(ZeroPadding2D(padding=(1, 2), input_shape=(3, 64, 32))))
+
+    def test_zeropadding_backward1(self):
+        self.failUnless(test_backward(ZeroPadding2D(padding=(1, 2), input_shape=(3, 64, 32))))
+
+    def test_zeropadding_forward2(self):
+        self.failUnless(test_forward(ZeroPadding2D(padding=((3, 2), (1, 3)), input_shape=(3, 64, 32))))
+
+    def test_zeropadding_backward2(self):
+        self.failUnless(test_backward(ZeroPadding2D(padding=((3, 2), (1, 3)), input_shape=(3, 64, 32))))
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
index cd17af5..62323d1 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
@@ -671,6 +671,43 @@ class TanH(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extend
   // -------------------------------------------------
 }
 
+class Padding(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer {
+  override def sourceFileName                       = {
+    if(param.getPaddingParam.getPadValue == 0) "zero_pad2d"
+    else throw new DMLRuntimeException("Only pad_value = 0 is supported. Found: " + param.getPaddingParam.getPadValue)
+  }
+  override def init(dmlScript: StringBuilder): Unit = {}
+  
+  override def forward(dmlScript: StringBuilder, isPrediction: Boolean) = {
+    if(skipPadding) {
+      assign(dmlScript, out, X)
+    }
+    else {
+      invokeForward(dmlScript, List[String](out), X, numChannels, Hin, Win, top_pad, bottom_pad, left_pad, right_pad)
+    }
+  }
+  override def backward(dmlScript: StringBuilder, outSuffix: String): Unit = {
+    if(skipPadding) {
+      assignDoutToDX(dmlScript, outSuffix)
+    }
+    else {
+      invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id), dout, numChannels, Hin, Win, top_pad, bottom_pad, left_pad, right_pad)
+    }
+  }
+  override def weightShape(): Array[Int]                             = null
+  override def biasShape(): Array[Int]                               = null
+  override def outputShape = (numChannels, int_add(Hin, top_pad, bottom_pad), int_add(Win, left_pad, right_pad))
+  def skipPadding = param.getPaddingParam.getTopPad == 0 && param.getPaddingParam.getBottomPad == 0 && 
+    param.getPaddingParam.getLeftPad == 0 && param.getPaddingParam.getRightPad == 0
+  def top_pad = param.getPaddingParam.getTopPad.toString
+  def bottom_pad = param.getPaddingParam.getBottomPad.toString
+  def left_pad = param.getPaddingParam.getLeftPad.toString
+  def right_pad = param.getPaddingParam.getRightPad.toString
+  def numChannels  = bottomLayerOutputShape._1
+  def Hin          = bottomLayerOutputShape._2
+  def Win          = bottomLayerOutputShape._3
+}
+
 class ReLU(val param: LayerParameter, val id: Int, val net: CaffeNetwork) extends CaffeLayer {
   // TODO: Leaky ReLU: negative_slope [default 0]: specifies whether to leak the negative part by multiplying it with the slope value rather than setting it to 0.
   // -------------------------------------------------
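
The shape bookkeeping in the Padding layer above is plain arithmetic; a
hypothetical Python mirror of `outputShape` and the `skipPadding` shortcut:

```python
def padding_output_shape(C, Hin, Win, top_pad, bottom_pad, left_pad, right_pad):
    # Channels are unchanged; height/width grow by the respective pads.
    return (C, Hin + top_pad + bottom_pad, Win + left_pad + right_pad)

def skip_padding(top_pad, bottom_pad, left_pad, right_pad):
    # When all four pads are zero the layer is the identity, and the
    # generated DML assigns the input straight through instead of
    # calling zero_pad2d::forward/backward.
    return top_pad == bottom_pad == left_pad == right_pad == 0

print(padding_output_shape(3, 64, 32, 1, 1, 2, 2))  # (3, 66, 36)
```
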
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
index d3449f3..278b07b 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
@@ -250,6 +250,7 @@ class CaffeNetwork(netFilePath: String, val currentPhase: Phase, var numChannels
       case "rnn"             => new RNN(param, id, this)
       case "lstm"            => new LSTM(param, id, this)
       case "flatten"         => new Flatten(param, id, this)
+      case "padding"         => new Padding(param, id, this)
       case _                 => throw new LanguageException("Layer of type " + param.getType + " is not supported")
     }
   }
diff --git a/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala b/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
index 59c75ad..8597efd 100644
--- a/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/DMLGenerator.scala
@@ -51,6 +51,8 @@ trait BaseDMLGenerator {
     try { (v1.toDouble * v2.toDouble * v3.toDouble).toInt.toString } catch { case _: Throwable => "(" + v1 + "*" + v2 + "*" + v3 + ")" }
   def int_mult(v1: String, v2: String): String =
     try { (v1.toDouble * v2.toDouble).toInt.toString } catch { case _: Throwable => "(" + v1 + "*" + v2 + ")" }
+  def int_add(v1: String, v2: String, v3: String): String =
+    try { (v1.toDouble + v2.toDouble + v3.toDouble).toInt.toString } catch { case _: Throwable => "(" + v1 + "+" + v2 + "+" + v3 + ")" }
   def isNumber(x: String): Boolean                                                   = x forall Character.isDigit
   def transpose(x: String): String                                                   = "t(" + x + ")"
   def write(varName: String, fileName: String, format: String): String               = "write(" + varName + ", \"" + fileName + "\", format=\"" + format + "\")\n"
@@ -246,7 +248,7 @@ trait DMLGenerator extends SourceDMLGenerator with NextBatchGenerator {
     // Append source statements for layers as well as solver
     source(net, solver, if (isTraining) Array[String]("l1_reg") else null)
     source(net, solver, if (isTraining) Array[String]("l2_reg") else null)
-    source(dmlScript, numTabs, "util", Caffe2DML.nnDir)
+    source(dmlScript, numTabs, "util", Caffe2DML.nnDir)  
 
     if (isTraining) {
       // Append external built-in function headers:
diff --git a/src/test/java/org/apache/sysml/test/integration/scripts/nn/NNTest.java b/src/test/java/org/apache/sysml/test/integration/scripts/nn/NNTest.java
index 92b9f67..4bcc2b0 100644
--- a/src/test/java/org/apache/sysml/test/integration/scripts/nn/NNTest.java
+++ b/src/test/java/org/apache/sysml/test/integration/scripts/nn/NNTest.java
@@ -27,6 +27,8 @@ import org.junit.Test;
 
 /**
  * Test the SystemML deep learning library, `nn`.
+ * 
+ * mvn -Dit.test=org.apache.sysml.test.integration.scripts.nn.NNTest verify
  */
 public class NNTest extends MLContextTestBase {