You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/07/05 18:06:35 UTC

[1/2] systemml git commit: [SYSTEMML-540] Extended Caffe2DML to support image segmentation problems

Repository: systemml
Updated Branches:
  refs/heads/master d56c05ece -> 978d4de47


http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java b/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
index 08b4ffa..3a7a347 100644
--- a/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
+++ b/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java
@@ -152,7 +152,9 @@ public abstract class CommonSyntacticValidator {
 		if (!sources.containsKey(namespace)) {
 			sources.put(namespace, filePath);
 		}
-		else {
+		else if (!sources.get(namespace).equals(filePath)) {
+			// Only throw an exception if the filepath is different
+			// If the filepath is the same, ignore the statement. This is useful for repeated definitions of common dml files such as source("nn/util.dml") as util
 			notifyErrorListeners("Namespace Conflict: '" + namespace + "' already defined as " + sources.get(namespace), ctx.start);
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/python/systemml/mllearn/estimators.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index 30e66d4..17e5f37 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -217,6 +217,76 @@ class BaseSystemMLEstimator(Estimator):
     def transform(self, X):
         return self.predict(X)
     
+    def _convertPythonXToJavaObject(self, X):
+        """
+        Converts the input python object X to a java-side object (either MatrixBlock or Java DataFrame)
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix or PySpark DataFrame
+        """
+        if isinstance(X, SUPPORTED_TYPES) and self.transferUsingDF:
+            pdfX = convertToPandasDF(X)
+            df = assemble(self.sparkSession, pdfX, pdfX.columns, self.features_col).select(self.features_col)
+            return df._jdf
+        elif isinstance(X, SUPPORTED_TYPES):
+            return convertToMatrixBlock(self.sc, X)
+        elif hasattr(X, '_jdf') and self.features_col in X.columns:
+            # No need to assemble as input DF is likely coming via MLPipeline
+            return X._jdf
+        elif hasattr(X, '_jdf'):
+            assembler = VectorAssembler(inputCols=X.columns, outputCol=self.features_col)
+            df = assembler.transform(X)
+            return df._jdf
+        else:
+            raise Exception('Unsupported input type')
+        
+    def _convertJavaOutputToPythonObject(self, X, output):
+        """
+        Converts a java-side object output (either MatrixBlock or Java DataFrame) to a python object (based on the type of X).
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix or PySpark DataFrame
+        output: a java-side object (either MatrixBlock or Java DataFrame)
+        """
+        if isinstance(X, SUPPORTED_TYPES) and self.transferUsingDF:
+            retDF = DataFrame(output, self.sparkSession)
+            retPDF = retDF.sort('__INDEX').select('prediction').toPandas()
+            return retPDF.as_matrix().flatten() if isinstance(X, np.ndarray) else retPDF
+        elif isinstance(X, SUPPORTED_TYPES):
+            return convertToNumPyArr(self.sc, output)
+        elif hasattr(X, '_jdf'):
+            retDF = DataFrame(output, self.sparkSession)
+            # Return DF
+            return retDF.sort('__INDEX')
+        else:
+            raise Exception('Unsupported input type')
+        
+    def predict_proba(self, X):
+        """
+        Invokes the transform_probability method on the Model object on JVM if X is one of the supported data types.
+        Return predicted class probabilities for X.
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix or PySpark DataFrame
+        """
+        if hasattr(X, '_jdf'):
+            return self.predict(X)
+        elif self.transferUsingDF:
+            raise ValueError('The parameter transferUsingDF is not valid for the method predict_proba')
+        try:
+            if self.estimator is not None and self.model is not None:
+                self.estimator.copyProperties(self.model)
+        except AttributeError:
+            pass
+        try:
+            jX = self._convertPythonXToJavaObject(X)
+            return self._convertJavaOutputToPythonObject(X, self.model.transform_probability(jX))
+        except Py4JError:
+            traceback.print_exc()
+    
     # Returns either a DataFrame or MatrixBlock after calling transform(X:MatrixBlock, y:MatrixBlock) on Model object on JVM
     def predict(self, X):
         """
@@ -231,40 +301,12 @@ class BaseSystemMLEstimator(Estimator):
                 self.estimator.copyProperties(self.model)
         except AttributeError:
             pass
-        if isinstance(X, SUPPORTED_TYPES):
-            if self.transferUsingDF:
-                pdfX = convertToPandasDF(X)
-                df = assemble(self.sparkSession, pdfX, pdfX.columns, self.features_col).select(self.features_col)
-                retjDF = self.model.transform(df._jdf)
-                retDF = DataFrame(retjDF, self.sparkSession)
-                retPDF = retDF.sort('__INDEX').select('prediction').toPandas()
-                if isinstance(X, np.ndarray):
-                    return self.decode(retPDF.as_matrix().flatten())
-                else:
-                    return self.decode(retPDF)
-            else:
-                try:
-                    retNumPy = self.decode(convertToNumPyArr(self.sc, self.model.transform(convertToMatrixBlock(self.sc, X))))
-                except Py4JError:
-                    traceback.print_exc()
-                if isinstance(X, np.ndarray):
-                    return retNumPy
-                else:
-                    return retNumPy # TODO: Convert to Pandas
-        elif hasattr(X, '_jdf'):
-            if self.features_col in X.columns:
-                # No need to assemble as input DF is likely coming via MLPipeline
-                df = X
-            else:
-                assembler = VectorAssembler(inputCols=X.columns, outputCol=self.features_col)
-                df = assembler.transform(X)
-            retjDF = self.model.transform(df._jdf)
-            retDF = DataFrame(retjDF, self.sparkSession)
-            # Return DF
-            return retDF.sort('__INDEX')
-        else:
-            raise Exception('Unsupported input type')
-
+        try:
+            jX = self._convertPythonXToJavaObject(X)
+            ret = self._convertJavaOutputToPythonObject(X, self.model.transform(jX))
+            return self.decode(ret) if isinstance(X, SUPPORTED_TYPES) else ret
+        except Py4JError:
+            traceback.print_exc()
 
 class BaseSystemMLClassifier(BaseSystemMLEstimator):
 
@@ -274,6 +316,10 @@ class BaseSystemMLClassifier(BaseSystemMLEstimator):
         return self.le.transform(y) + 1
         
     def decode(self, y):
+        if not hasattr(self, 'le'):
+            self.le = None
+        if not hasattr(self, 'labelMap'):
+            self.labelMap = None
         if self.le is not None:
             return self.le.inverse_transform(np.asarray(y - 1, dtype=int))
         elif self.labelMap is not None:
@@ -316,18 +362,17 @@ class BaseSystemMLClassifier(BaseSystemMLEstimator):
             keys = np.asarray(df._c0, dtype='int')
             values = np.asarray(df._c1, dtype='str')
             self.labelMap = {}
-            self.le = None
             for i in range(len(keys)):
                 self.labelMap[int(keys[i])] = values[i]
             # self.encode(classes) # Giving incorrect results
         
-    def load(self, weights=None, sep='/'):
+    def load(self, weights, sep='/'):
         """
         Load a pretrained model. 
 
         Parameters
         ----------
-        weights: directory whether learned weights are stored (default: None)
+        weights: directory where learned weights are stored
         sep: seperator to use (default: '/')
         """
         self.weights = weights
@@ -737,7 +782,7 @@ class Caffe2DML(BaseSystemMLClassifier):
         if ignore_weights is not None:
             self.estimator.setWeightsToIgnore(ignore_weights)
             
-    def set(self, debug=None, train_algo=None, test_algo=None, parallel_batches=None):
+    def set(self, debug=None, train_algo=None, test_algo=None, parallel_batches=None, output_activations=None):
         """
         Set input to Caffe2DML
         
@@ -746,13 +791,26 @@ class Caffe2DML(BaseSystemMLClassifier):
         debug: to add debugging DML code such as classification report, print DML script, etc (default: False)
         train_algo: can be minibatch, batch, allreduce_parallel_batches or allreduce (default: minibatch)
         test_algo: can be minibatch, batch, allreduce_parallel_batches or allreduce (default: minibatch)
+        parallel_batches: number of parallel batches
+        output_activations: (developer flag) directory to output activations of each layer as csv during prediction. To be used only in batch mode (default: None)
         """
         if debug is not None: self.estimator.setInput("$debug", str(debug).upper())
         if train_algo is not None: self.estimator.setInput("$train_algo", str(train_algo).lower())
         if test_algo is not None: self.estimator.setInput("$test_algo", str(test_algo).lower())
         if parallel_batches is not None: self.estimator.setInput("$parallel_batches", str(parallel_batches))
+        if output_activations is not None: self.estimator.setInput("$output_activations", str(output_activations))
         return self
     
+    def summary(self):
+        """
+        Print the summary of the network
+        """
+        import pyspark
+        if type(self.sparkSession) == pyspark.sql.session.SparkSession:
+            self.estimator.summary(self.sparkSession._jsparkSession)
+        else:
+            raise TypeError('Please use spark session of type pyspark.sql.session.SparkSession in the constructor')
+    
     def visualize(self, layerName=None, varType='weight', aggFn='mean'):
         """
         Use this to visualize the training procedure (requires validation_percentage to be non-zero).

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
index f338fd7..25d19f6 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Caffe2DML.scala
@@ -51,7 +51,6 @@ import org.apache.commons.logging.Log
 import org.apache.commons.logging.LogFactory
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer
 
-
 /***************************************************************************************
 DESIGN OF CAFFE2DML:
 
@@ -164,6 +163,17 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
     new Caffe2DMLModel(this)
   }
 	// --------------------------------------------------------------
+  // Returns true if last 2 of 4 dimensions are 1.
+  // The first dimension refers to number of input datapoints.
+  // The second dimension refers to number of classes.
+  def isClassification():Boolean = {
+    val outShape = getOutputShapeOfLastLayer
+    return outShape._2 == 1 && outShape._3 == 1
+  }
+  def getOutputShapeOfLastLayer():(Int, Int, Int) = {
+    val out = net.getCaffeLayer(net.getLayers().last).outputShape
+    (out._1.toInt, out._2.toInt, out._3.toInt) 
+  }
   
   // Used for simplifying transfer learning
   private val layersToIgnore:HashSet[String] = new HashSet[String]() 
@@ -184,7 +194,23 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
   
   def getTrainAlgo():String = if(inputs.containsKey("$train_algo")) inputs.get("$train_algo") else "minibatch"
   def getTestAlgo():String = if(inputs.containsKey("$test_algo")) inputs.get("$test_algo") else "minibatch"
-    
+
+  def summary(sparkSession:org.apache.spark.sql.SparkSession):Unit = {
+    val header = Seq("Name", "Type", "Output", "Weight", "Bias", "Top", "Bottom")
+    val entries = net.getLayers.map(l => (l, net.getCaffeLayer(l))).map(l => {
+      val layer = l._2
+      (l._1, layer.param.getType, 
+          "(, " + layer.outputShape._1 + ", " + layer.outputShape._2 + ", " + layer.outputShape._3 + ")",
+          if(layer.weightShape != null) "[" + layer.weightShape()(0) + " X " + layer.weightShape()(1) + "]" else "",
+          if(layer.biasShape != null) "[" + layer.biasShape()(0) + " X " + layer.biasShape()(1) + "]" else "",
+          layer.param.getTopList.mkString(","),
+          layer.param.getBottomList.mkString(",")
+      )
+    })
+    import sparkSession.implicits._
+    sc.parallelize(entries).toDF(header : _*).show(net.getLayers.size)
+  }
+  
   // ================================================================================================
   // The below method parses the provided network and solver file and generates DML script.
 	def getTrainingScript(isSingleNode:Boolean):(Script, String, String)  = {
@@ -252,6 +278,7 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
             assign(tabDMLScript, "X_group_batch", Caffe2DML.X + "[group_beg:group_end,]")
             assign(tabDMLScript, "y_group_batch", Caffe2DML.y + "[group_beg:group_end,]")
             initializeGradients("parallel_batches")
+            assign(tabDMLScript, "X_group_batch_size", nrow("X_group_batch"))
             parForBlock("j", "1", "parallel_batches") {
               // Get a mini-batch in this group
               assign(tabDMLScript, "beg", "((j-1) * " + Caffe2DML.batchSize + ") %% nrow(X_group_batch) + 1")
@@ -280,6 +307,7 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
 	          assign(tabDMLScript, "end", " min(beg +  " + Caffe2DML.batchSize + " - 1, " + Caffe2DML.numImages + ")")
 	          assign(tabDMLScript, "X_group_batch", Caffe2DML.X + "[beg:end,]")
             assign(tabDMLScript, "y_group_batch", Caffe2DML.y + "[beg:end,]")
+            assign(tabDMLScript, "X_group_batch_size", nrow("X_group_batch"))
 	          tabDMLScript.append("local_batch_size = nrow(y_group_batch)\n")
 	          val localBatchSize = "local_batch_size"
 	          initializeGradients(localBatchSize)
@@ -500,11 +528,14 @@ class Caffe2DML(val sc: SparkContext, val solverParam:Caffe.SolverParameter,
   }
   private def flattenGradients():Unit = {
     tabDMLScript.append("# Flatten and store gradients for this parallel execution\n")
+    // Note: We multiply by a weighting to allow for proper gradient averaging during the
+    // aggregation even with uneven batch sizes.
+    assign(tabDMLScript, "weighting", "nrow(Xb)/X_group_batch_size")
     net.getLayers.map(layer => net.getCaffeLayer(layer)).map(l => {
       if(l.shouldUpdateWeight) assign(tabDMLScript, l.dWeight + "_agg[j,]", 
-          matrix(l.dWeight, "1", multiply(nrow(l.weight), ncol(l.weight)))) 
+          matrix(l.dWeight, "1", multiply(nrow(l.weight), ncol(l.weight))) + " * weighting") 
       if(l.shouldUpdateWeight) assign(tabDMLScript, l.dBias + "_agg[j,]", 
-          matrix(l.dBias, "1", multiply(nrow(l.bias), ncol(l.bias))))
+          matrix(l.dBias, "1", multiply(nrow(l.bias), ncol(l.bias)))  + " * weighting")
     })
   }
   private def aggregateAggGradients():Unit = {
@@ -581,8 +612,8 @@ class Caffe2DMLModel(val numClasses:String, val sc: SparkContext, val solver:Caf
 	  updateMeanVarianceForBatchNorm(net, false)
 	  
 	  val lossLayers = getLossLayers(net)
-	  
-	  assign(tabDMLScript, "Prob", matrix("0", Caffe2DML.numImages, numClasses))
+	  val lastLayerShape = estimator.getOutputShapeOfLastLayer
+	  assign(tabDMLScript, "Prob", matrix("0", Caffe2DML.numImages, (lastLayerShape._1*lastLayerShape._2*lastLayerShape._3).toString))
 	  estimator.getTestAlgo.toLowerCase match {
       case "minibatch" => {
         ceilDivide(tabDMLScript(), "num_iters", Caffe2DML.numImages, Caffe2DML.batchSize)
@@ -623,7 +654,7 @@ class Caffe2DMLModel(val numClasses:String, val sc: SparkContext, val solver:Caf
         }
       }
       case "allreduce" => {
-        // This setting doesnot use the batch size for scoring and allows the parfor optimizer to select plan
+        // This setting does not use the batch size for scoring and allows the parfor optimizer to select the best plan
         // by minimizing the memory requirement (i.e. batch size = 1)
         parForBlock("i", "1", Caffe2DML.numImages) {
           assign(tabDMLScript, "Xb", "X_full[i,]")
@@ -633,6 +664,18 @@ class Caffe2DMLModel(val numClasses:String, val sc: SparkContext, val solver:Caf
       }
       case _ => throw new DMLRuntimeException("Unsupported test algo:" + estimator.getTestAlgo)
     }
+    
+    if(estimator.inputs.containsKey("$output_activations")) {
+      if(estimator.getTestAlgo.toLowerCase.equals("batch")) {
+        net.getLayers.map(layer => 
+          tabDMLScript.append(write(net.getCaffeLayer(layer).out, 
+              estimator.inputs.get("$output_activations") + "/" + net.getCaffeLayer(layer).param.getName + "_activations.mtx", "csv") + "\n")
+        )  
+      }
+      else {
+        throw new DMLRuntimeException("Incorrect usage of output_activations. It should be only used in batch mode.")
+      }
+    }
 		
 		val predictionScript = dmlScript.toString()
 		System.out.println("Time taken to generate prediction script from Caffe proto:" + ((System.nanoTime() - startPredictionTime)*1e-9) + "secs." )
@@ -655,9 +698,36 @@ class Caffe2DMLModel(val numClasses:String, val sc: SparkContext, val solver:Caf
   
   // Prediction
   def transform(X: MatrixBlock): MatrixBlock = {
-	  baseTransform(X, sc, "Prob")
+    if(estimator.isClassification) {
+      Caffe2DML.LOG.debug("Prediction assuming classification")
+      baseTransform(X, sc, "Prob")
+    }
+    else {
+      Caffe2DML.LOG.debug("Prediction assuming segmentation")
+      val outShape = estimator.getOutputShapeOfLastLayer
+      baseTransform(X, sc, "Prob", outShape._1.toInt, outShape._2.toInt, outShape._3.toInt)
+    }
   }
+  def transform_probability(X: MatrixBlock): MatrixBlock = {
+    if(estimator.isClassification) {
+      Caffe2DML.LOG.debug("Prediction of probability assuming classification")
+      baseTransformProbability(X, sc, "Prob")
+    }
+    else {
+      Caffe2DML.LOG.debug("Prediction of probability assuming segmentation")
+      val outShape = estimator.getOutputShapeOfLastLayer
+      baseTransformProbability(X, sc, "Prob", outShape._1.toInt, outShape._2.toInt, outShape._3.toInt)
+    }
+  } 
   def transform(df: ScriptsUtils.SparkDataType): DataFrame = {
-	  baseTransform(df, sc, "Prob")
+    if(estimator.isClassification) {
+      Caffe2DML.LOG.debug("Prediction assuming classification")
+      baseTransform(df, sc, "Prob", true)
+    }
+    else {
+      Caffe2DML.LOG.debug("Prediction assuming segmentation")
+      val outShape = estimator.getOutputShapeOfLastLayer
+      baseTransform(df, sc, "Prob", true, outShape._1.toInt, outShape._2.toInt, outShape._3.toInt)
+    }
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
index 3fdbdb1..9518d75 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeLayer.scala
@@ -45,6 +45,9 @@ trait CaffeLayer extends BaseDMLGenerator {
   var computedBottomLayerOutputShape:(String, String, String) = null
   def bottomLayerOutputShape:(String, String, String) = {
     if(computedBottomLayerOutputShape == null) {
+      // Note: if you get org.apache.sysml.parser.LanguageException: Map is null exception
+      // from org.apache.sysml.api.dl.CaffeNetwork.org$apache$sysml$api$dl$CaffeNetwork$$convertLayerParameterToCaffeLayer
+      // you are attempting to traverse the network (for example: bottomLayerOutputShape) before it is created.
       val ret = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
       if(ret.size == 0) throw new LanguageException("Expected atleast 1 bottom layer for " + param.getName)
       computedBottomLayerOutputShape = ret(0).outputShape
@@ -487,28 +490,51 @@ class Concat(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends
 
 class SoftmaxWithLoss(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with IsLossLayer {
   // -------------------------------------------------
-  override def sourceFileName = "softmax"
+  override def sourceFileName = if(!isSegmentationProblem()) "softmax" else "softmax2d" 
   override def init(dmlScript:StringBuilder) = {}
-  override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = 
-    invokeForward(dmlScript, List[String](out), scores)
+  def isSegmentationProblem():Boolean = {
+    try {
+      return outputShape._2.toInt != 1 && outputShape._3.toInt != 1
+    } catch { 
+      case _:Throwable => throw new RuntimeException("Cannot infer the output dimensions:" + outputShape)
+    }
+  }
+  override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = {
+    if(!isSegmentationProblem()) {
+      invokeForward(dmlScript, List[String](out), scores)
+    }
+    else {
+      invokeForward(dmlScript, List[String](out), scores, outputShape._1)
+    }
+  }
   override def backward(dmlScript:StringBuilder, outSuffix:String) =  {
-    invoke(dmlScript, "cross_entropy_loss::", List[String]("dProbs" + outSuffix), "backward", false, out, "yb")
-    dmlScript.append("; ") 
-    invoke(dmlScript, "softmax::", List[String]("dOut" + id + outSuffix), "backward", false, "dProbs", scores)
-    val bottomLayerIDs = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l).id)
-    dmlScript.append("; ")
-    bottomLayerIDs.map(bottomLayerID => dmlScript.append( dX(bottomLayerID) + outSuffix + " = " + "dOut" + id + outSuffix + "; "))
-    dmlScript.append("\n")
+    if(!isSegmentationProblem()) {
+      invoke(dmlScript, "cross_entropy_loss::", List[String]("dProbs" + outSuffix), "backward", false, out, "yb")
+      dmlScript.append("; ") 
+      invoke(dmlScript, "softmax::", List[String]("dOut" + id + outSuffix), "backward", false, "dProbs", scores)
+      val bottomLayerIDs = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l).id)
+      dmlScript.append("; ")
+      bottomLayerIDs.map(bottomLayerID => dmlScript.append( dX(bottomLayerID) + outSuffix + " = " + "dOut" + id + outSuffix + "; "))
+      dmlScript.append("\n")
+    }
+    else {
+      throw new RuntimeException("backward for SoftmaxWithLoss is not implemented for segmentation problem")
+    }
   }
   override def computeLoss(dmlScript:StringBuilder, numTabs:Int) = {
-    val tabBuilder = new StringBuilder
-    for(i <- 0 until numTabs) tabBuilder.append("\t")
-    val tabs = tabBuilder.toString
-    dmlScript.append("tmp_loss = cross_entropy_loss::forward(" + commaSep(out, "yb") + ")\n")
-    dmlScript.append(tabs).append("loss = loss + tmp_loss\n")
-    dmlScript.append(tabs).append("true_yb = rowIndexMax(yb)\n")
-    dmlScript.append(tabs).append("predicted_yb = rowIndexMax(" + out + ")\n")
-    dmlScript.append(tabs).append("accuracy = mean(predicted_yb == true_yb)*100\n")
+    if(!isSegmentationProblem()) {
+      val tabBuilder = new StringBuilder
+      for(i <- 0 until numTabs) tabBuilder.append("\t")
+      val tabs = tabBuilder.toString
+      dmlScript.append("tmp_loss = cross_entropy_loss::forward(" + commaSep(out, "yb") + ")\n")
+      dmlScript.append(tabs).append("loss = loss + tmp_loss\n")
+      dmlScript.append(tabs).append("true_yb = rowIndexMax(yb)\n")
+      dmlScript.append(tabs).append("predicted_yb = rowIndexMax(" + out + ")\n")
+      dmlScript.append(tabs).append("accuracy = mean(predicted_yb == true_yb)*100\n")
+    }
+    else {
+      throw new RuntimeException("Computation of loss for SoftmaxWithLoss is not implemented for segmentation problem")
+    }
   }
   def scores():String = {
 	  val ret = net.getBottomLayers(param.getName).map(l => net.getCaffeLayer(l)).toList
@@ -840,8 +866,15 @@ class MaxPooling(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ext
 }
 
 class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
+  def isDepthWise():Boolean = {
+    if(param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1 && numChannels.toInt % param.getConvolutionParam.getGroup != 0) 
+      throw new DMLRuntimeException("The number of groups=" + param.getConvolutionParam.getGroup + " is not supported as it is not divisible by number of channels" + numChannels + ".")
+    param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1
+  }
+  def depthMultiplier():String = if(isDepthWise) (numChannels.toInt / param.getConvolutionParam.getGroup).toString else throw new DMLRuntimeException("Incorrect usage of depth")
+  
   // -------------------------------------------------
-  override def sourceFileName = "conv2d_builtin";
+  override def sourceFileName = if(isDepthWise) "conv2d_builtin_depthwise" else "conv2d_builtin" 
   /*
    * Initialize the parameters of this layer.
    *
@@ -854,17 +887,28 @@ class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ex
    * assumption of relu neurons.
    *  - http://arxiv.org/abs/1502.01852
    *
-   * Inputs:
+   * Inputs without depthwise:
    *  - F: Number of filters.
    *  - C: Number of input channels (dimensionality of depth).
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *
+   * Inputs with depthwise:
+   *  - C: Number of input channels (dimensionality of depth).
+   *  - M: Number of filters per input channel (i.e. depth multiplier).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *
    * Outputs:
    *  - W: Weights, of shape (F, C*Hf*Wf).
    *  - b: Biases, of shape (F, 1).
    */
-  override def init(dmlScript:StringBuilder) = invokeInit(dmlScript, List[String](weight, bias), numKernels, numChannels, kernel_h, kernel_w)
+  override def init(dmlScript:StringBuilder) = {
+    if(isDepthWise)
+      invokeInit(dmlScript, List[String](weight, bias), numChannels, depthMultiplier, kernel_h, kernel_w)
+    else
+      invokeInit(dmlScript, List[String](weight, bias), numKernels, numChannels, kernel_h, kernel_w)
+  }
   /*
    * Computes the forward pass for a 2D spatial convolutional layer with
    * F filters.  The input data has N examples, each represented as a 3D
@@ -880,6 +924,7 @@ class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ex
    *  - C: Number of input channels (dimensionality of depth).
    *  - Hin: Input height.
    *  - Win: Input width.
+   *  (only for depthwise) - M: Number of filters per input channel (i.e. depth multiplier).
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *  - strideh: Stride over height.
@@ -900,9 +945,14 @@ class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ex
    *  - Hout: Output height.
    *  - Wout: Output width.
    */
-  override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = 
-    invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id), 
+  override def forward(dmlScript:StringBuilder, isPrediction:Boolean) = {
+    if(isDepthWise)
+      invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id), 
+        X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
+    else
+      invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id), 
         X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
+  }
   /*
    * Computes the backward pass for a 2D spatial convolutional layer
    * with F filters.
@@ -918,6 +968,7 @@ class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ex
    *  - C: Number of input channels (dimensionality of depth).
    *  - Hin: Input height.
    *  - Win: Input width.
+   *  (only for depthwise) - M: Number of filters per input channel (i.e. depth multiplier).
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *  - strideh: Stride over height.
@@ -938,10 +989,18 @@ class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ex
    *  - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
    *  - db: Gradient wrt `b`, of shape (F, 1).
    */
-  override def backward(dmlScript:StringBuilder, outSuffix:String) = 
-    invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
-  // n * c_o * h_o * w_o, where h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1 and w_o likewise.
-  override def outputShape = ( numKernels, Hout, Wout )
+  override def backward(dmlScript:StringBuilder, outSuffix:String) =  {
+    if(isDepthWise)
+      invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
+    else
+      invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
+  }
+  // if not depthwise, n * c_o * h_o * w_o, where h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1 and w_o likewise.
+  // else (N, C*M*Hout*Wout)
+  override def outputShape = {
+    if(isDepthWise) ( (numChannels.toInt*depthMultiplier.toInt).toString, Hout, Wout )
+    else ( numKernels, Hout, Wout )
+  }
   // -------------------------------------------------
   def numChannels = bottomLayerOutputShape._1
   def Hin = bottomLayerOutputShape._2
@@ -950,8 +1009,16 @@ class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ex
   def Wout =  ConvolutionUtils.getConv2dOutputMap(bottomLayerOutputShape._3, kernel_w, stride_w, pad_w)
   // -------------------------------------------------
   def convParam = param.getConvolutionParam
-  override def weightShape():Array[Int] = Array(numKernels.toInt, int_mult(numChannels, kernel_h, kernel_w).toInt)
-  override def biasShape():Array[Int] = Array(numKernels.toInt, 1)
+  // if depthwise (C, M*Hf*Wf) else (F, C*Hf*Wf)
+  override def weightShape():Array[Int] = {
+    if(isDepthWise) Array(numChannels.toInt, int_mult(depthMultiplier, kernel_h, kernel_w).toInt)
+    else Array(numKernels.toInt, int_mult(numChannels, kernel_h, kernel_w).toInt)
+  }
+  // if depthwise (C*M, 1) else (F, 1)
+  override def biasShape():Array[Int] = {
+    if(isDepthWise) Array(numChannels.toInt*depthMultiplier.toInt, 1)
+    else Array(numKernels.toInt, 1)
+  }
   // num_output (c_o): the number of filters
   def numKernels = convParam.getNumOutput.toString
   // kernel_size (or kernel_h and kernel_w): specifies height and width of each filter
@@ -978,7 +1045,15 @@ class Convolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) ex
 }
 
 class DeConvolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork) extends CaffeLayer with HasWeight with HasBias {
-  override def sourceFileName: String = "conv2d_transpose"
+  def isDepthWise():Boolean = {
+    if(param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1 && numChannels.toInt % param.getConvolutionParam.getGroup != 0) 
+      throw new DMLRuntimeException("The number of groups=" + param.getConvolutionParam.getGroup + " is not supported as it is not divisible by number of channels" + numChannels + ".")
+    param.getConvolutionParam.hasGroup && param.getConvolutionParam.getGroup != 1
+  }
+  def depthMultiplier():String = if(isDepthWise) (numChannels.toInt / param.getConvolutionParam.getGroup).toString else throw new DMLRuntimeException("Incorrect usage of depth")
+  
+  override def sourceFileName: String = if(isDepthWise) "conv2d_transpose_depthwise" else "conv2d_transpose" 
+  
   /*
    * Utility function to initialize the parameters of this layer.
    *
@@ -988,22 +1063,48 @@ class DeConvolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork)
    * assumption of relu neurons.
    *  - http://arxiv.org/abs/1502.01852
    *
-   * Inputs:
+   * Inputs without depthwise:
    *  - F: Number of filters.
    *  - C: Number of input channels (dimensionality of depth).
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *
+   * Inputs with depthwise:
+   *  - C: Number of input channels (dimensionality of depth).
+   *  - M: Depth of each filter (C must be divisible by M).
+   *  - Hf: Filter height.
+   *  - Wf: Filter width.
+   *  
    * Outputs:
-   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - W: Weights, of shape (C, F*Hf*Wf).
    *  - b: Biases, of shape (F, 1).
    */
-  override def init(dmlScript: StringBuilder): Unit = 
-    invokeInit(dmlScript, List[String](weight, bias), numKernels, numChannels, kernel_h, kernel_w)
-    
-  override def weightShape():Array[Int] = Array(numKernels.toInt, int_mult(numChannels, kernel_h, kernel_w).toInt)
-  override def biasShape():Array[Int] = Array(numKernels.toInt, 1)
-    
+  override def init(dmlScript: StringBuilder): Unit = {
+    if(isDepthWise)
+      invokeInit(dmlScript, List[String](weight, bias), numChannels, depthMultiplier, kernel_h, kernel_w)
+    else
+      invokeInit(dmlScript, List[String](weight, bias), numKernels, numChannels, kernel_h, kernel_w)
+  }
+  
+  private def C_DivideBy_M():Int = numChannels.toInt / depthMultiplier.toInt
+  
+  // if depthwise (C/M, M*Hf*Wf), else (C, F*Hf*Wf) 
+  override def weightShape():Array[Int] = { 
+    if(isDepthWise)
+      Array(C_DivideBy_M, int_mult(depthMultiplier, kernel_h, kernel_w).toInt)
+    else
+      Array(numChannels.toInt, int_mult(numKernels, kernel_h, kernel_w).toInt)
+  }
+  // if depthwise (C/M, 1), else (F, 1)
+  override def biasShape():Array[Int] = {
+    if(isDepthWise)
+      Array(C_DivideBy_M, 1)
+    else
+      Array(numKernels.toInt, 1)
+  }
+  
+  private def numGroups:Int = if(param.getConvolutionParam.hasGroup) param.getConvolutionParam.getGroup else 1
+  
   /*
    * Computes the forward pass for a 2D spatial transpose convolutional
    * layer with F filters.  The input data has N examples, each
@@ -1011,18 +1112,19 @@ class DeConvolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork)
    *
    * Inputs:
    *  - X: Inputs, of shape (N, C*Hin*Win).
-   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - W: Weights, of shape (C, F*Hf*Wf).
    *  - b: Biases, of shape (F, 1).
    *  - C: Number of input channels (dimensionality of depth).
    *  - Hin: Input height.
    *  - Win: Input width.
+   *  (only for depthwise): - M: Depth of each filter (C must be divisible by M).
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *  - strideh: Stride over height.
    *  - stridew: Stride over width.
    *  - padh: Padding for top and bottom sides.
    *  - padw: Padding for left and right sides.
-   *  - out_padh: extra padding for top side. This should 
+   *  - out_padh: extra padding for top side. This should
    *      lie in [0, strideh-1].
    *  - out_padw: extra padding for right side. This should
    *      lie in [0, stridew-1].
@@ -1032,9 +1134,14 @@ class DeConvolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork)
    *  - Hout: Output height.
    *  - Wout: Output width.
    */
-  override def forward(dmlScript: StringBuilder,isPrediction: Boolean): Unit =
-    invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id), 
+  override def forward(dmlScript: StringBuilder,isPrediction: Boolean): Unit = {
+    if(isDepthWise)
+      invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id), 
+        X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, "0", "0")
+    else
+      invokeForward(dmlScript, List[String](out, "ignoreHout_"+id, "ignoreWout_"+id), 
         X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, "0", "0")
+  }
         
   /*
    * Computes the backward pass for a 2D spatial transpose
@@ -1046,11 +1153,12 @@ class DeConvolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork)
    *  - Hout: Output height.
    *  - Wout: Output width.
    *  - X: Inputs, of shape (N, C*Hin*Win).
-   *  - W: Weights, of shape (F, C*Hf*Wf).
+   *  - W: Weights, of shape (C, F*Hf*Wf).
    *  - b: Biases, of shape (F, 1).
    *  - C: Number of input channels (dimensionality of depth).
    *  - Hin: Input height.
    *  - Win: Input width.
+   *  (only for depthwise): - M: Depth of each filter (C must be divisible by M).
    *  - Hf: Filter height.
    *  - Wf: Filter width.
    *  - strideh: Stride over height.
@@ -1060,14 +1168,20 @@ class DeConvolution(val param:LayerParameter, val id:Int, val net:CaffeNetwork)
    *
    * Outputs:
    *  - dX: Gradient wrt `X`, of shape (N, C*Hin*Win).
-   *  - dW: Gradient wrt `W`, of shape (F, C*Hf*Wf).
+   *  - dW: Gradient wrt `W`, of shape (C, F*Hf*Wf).
    *  - db: Gradient wrt `b`, of shape (F, 1).
    */
-  override def backward(dmlScript:StringBuilder, outSuffix:String) = 
-    invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), 
+  override def backward(dmlScript:StringBuilder, outSuffix:String) = {
+    if(isDepthWise)
+      invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), 
+        dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, depthMultiplier, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
+    else
+      invokeBackward(dmlScript, outSuffix, List[String]("dOut" + id, dWeight, dBias), 
         dout, Hout, Wout, X, weight, bias, numChannels, Hin, Win, kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w)
-  // n * c_o * h_o * w_o, where h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1 and w_o likewise.
-  override def outputShape = ( numChannels, Hout, Wout )
+  }
+  // if not depthwise n * c_o * h_o * w_o, where h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1 and w_o likewise.
+  // else (N, C/M*Hout*Wout)
+  override def outputShape = if(isDepthWise) ( C_DivideBy_M().toString, Hout, Wout ) else ( numChannels, Hout, Wout )
   // -------------------------------------------------
   def numChannels = bottomLayerOutputShape._1
   def Hin = bottomLayerOutputShape._2

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala b/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
index 5c2dc77..67682d5 100644
--- a/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/CaffeNetwork.scala
@@ -208,8 +208,8 @@ class CaffeNetwork(netFilePath:String, val currentPhase:Phase,
   
   // Helper functions
   private def checkKey(m:Map[String, Any], key:String): Boolean = {
-    if(m == null) throw new LanguageException("Map is null")
-    else if(key == null) throw new LanguageException("key is null")
+    if(m == null) throw new LanguageException("Map is null (key=" + key + ")")
+    else if(key == null) throw new LanguageException("key is null (map=" + m + ")")
     else m.containsKey(key)
   }
   private def convertLayerParameterToCaffeLayer(param:LayerParameter):CaffeLayer = {

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/dl/Utils.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/dl/Utils.scala b/src/main/scala/org/apache/sysml/api/dl/Utils.scala
index d6af22c..0c00d3c 100644
--- a/src/main/scala/org/apache/sysml/api/dl/Utils.scala
+++ b/src/main/scala/org/apache/sysml/api/dl/Utils.scala
@@ -111,6 +111,30 @@ object Utils {
 	  }
 	}
 	
+	class CopyCaffeDeconvFloatToSystemMLDeconvDoubleArray(data:java.util.List[java.lang.Float], F:Int, C:Int, H:Int, W:Int, arr:Array[Double]) 
+	    extends CopyFloatToDoubleArray(data, C, F*H*W, false, arr) {
+	  override def run(): Unit = {
+	    var i = 0
+	    for(f <- 0 until F) {
+	      for(c <- 0 until C) {
+	        for(hw <- 0 until H*W) {
+	          arr(c*F*H*W + f*H*W + hw) = data.get(i).doubleValue()
+	          i = i+1
+	        }
+	      }
+	    }
+	  }
+	}
+	
+	def allocateDeconvolutionWeight(data:java.util.List[java.lang.Float], F:Int, C:Int, H:Int, W:Int):(MatrixBlock,CopyFloatToDoubleArray) = {
+	  val mb =  new MatrixBlock(C, F*H*W, false)
+    mb.allocateDenseBlock()
+    val arr = mb.getDenseBlock
+    val thread = new CopyCaffeDeconvFloatToSystemMLDeconvDoubleArray(data, F, C, H, W, arr)
+	  thread.start
+	  return (mb, thread)
+	}
+	
 	def allocateMatrixBlock(data:java.util.List[java.lang.Float], rows:Int, cols:Int, transpose:Boolean):(MatrixBlock,CopyFloatToDoubleArray) = {
 	  val mb =  new MatrixBlock(rows, cols, false)
     mb.allocateDenseBlock()
@@ -141,7 +165,7 @@ object Utils {
 	  if(inputVariables.keys.size == 0)
 	    throw new DMLRuntimeException("No weights found in the file " + caffeModelFilePath)
 	  for(input <- inputVariables.keys) {
-	    dmlScript.append("write(" + input + ", \"" + input + ".mtx\", format=\"" + format + "\");\n")
+	    dmlScript.append("write(" + input + ", \"" + outputDirectory + "/" + input + ".mtx\", format=\"" + format + "\");\n")
 	  }
 	  if(Caffe2DML.LOG.isDebugEnabled())
 	    Caffe2DML.LOG.debug("Executing the script:" + dmlScript.toString)
@@ -161,28 +185,43 @@ object Utils {
 	  val net1 = builder.build();
 	  
 	  val asyncThreads = new java.util.ArrayList[CopyFloatToDoubleArray]()
+	  val v1Layers = net1.getLayersList.map(layer => layer.getName -> layer).toMap
 	  for(layer <- net1.getLayerList) {
-	    if(layer.getBlobsCount == 0) {
+	    val blobs = if(layer.getBlobsCount != 0) layer.getBlobsList else if(v1Layers.contains(layer.getName)) v1Layers.get(layer.getName).get.getBlobsList else null
+	      
+	    if(blobs == null || blobs.size == 0) {
 	      // No weight or bias
 	      Caffe2DML.LOG.debug("The layer:" + layer.getName + " has no blobs")
 	    }
-	    else if(layer.getBlobsCount == 2) {
+	    else if(blobs.size == 2) {
 	      // Both weight and bias
 	      val caffe2DMLLayer = net.getCaffeLayer(layer.getName)
 	      val transpose = caffe2DMLLayer.isInstanceOf[InnerProduct]
 	      
 	      // weight
-	      val data = layer.getBlobs(0).getDataList
+	      val data = blobs(0).getDataList
 	      val shape = caffe2DMLLayer.weightShape()
 	      if(shape == null)
-	        throw new DMLRuntimeException("Didnot expect weights for the layer " + layer.getName)
+  	        throw new DMLRuntimeException("Didnot expect weights for the layer " + layer.getName)
 	      validateShape(shape, data, layer.getName)
-	      val ret1 = allocateMatrixBlock(data, shape(0), shape(1), transpose)
+	      
+	      val ret1 = if(caffe2DMLLayer.isInstanceOf[DeConvolution]) {
+	        // Swap dimensions: Caffe's format (F, C*Hf*Wf) to NN layer's format (C, F*Hf*Wf).
+	        val deconvLayer = caffe2DMLLayer.asInstanceOf[DeConvolution]
+	        val C = shape(0)
+	        val F = deconvLayer.numKernels.toInt
+	        val Hf = deconvLayer.kernel_h.toInt
+	        val Wf = deconvLayer.kernel_w.toInt
+	        allocateDeconvolutionWeight(data, F, C, Hf, Wf)
+	      }
+	      else {
+  	      allocateMatrixBlock(data, shape(0), shape(1), transpose)
+	      }
 	      asyncThreads.add(ret1._2)
 	      inputVariables.put(caffe2DMLLayer.weight, ret1._1)
 	      
 	      // bias
-	      val biasData = layer.getBlobs(1).getDataList
+	      val biasData = blobs(1).getDataList
 	      val biasShape = caffe2DMLLayer.biasShape()
 	      if(biasShape == null)
 	        throw new DMLRuntimeException("Didnot expect bias for the layer " + layer.getName)
@@ -192,15 +231,17 @@ object Utils {
 	      inputVariables.put(caffe2DMLLayer.bias, ret2._1)
 	      Caffe2DML.LOG.debug("Read weights/bias for layer:" + layer.getName)
 	    }
-	    else if(layer.getBlobsCount == 1) {
+	    else if(blobs.size == 1) {
 	      // Special case: convolution/deconvolution without bias
 	      // TODO: Extend nn layers to handle this situation + Generalize this to other layers, for example: InnerProduct
 	      val caffe2DMLLayer = net.getCaffeLayer(layer.getName)
 	      val convParam = if((caffe2DMLLayer.isInstanceOf[Convolution] || caffe2DMLLayer.isInstanceOf[DeConvolution]) && caffe2DMLLayer.param.hasConvolutionParam())  caffe2DMLLayer.param.getConvolutionParam else null  
 	      if(convParam == null)
 	        throw new DMLRuntimeException("Layer with blob count " + layer.getBlobsCount + " is not supported for the layer " + layer.getName)
+	      else if(convParam.hasBiasTerm && convParam.getBiasTerm)
+	        throw new DMLRuntimeException("Layer with blob count " + layer.getBlobsCount + " and with bias term is not supported for the layer " + layer.getName)
 	     
-	      val data = layer.getBlobs(0).getDataList
+	      val data = blobs(0).getDataList
 	      val shape = caffe2DMLLayer.weightShape()
 	      validateShape(shape, data, layer.getName)
 	      val ret1 = allocateMatrixBlock(data, shape(0), shape(1), false)
@@ -219,6 +260,10 @@ object Utils {
 	    t.join()
 	  }
 	  
+	  for(mb <- inputVariables.values()) {
+	    mb.recomputeNonZeros();
+	  }
+	  
 	  // Return the NetParameter without
 	  return readCaffeNet(netFilePath)
 	}
@@ -253,4 +298,4 @@ class Utils {
     Utils.saveCaffeModelFile(sc, deployFilePath, caffeModelFilePath, outputDirectory, format)
   }
   
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
index 918a48d..3559a40 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
@@ -153,6 +153,8 @@ trait BaseSystemMLEstimatorModel extends BaseSystemMLEstimatorOrModel {
     double2Double(d)
   }
   
+  def transform_probability(X: MatrixBlock): MatrixBlock;
+  
   def transformSchema(schema: StructType): StructType = schema
   
   // Returns the script and variable for X
@@ -217,26 +219,48 @@ trait BaseSystemMLClassifier extends BaseSystemMLEstimator {
 
 trait BaseSystemMLClassifierModel extends BaseSystemMLEstimatorModel {
 
-  def baseTransform(X: MatrixBlock, sc: SparkContext, probVar:String): MatrixBlock = {
-    val isSingleNode = true
+	def baseTransform(X: MatrixBlock, sc: SparkContext, probVar:String): MatrixBlock = baseTransform(X, sc, probVar, -1, 1, 1)
+	
+	def baseTransform(X: MatrixBlock, sc: SparkContext, probVar:String, C:Int, H: Int, W:Int): MatrixBlock = {
+    val Prob = baseTransformHelper(X, sc, probVar, C, H, W)
+    val script1 = dml("source(\"nn/util.dml\") as util; Prediction = util::predict_class(Prob, C, H, W);")
+    							.out("Prediction").in("Prob", Prob.toMatrixBlock, Prob.getMatrixMetadata).in("C", C).in("H", H).in("W", W)
+    val ret = (new MLContext(sc)).execute(script1).getMatrix("Prediction").toMatrixBlock
+              
+    if(ret.getNumColumns != 1 && H == 1 && W == 1) {
+      throw new RuntimeException("Expected predicted label to be a column vector")
+    }
+    return ret
+  }
+	
+	def baseTransformHelper(X: MatrixBlock, sc: SparkContext, probVar:String, C:Int, H: Int, W:Int): Matrix = {
+	  val isSingleNode = true
     val ml = new MLContext(sc)
     updateML(ml)
     val script = getPredictionScript(isSingleNode)
     // Uncomment for debugging
     // ml.setExplainLevel(ExplainLevel.RECOMPILE_RUNTIME)
     val modelPredict = ml.execute(script._1.in(script._2, X, new MatrixMetadata(X.getNumRows, X.getNumColumns, X.getNonZeros)))
-    val ret = PredictionUtils.computePredictedClassLabelsFromProbability(modelPredict, isSingleNode, sc, probVar)
-              .getMatrix("Prediction").toMatrixBlock
-              
-    if(ret.getNumColumns != 1) {
-      throw new RuntimeException("Expected predicted label to be a column vector")
-    }
-    return ret
-  }
-
-  def baseTransform(df: ScriptsUtils.SparkDataType, sc: SparkContext, 
+    return modelPredict.getMatrix(probVar)
+	}
+	
+	def baseTransformProbability(X: MatrixBlock, sc: SparkContext, probVar:String): MatrixBlock = {
+	  baseTransformProbability(X, sc, probVar, -1, 1, 1)
+	}
+	
+	def baseTransformProbability(X: MatrixBlock, sc: SparkContext, probVar:String, C:Int, H: Int, W:Int): MatrixBlock = {
+	  return baseTransformHelper(X, sc, probVar, C, H, W).toMatrixBlock
+	}
+	
+	
+	def baseTransform(df: ScriptsUtils.SparkDataType, sc: SparkContext, 
       probVar:String, outputProb:Boolean=true): DataFrame = {
-    val isSingleNode = false
+		baseTransform(df, sc, probVar, outputProb, -1, 1, 1)
+	}
+	
+	def baseTransformHelper(df: ScriptsUtils.SparkDataType, sc: SparkContext, 
+      probVar:String, outputProb:Boolean, C:Int, H: Int, W:Int): Matrix = {
+	  val isSingleNode = false
     val ml = new MLContext(sc)
     updateML(ml)
     val mcXin = new MatrixCharacteristics()
@@ -245,11 +269,19 @@ trait BaseSystemMLClassifierModel extends BaseSystemMLEstimatorModel {
     val mmXin = new MatrixMetadata(mcXin)
     val Xin_bin = new Matrix(Xin, mmXin)
     val modelPredict = ml.execute(script._1.in(script._2, Xin_bin))
-    val predLabelOut = PredictionUtils.computePredictedClassLabelsFromProbability(modelPredict, isSingleNode, sc, probVar)
+    return modelPredict.getMatrix(probVar)
+	}
+
+  def baseTransform(df: ScriptsUtils.SparkDataType, sc: SparkContext, 
+      probVar:String, outputProb:Boolean, C:Int, H: Int, W:Int): DataFrame = {
+    val Prob = baseTransformHelper(df, sc, probVar, outputProb, C, H, W)
+    val script1 = dml("source(\"nn/util.dml\") as util; Prediction = util::predict_class(Prob, C, H, W);")
+    							.out("Prediction").in("Prob", Prob).in("C", C).in("H", H).in("W", W)
+    val predLabelOut = (new MLContext(sc)).execute(script1)
     val predictedDF = predLabelOut.getDataFrame("Prediction").select(RDDConverterUtils.DF_ID_COLUMN, "C1").withColumnRenamed("C1", "prediction")
       
     if(outputProb) {
-      val prob = modelPredict.getDataFrame(probVar, true).withColumnRenamed("C1", "probability").select(RDDConverterUtils.DF_ID_COLUMN, "probability")
+      val prob = Prob.toDFVectorWithIDColumn().withColumnRenamed("C1", "probability").select(RDDConverterUtils.DF_ID_COLUMN, "probability")
       val dataset = RDDConverterUtilsExt.addIDToDataFrame(df.asInstanceOf[DataFrame], df.sparkSession, RDDConverterUtils.DF_ID_COLUMN)
       return PredictionUtils.joinUsingID(dataset, PredictionUtils.joinUsingID(prob, predictedDF))
     }

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/ml/LinearRegression.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/LinearRegression.scala b/src/main/scala/org/apache/sysml/api/ml/LinearRegression.scala
index ac6c22c..b7634d7 100644
--- a/src/main/scala/org/apache/sysml/api/ml/LinearRegression.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/LinearRegression.scala
@@ -92,6 +92,8 @@ class LinearRegressionModel(override val uid: String)(estimator:LinearRegression
     copyValues(that, extra)
   }
   
+  def transform_probability(X: MatrixBlock): MatrixBlock = throw new DMLRuntimeException("Unsupported method")
+  
   def baseEstimator():BaseSystemMLEstimator = estimator
   
   def this(estimator:LinearRegression) =  {

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/ml/LogisticRegression.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/LogisticRegression.scala b/src/main/scala/org/apache/sysml/api/ml/LogisticRegression.scala
index 1c368c1..b04acd1 100644
--- a/src/main/scala/org/apache/sysml/api/ml/LogisticRegression.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/LogisticRegression.scala
@@ -109,6 +109,7 @@ class LogisticRegressionModel(override val uid: String)(
   def modelVariables():List[String] = List[String]("B_out")
   
   def transform(X: MatrixBlock): MatrixBlock = baseTransform(X, sc, "means")
+  def transform_probability(X: MatrixBlock): MatrixBlock = baseTransformProbability(X, sc, "means")
   def transform(df: ScriptsUtils.SparkDataType): DataFrame = baseTransform(df, sc, "means")
 }
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/ml/NaiveBayes.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/NaiveBayes.scala b/src/main/scala/org/apache/sysml/api/ml/NaiveBayes.scala
index bc4e77d..990ab52 100644
--- a/src/main/scala/org/apache/sysml/api/ml/NaiveBayes.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/NaiveBayes.scala
@@ -110,6 +110,7 @@ class NaiveBayesModel(override val uid: String)
   
   def baseEstimator():BaseSystemMLEstimator = estimator
   def transform(X: MatrixBlock): MatrixBlock = baseTransform(X, sc, "probs")
+  def transform_probability(X: MatrixBlock): MatrixBlock = baseTransformProbability(X, sc, "probs")
   def transform(df: ScriptsUtils.SparkDataType): DataFrame = baseTransform(df, sc, "probs")
   
 }

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/src/main/scala/org/apache/sysml/api/ml/SVM.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/SVM.scala b/src/main/scala/org/apache/sysml/api/ml/SVM.scala
index 256bd77..9107836 100644
--- a/src/main/scala/org/apache/sysml/api/ml/SVM.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/SVM.scala
@@ -116,5 +116,6 @@ class SVMModel (override val uid: String)(estimator:SVM, val sc: SparkContext, v
   }
   
   def transform(X: MatrixBlock): MatrixBlock = baseTransform(X, sc, "scores")
+  def transform_probability(X: MatrixBlock): MatrixBlock = baseTransformProbability(X, sc, "scores")
   def transform(df: ScriptsUtils.SparkDataType): DataFrame = baseTransform(df, sc, "scores")
 }


[2/2] systemml git commit: [SYSTEMML-540] Extended Caffe2DML to support image segmentation problems

Posted by ni...@apache.org.
[SYSTEMML-540] Extended Caffe2DML to support image segmentation problems

- This commit extends Caffe2DML to support image segmentation problems
  and depthwise convolution, and includes a couple of bug fixes for
  loading an existing Caffe model.
- Additionally, we have added a summary() method to Caffe2DML to print
  the network.

Closes #527.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/978d4de4
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/978d4de4
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/978d4de4

Branch: refs/heads/master
Commit: 978d4de4778f7c7bbccff5255698e05a7a5ae7fd
Parents: d56c05e
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Wed Jul 5 11:02:57 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Wed Jul 5 11:02:57 2017 -0700

----------------------------------------------------------------------
 docs/beginners-guide-caffe2dml.md               |  534 +++++++++-
 docs/python-reference.md                        |    4 +
 .../caffe2dml/models/imagenet/labels.txt        | 1000 ++++++++++++++++++
 .../vgg19/VGG_ILSVRC_19_layers_deploy.proto     |  414 ++++++++
 .../vgg19/VGG_ILSVRC_19_layers_network.proto    |  422 ++++++++
 .../vgg19/VGG_ILSVRC_19_layers_solver.proto     |   14 +
 scripts/nn/layers/conv2d_transpose.dml          |    1 -
 scripts/nn/layers/softmax2d.dml                 |    1 -
 scripts/nn/util.dml                             |   26 +
 .../parser/common/CommonSyntacticValidator.java |    4 +-
 src/main/python/systemml/mllearn/estimators.py  |  134 ++-
 .../org/apache/sysml/api/dl/Caffe2DML.scala     |   90 +-
 .../org/apache/sysml/api/dl/CaffeLayer.scala    |  210 +++-
 .../org/apache/sysml/api/dl/CaffeNetwork.scala  |    4 +-
 .../scala/org/apache/sysml/api/dl/Utils.scala   |   65 +-
 .../sysml/api/ml/BaseSystemMLClassifier.scala   |   62 +-
 .../apache/sysml/api/ml/LinearRegression.scala  |    2 +
 .../sysml/api/ml/LogisticRegression.scala       |    1 +
 .../org/apache/sysml/api/ml/NaiveBayes.scala    |    1 +
 .../scala/org/apache/sysml/api/ml/SVM.scala     |    1 +
 20 files changed, 2813 insertions(+), 177 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/docs/beginners-guide-caffe2dml.md
----------------------------------------------------------------------
diff --git a/docs/beginners-guide-caffe2dml.md b/docs/beginners-guide-caffe2dml.md
index f15e025..7671c32 100644
--- a/docs/beginners-guide-caffe2dml.md
+++ b/docs/beginners-guide-caffe2dml.md
@@ -32,24 +32,14 @@ limitations under the License.
 Caffe2DML is an **experimental API** that converts an Caffe specification to DML. 
 It is designed to fit well into the mllearn framework and hence supports NumPy, Pandas as well as PySpark DataFrame.
 
-## Examples
+### Training Lenet 
 
-### Train Lenet on MNIST dataset
-
-#### MNIST dataset
-
-The MNIST dataset was constructed from two datasets of the US National Institute of Standards and Technology (NIST). The training set consists of handwritten digits from 250 different people, 50 percent high school students, and 50 percent employees from the Census Bureau. Note that the test set contains handwritten digits from different people following the same split.
-In the below example, we are using mlxtend package to load the mnist dataset into Python NumPy arrays, but you are free to download it directly from http://yann.lecun.com/exdb/mnist/.
-
-```bash
-pip install mlxtend
-```
-
-#### Lenet network
-
-Lenet is a simple convolutional neural network, proposed by Yann LeCun in 1998. It has 2 convolutions/pooling and fully connected layer. 
+To create a Caffe2DML object, one needs to create a solver file and a network file that conform 
+to the [Caffe specification](http://caffe.berkeleyvision.org/).
+In this example, we will train Lenet, which is a simple convolutional neural network proposed by Yann LeCun in 1998. 
+It has 2 convolution/pooling layers and a fully connected layer. 
 Similar to Caffe, the network has been modified to add dropout. 
-For more detail, please see http://yann.lecun.com/exdb/lenet/
+For more detail, please see [http://yann.lecun.com/exdb/lenet/](http://yann.lecun.com/exdb/lenet/).
 
 The [solver specification](https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/mnist_lenet/lenet_solver.proto)
 specifies to Caffe2DML to use following configuration when generating the training DML script:  
@@ -58,67 +48,167 @@ specifies to Caffe2DML to use following configuration when generating the traini
 - `display: 100`: Display training loss after every 100 iterations.
 - `test_interval: 500`: Display validation loss after every 500 iterations.
 - `test_iter: 10`: Validation data size = 10 * BATCH_SIZE.
- 
+
+```python
+from systemml.mllearn import Caffe2DML
+import urllib
+
+# Download the Lenet network
+urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/mnist_lenet/lenet.proto', 'lenet.proto')
+urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/mnist_lenet/lenet_solver.proto', 'lenet_solver.proto')
+# Train Lenet On MNIST using scikit-learn like API
+
+# The MNIST dataset contains 28 X 28 gray-scale images (number of channels = 1).
+lenet = Caffe2DML(spark, solver='lenet_solver.proto', input_shape=(1, 28, 28))
+lenet.summary()
+```
+
+Output:
+
+```
++-----+---------------+--------------+------------+---------+-----------+---------+
+| Name|           Type|        Output|      Weight|     Bias|        Top|   Bottom|
++-----+---------------+--------------+------------+---------+-----------+---------+
+|mnist|           Data| (, 1, 28, 28)|            |         |mnist,mnist|         |
+|conv1|    Convolution|(, 32, 28, 28)|   [32 X 25]| [32 X 1]|      conv1|    mnist|
+|relu1|           ReLU|(, 32, 28, 28)|            |         |      relu1|    conv1|
+|pool1|        Pooling|(, 32, 14, 14)|            |         |      pool1|    relu1|
+|conv2|    Convolution|(, 64, 14, 14)|  [64 X 800]| [64 X 1]|      conv2|    pool1|
+|relu2|           ReLU|(, 64, 14, 14)|            |         |      relu2|    conv2|
+|pool2|        Pooling|  (, 64, 7, 7)|            |         |      pool2|    relu2|
+|  ip1|   InnerProduct| (, 512, 1, 1)|[3136 X 512]|[1 X 512]|        ip1|    pool2|
+|relu3|           ReLU| (, 512, 1, 1)|            |         |      relu3|      ip1|
+|drop1|        Dropout| (, 512, 1, 1)|            |         |      drop1|    relu3|
+|  ip2|   InnerProduct|  (, 10, 1, 1)|  [512 X 10]| [1 X 10]|        ip2|    drop1|
+| loss|SoftmaxWithLoss|  (, 10, 1, 1)|            |         |       loss|ip2,mnist|
++-----+---------------+--------------+------------+---------+-----------+---------+
+``` 
+
+To train the above lenet model, we use the MNIST dataset. 
+The MNIST dataset was constructed from two datasets of the US National Institute of Standards and Technology (NIST). 
+The training set consists of handwritten digits from 250 different people, 50 percent high school students, and 50 percent employees from the Census Bureau. Note that the test set contains handwritten digits from different people following the same split.
+In this example, we use the mlxtend package to load the MNIST dataset into Python NumPy arrays, but you are free to download it directly from http://yann.lecun.com/exdb/mnist/.
+
+```bash
+pip install mlxtend
+```
+
+We first split the MNIST dataset into train and test.  
 
 ```python
 from mlxtend.data import mnist_data
 import numpy as np
 from sklearn.utils import shuffle
-import urllib
-from systemml.mllearn import Caffe2DML
-
 # Download the MNIST dataset
 X, y = mnist_data()
 X, y = shuffle(X, y)
-
 # Split the data into training and test
 n_samples = len(X)
 X_train = X[:int(.9 * n_samples)]
 y_train = y[:int(.9 * n_samples)]
 X_test = X[int(.9 * n_samples):]
 y_test = y[int(.9 * n_samples):]
+```
 
-# Download the Lenet network
-urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/mnist_lenet/lenet.proto', 'lenet.proto')
-urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/mnist_lenet/lenet_solver.proto', 'lenet_solver.proto')
+Finally, we use the training and test datasets to perform training and prediction using a scikit-learn-like API.
 
-# Train Lenet On MNIST using scikit-learn like API
-# MNIST dataset contains 28 X 28 gray-scale (number of channel=1).
-lenet = Caffe2DML(sqlCtx, solver='lenet_solver.proto', input_shape=(1, 28, 28))
+```python
+# Since Caffe2DML is an mllearn API, it supports scikit-learn-like methods for training.
+lenet.fit(X_train, y_train)
+# Either perform prediction: lenet.predict(X_test) or scoring:
+lenet.score(X_test, y_test)
+```
 
-# debug=True prints will print the generated DML script along with classification report. Please donot test this flag in production.
-lenet.set(debug=True)
+Output:
+```
+Iter:100, training loss:0.189008481420049, training accuracy:92.1875
+Iter:200, training loss:0.21657020576713149, training accuracy:96.875
+Iter:300, training loss:0.05780939180052287, training accuracy:98.4375
+Iter:400, training loss:0.03406193840071965, training accuracy:100.0
+Iter:500, training loss:0.02847187709112875, training accuracy:100.0
+Iter:500, validation loss:222.736109642486, validation accuracy:96.49077868852459
+Iter:600, training loss:0.04867848427394318, training accuracy:96.875
+Iter:700, training loss:0.043060905384304224, training accuracy:98.4375
+Iter:800, training loss:0.01861298388336358, training accuracy:100.0
+Iter:900, training loss:0.03495462005933769, training accuracy:100.0
+Iter:1000, training loss:0.04598737325942163, training accuracy:98.4375
+Iter:1000, validation loss:180.04232316810746, validation accuracy:97.28483606557377
+Iter:1100, training loss:0.05630274512793694, training accuracy:98.4375
+Iter:1200, training loss:0.027278141291535066, training accuracy:98.4375
+Iter:1300, training loss:0.04356275106270366, training accuracy:98.4375
+Iter:1400, training loss:0.00780793048139091, training accuracy:100.0
+Iter:1500, training loss:0.004135965492374173, training accuracy:100.0
+Iter:1500, validation loss:156.61636761709374, validation accuracy:97.48975409836065
+Iter:1600, training loss:0.007939063305475983, training accuracy:100.0
+Iter:1700, training loss:0.0025769653351162196, training accuracy:100.0
+Iter:1800, training loss:0.0023251742357435204, training accuracy:100.0
+Iter:1900, training loss:0.0016795711023936644, training accuracy:100.0
+Iter:2000, training loss:0.03676045262879483, training accuracy:98.4375
+Iter:2000, validation loss:173.66147359346, validation accuracy:97.48975409836065
+0.97399999999999998
+```
 
-# If you want to see the statistics as well as the plan
-lenet.setStatistics(True).setExplain(True)
+### Additional Configuration
 
-# If you want to force GPU execution. Please make sure the required dependency are available.  
-# lenet.setGPU(True).setForceGPU(True)
-# Example usage of train_algo, test_algo. Assume 2 gpus on driver
-# lenet.set(train_algo="allreduce_parallel_batches", test_algo="minibatch", parallel_batches=2)
+- Print the generated DML script along with classification report:  `lenet.set(debug=True)`
+- Print the heavy hitter instructions and the execution plan (advanced users): `lenet.setStatistics(True).setExplain(True)`
+- (Optional but recommended) Enable [native BLAS](http://apache.github.io/systemml/native-backend): `lenet.setConfigProperty("native.blas", "auto")`
+- Enable experimental features such as codegen: `lenet.setConfigProperty("codegen.enabled", "true").setConfigProperty("codegen.plancache", "true")`
+- Force GPU execution (please make sure the required jcuda dependencies are included): `lenet.setGPU(True).setForceGPU(True)`
 
-# (Optional but recommended) Enable native BLAS. 
-lenet.setConfigProperty("native.blas", "auto")
+Unlike Caffe, where the default train and test algorithm is `minibatch`, Caffe2DML lets you specify the
+algorithm using the parameters `train_algo` and `test_algo` (valid values are: `minibatch`, `allreduce_parallel_batches`, 
+and `allreduce`). Here are some common settings:
 
-# In case you want to enable experimental feature such as codegen
-# lenet.setConfigProperty("codegen.enabled", "true").setConfigProperty("codegen.plancache", "true")
+|                                                                          | PySpark script                                                                                                                           | Changes to Network/Solver                                              |
+|--------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------|
+| Single-node CPU execution (similar to Caffe with solver_mode: CPU)       | `lenet.set(train_algo="minibatch", test_algo="minibatch")`                                                                               | Ensure that `batch_size` is set to appropriate value (for example: 64) |
+| Single-node single-GPU execution                                         | `lenet.set(train_algo="minibatch", test_algo="minibatch").setGPU(True).setForceGPU(True)`                                                | Ensure that `batch_size` is set to appropriate value (for example: 64) |
+| Single-node multi-GPU execution (similar to Caffe with solver_mode: GPU) | `lenet.set(train_algo="allreduce_parallel_batches", test_algo="minibatch", parallel_batches=num_gpu).setGPU(True).setForceGPU(True)`     | Ensure that `batch_size` is set to appropriate value (for example: 64) |
+| Distributed prediction                                                   | `lenet.set(test_algo="allreduce")`                                                                                                       |                                                                        |
+| Distributed synchronous training                                         | `lenet.set(train_algo="allreduce_parallel_batches", parallel_batches=num_cluster_cores)`                                                 | Ensure that `batch_size` is set to appropriate value (for example: 64) |
 
-# Since Caffe2DML is a mllearn API, it allows for scikit-learn like method for training.
+### Saving the trained model
+
+```python
 lenet.fit(X_train, y_train)
-lenet.predict(X_test)
+lenet.save('trained_weights')
+new_lenet = Caffe2DML(spark, solver='lenet_solver.proto', input_shape=(1, 28, 28))
+new_lenet.load('trained_weights')
+new_lenet.score(X_test, y_test)
 ```
 
-For more detail on enabling native BLAS, please see the documentation for the [native backend](http://apache.github.io/systemml/native-backend).
+### Loading a pretrained caffemodel
 
-Common settings for `train_algo` and `test_algo` parameters:
+We provide a converter utility to convert a `.caffemodel` file trained using Caffe to the SystemML format.
 
-|                                                                          | PySpark script                                                                                                                           | Changes to Network/Solver                                              |
-|--------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------|
-| Single-node CPU execution (similar to Caffe with solver_mode: CPU)       | `caffe2dml.set(train_algo="minibatch", test_algo="minibatch")`                                                                           | Ensure that `batch_size` is set to appropriate value (for example: 64) |
-| Single-node single-GPU execution                                         | `caffe2dml.set(train_algo="minibatch", test_algo="minibatch").setGPU(True).setForceGPU(True)`                                            | Ensure that `batch_size` is set to appropriate value (for example: 64) |
-| Single-node multi-GPU execution (similar to Caffe with solver_mode: GPU) | `caffe2dml.set(train_algo="allreduce_parallel_batches", test_algo="minibatch", parallel_batches=num_gpu).setGPU(True).setForceGPU(True)` | Ensure that `batch_size` is set to appropriate value (for example: 64) |
-| Distributed prediction                                                   | `caffe2dml.set(test_algo="allreduce")`                                                                                                   |                                                                        |
-| Distributed synchronous training                                         | `caffe2dml.set(train_algo="allreduce_parallel_batches", parallel_batches=num_cluster_cores)`                                             | Ensure that `batch_size` is set to appropriate value (for example: 64) |
+```python
+# First download deploy file and caffemodel
+import urllib
+urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_deploy.proto', 'VGG_ILSVRC_19_layers_deploy.proto')
+urllib.urlretrieve('http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_19_layers.caffemodel', 'VGG_ILSVRC_19_layers.caffemodel')
+# Save the weights into trained_vgg_weights directory
+import systemml as sml
+sml.convert_caffemodel(sc, 'VGG_ILSVRC_19_layers_deploy.proto', 'VGG_ILSVRC_19_layers.caffemodel',  'trained_vgg_weights')
+```
+
+We can then use the `trained_vgg_weights` directory for performing prediction or fine-tuning.
+
+```python
+# Download the VGG network
+urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_network.proto', 'VGG_ILSVRC_19_layers_network.proto')
+urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_solver.proto', 'VGG_ILSVRC_19_layers_solver.proto')
+# Storing the labels.txt in the weights directory allows predict to return a label (for example: 'cougar, puma, catamount, mountain lion, painter, panther, Felis concolor') rather than the column index of one-hot encoded vector (for example: 287).
+urllib.urlretrieve('https://raw.githubusercontent.com/apache/systemml/master/scripts/nn/examples/caffe2dml/models/imagenet/labels.txt', os.path.join('trained_vgg_weights', 'labels.txt'))
+from systemml.mllearn import Caffe2DML
+vgg = Caffe2DML(sqlCtx, solver='VGG_ILSVRC_19_layers_solver.proto', input_shape=(3, 224, 224))
+vgg.load('trained_vgg_weights')
+# We can then perform prediction:
+from PIL import Image
+X_test = sml.convertImageToNumPyArr(Image.open('test.jpg'), img_shape=(3, 224, 224))
+vgg.predict(X_test)
+# OR Fine-Tuning: vgg.fit(X_train, y_train)
+```
 
 ## Frequently asked questions
 
@@ -291,16 +381,358 @@ train_df = sc.parallelize(list_jpeg_files, int(len(list_jpeg_files)/10)).map(lam
 train_df.write.parquet('kaggle-cats-dogs.parquet')
 ```
 
+An alternative way to load images into a PySpark DataFrame for prediction is to use MLlib's LabeledPoint class:
+
+```python
+list_jpeg_files = os.listdir(train_dir)
+train_df = sc.parallelize(list_jpeg_files, int(len(list_jpeg_files)/10)).map(lambda filename : LabeledPoint(0, sml.convertImageToNumPyArr(Image.open(os.path.join(train_dir, filename)), img_shape=img_shape)[0,:])).toDF().select('features')
+# Note: convertVectorColumnsToML has an additional serialization cost
+train_df = MLUtils.convertVectorColumnsToML(train_df)
+```
+ 
+
 #### Can I use Caffe2DML via Scala ?
 
 Though we recommend using Caffe2DML via its Python interfaces, it is possible to use it by creating an object of the class
 `org.apache.sysml.api.dl.Caffe2DML`. It is important to note that Caffe2DML's scala API is packaged in `systemml-*-extra.jar`.
 
+#### How can I get summary information of my network ?
+ 
+
+```python
+lenet.summary()
+```
+
+Output:
+
+```
++-----+---------------+--------------+------------+---------+-----------+---------+
+| Name|           Type|        Output|      Weight|     Bias|        Top|   Bottom|
++-----+---------------+--------------+------------+---------+-----------+---------+
+|mnist|           Data| (, 1, 28, 28)|            |         |mnist,mnist|         |
+|conv1|    Convolution|(, 32, 28, 28)|   [32 X 25]| [32 X 1]|      conv1|    mnist|
+|relu1|           ReLU|(, 32, 28, 28)|            |         |      relu1|    conv1|
+|pool1|        Pooling|(, 32, 14, 14)|            |         |      pool1|    relu1|
+|conv2|    Convolution|(, 64, 14, 14)|  [64 X 800]| [64 X 1]|      conv2|    pool1|
+|relu2|           ReLU|(, 64, 14, 14)|            |         |      relu2|    conv2|
+|pool2|        Pooling|  (, 64, 7, 7)|            |         |      pool2|    relu2|
+|  ip1|   InnerProduct| (, 512, 1, 1)|[3136 X 512]|[1 X 512]|        ip1|    pool2|
+|relu3|           ReLU| (, 512, 1, 1)|            |         |      relu3|      ip1|
+|drop1|        Dropout| (, 512, 1, 1)|            |         |      drop1|    relu3|
+|  ip2|   InnerProduct|  (, 10, 1, 1)|  [512 X 10]| [1 X 10]|        ip2|    drop1|
+| loss|SoftmaxWithLoss|  (, 10, 1, 1)|            |         |       loss|ip2,mnist|
++-----+---------------+--------------+------------+---------+-----------+---------+
+``` 
 
 #### How can I view the script generated by Caffe2DML ?
 
 To view the generated DML script (and additional debugging information), please set the `debug` parameter to True.
 
 ```python
-caffe2dmlObject.set(debug=True)
+lenet.set(debug=True)
+```
+
+Output:
+```
+001|debug = TRUE
+002|source("nn/layers/softmax.dml") as softmax
+003|source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss
+004|source("nn/layers/conv2d_builtin.dml") as conv2d_builtin
+005|source("nn/layers/relu.dml") as relu
+006|source("nn/layers/max_pool2d_builtin.dml") as max_pool2d_builtin
+007|source("nn/layers/affine.dml") as affine
+008|source("nn/layers/dropout.dml") as dropout
+009|source("nn/optim/sgd_momentum.dml") as sgd_momentum
+010|source("nn/layers/l2_reg.dml") as l2_reg
+011|X_full_path = ifdef($X, " ")
+012|X_full = read(X_full_path)
+013|y_full_path = ifdef($y, " ")
+014|y_full = read(y_full_path)
+015|num_images = nrow(y_full)
+016|# Convert to one-hot encoding (Assumption: 1-based labels)
+017|y_full = table(seq(1,num_images,1), y_full, num_images, 10)
+018|weights = ifdef($weights, " ")
+019|# Initialize the layers and solvers
+020|X_full = X_full * 0.00390625
+021|BATCH_SIZE = 64
+022|[conv1_weight,conv1_bias] = conv2d_builtin::init(32,1,5,5)
+023|[conv2_weight,conv2_bias] = conv2d_builtin::init(64,32,5,5)
+024|[ip1_weight,ip1_bias] = affine::init(3136,512)
+025|[ip2_weight,ip2_bias] = affine::init(512,10)
+026|conv1_weight_v = sgd_momentum::init(conv1_weight)
+027|conv1_bias_v = sgd_momentum::init(conv1_bias)
+028|conv2_weight_v = sgd_momentum::init(conv2_weight)
+029|conv2_bias_v = sgd_momentum::init(conv2_bias)
+030|ip1_weight_v = sgd_momentum::init(ip1_weight)
+031|ip1_bias_v = sgd_momentum::init(ip1_bias)
+032|ip2_weight_v = sgd_momentum::init(ip2_weight)
+033|ip2_bias_v = sgd_momentum::init(ip2_bias)
+034|num_validation = 10 * BATCH_SIZE
+035|# Sanity check to ensure that validation set is not too large
+036|if(num_validation > ceil(0.3 * num_images)) {
+037|    max_test_iter = floor(ceil(0.3 * num_images) / BATCH_SIZE)
+038|    stop("Too large validation size. Please reduce test_iter to " + max_test_iter)
+039|}
+040|X = X_full[(num_validation+1):num_images,]; y = y_full[(num_validation+1):num_images,]; X_val = X_full[1:num_validation,]; y_val = y_full[1:num_validation,]; num_images = nrow(y)
+041|num_iters_per_epoch = ceil(num_images / BATCH_SIZE)
+042|max_epochs = ceil(2000 / num_iters_per_epoch)
+043|iter = 0
+044|lr = 0.01
+045|for(e in 1:max_epochs) {
+046|    for(i in 1:num_iters_per_epoch) {
+047|            beg = ((i-1) * BATCH_SIZE) %% num_images + 1; end = min(beg + BATCH_SIZE - 1, num_images); Xb = X[beg:end,]; yb = y[beg:end,];
+048|            iter = iter + 1
+049|            # Perform forward pass
+050|            [out3,ignoreHout_3,ignoreWout_3] = conv2d_builtin::forward(Xb,conv1_weight,conv1_bias,1,28,28,5,5,1,1,2,2)
+051|            out4 = relu::forward(out3)
+052|            [out5,ignoreHout_5,ignoreWout_5] = max_pool2d_builtin::forward(out4,32,28,28,2,2,2,2,0,0)
+053|            [out6,ignoreHout_6,ignoreWout_6] = conv2d_builtin::forward(out5,conv2_weight,conv2_bias,32,14,14,5,5,1,1,2,2)
+054|            out7 = relu::forward(out6)
+055|            [out8,ignoreHout_8,ignoreWout_8] = max_pool2d_builtin::forward(out7,64,14,14,2,2,2,2,0,0)
+056|            out9 = affine::forward(out8,ip1_weight,ip1_bias)
+057|            out10 = relu::forward(out9)
+058|            [out11,mask11] = dropout::forward(out10,0.5,-1)
+059|            out12 = affine::forward(out11,ip2_weight,ip2_bias)
+060|            out13 = softmax::forward(out12)
+061|            # Perform backward pass
+062|            dProbs = cross_entropy_loss::backward(out13,yb); dOut13 = softmax::backward(dProbs,out12); dOut13_12 = dOut13; dOut13_2 = dOut13;
+063|            [dOut12,ip2_dWeight,ip2_dBias] = affine::backward(dOut13_12,out11,ip2_weight,ip2_bias); dOut12_11 = dOut12;
+064|            dOut11 = dropout::backward(dOut12_11,out10,0.5,mask11); dOut11_10 = dOut11;
+065|            dOut10 = relu::backward(dOut11_10,out9); dOut10_9 = dOut10;
+066|            [dOut9,ip1_dWeight,ip1_dBias] = affine::backward(dOut10_9,out8,ip1_weight,ip1_bias); dOut9_8 = dOut9;
+067|            dOut8 = max_pool2d_builtin::backward(dOut9_8,7,7,out7,64,14,14,2,2,2,2,0,0); dOut8_7 = dOut8;
+068|            dOut7 = relu::backward(dOut8_7,out6); dOut7_6 = dOut7;
+069|            [dOut6,conv2_dWeight,conv2_dBias] = conv2d_builtin::backward(dOut7_6,14,14,out5,conv2_weight,conv2_bias,32,14,14,5,5,1,1,2,2); dOut6_5 = dOut6;
+070|            dOut5 = max_pool2d_builtin::backward(dOut6_5,14,14,out4,32,28,28,2,2,2,2,0,0); dOut5_4 = dOut5;
+071|            dOut4 = relu::backward(dOut5_4,out3); dOut4_3 = dOut4;
+072|            [dOut3,conv1_dWeight,conv1_dBias] = conv2d_builtin::backward(dOut4_3,28,28,Xb,conv1_weight,conv1_bias,1,28,28,5,5,1,1,2,2); dOut3_2 = dOut3;
+073|            # Update the parameters
+074|            conv1_dWeight_reg = l2_reg::backward(conv1_weight, 5.000000237487257E-4)
+075|            conv1_dWeight = conv1_dWeight + conv1_dWeight_reg
+076|            [conv1_weight,conv1_weight_v] = sgd_momentum::update(conv1_weight,conv1_dWeight,(lr * 1.0),0.8999999761581421,conv1_weight_v)
+077|            [conv1_bias,conv1_bias_v] = sgd_momentum::update(conv1_bias,conv1_dBias,(lr * 2.0),0.8999999761581421,conv1_bias_v)
+078|            conv2_dWeight_reg = l2_reg::backward(conv2_weight, 5.000000237487257E-4)
+079|            conv2_dWeight = conv2_dWeight + conv2_dWeight_reg
+080|            [conv2_weight,conv2_weight_v] = sgd_momentum::update(conv2_weight,conv2_dWeight,(lr * 1.0),0.8999999761581421,conv2_weight_v)
+081|            [conv2_bias,conv2_bias_v] = sgd_momentum::update(conv2_bias,conv2_dBias,(lr * 2.0),0.8999999761581421,conv2_bias_v)
+082|            ip1_dWeight_reg = l2_reg::backward(ip1_weight, 5.000000237487257E-4)
+083|            ip1_dWeight = ip1_dWeight + ip1_dWeight_reg
+084|            [ip1_weight,ip1_weight_v] = sgd_momentum::update(ip1_weight,ip1_dWeight,(lr * 1.0),0.8999999761581421,ip1_weight_v)
+085|            [ip1_bias,ip1_bias_v] = sgd_momentum::update(ip1_bias,ip1_dBias,(lr * 2.0),0.8999999761581421,ip1_bias_v)
+086|            ip2_dWeight_reg = l2_reg::backward(ip2_weight, 5.000000237487257E-4)
+087|            ip2_dWeight = ip2_dWeight + ip2_dWeight_reg
+088|            [ip2_weight,ip2_weight_v] = sgd_momentum::update(ip2_weight,ip2_dWeight,(lr * 1.0),0.8999999761581421,ip2_weight_v)
+089|            [ip2_bias,ip2_bias_v] = sgd_momentum::update(ip2_bias,ip2_dBias,(lr * 2.0),0.8999999761581421,ip2_bias_v)
+090|            # Compute training loss & accuracy
+091|            if(iter  %% 100 == 0) {
+092|                    loss = 0
+093|                    accuracy = 0
+094|                    tmp_loss = cross_entropy_loss::forward(out13,yb)
+095|                    loss = loss + tmp_loss
+096|                    true_yb = rowIndexMax(yb)
+097|                    predicted_yb = rowIndexMax(out13)
+098|                    accuracy = mean(predicted_yb == true_yb)*100
+099|                    training_loss = loss
+100|                    training_accuracy = accuracy
+101|                    print("Iter:" + iter + ", training loss:" + training_loss + ", training accuracy:" + training_accuracy)
+102|                    if(debug) {
+103|                            num_rows_error_measures = min(10, ncol(yb))
+104|                            error_measures = matrix(0, rows=num_rows_error_measures, cols=5)
+105|                            for(class_i in 1:num_rows_error_measures) {
+106|                                    tp = sum( (true_yb == predicted_yb) * (true_yb == class_i) )
+107|                                    tp_plus_fp = sum( (predicted_yb == class_i) )
+108|                                    tp_plus_fn = sum( (true_yb == class_i) )
+109|                                    precision = tp / tp_plus_fp
+110|                                    recall = tp / tp_plus_fn
+111|                                    f1Score = 2*precision*recall / (precision+recall)
+112|                                    error_measures[class_i,1] = class_i
+113|                                    error_measures[class_i,2] = precision
+114|                                    error_measures[class_i,3] = recall
+115|                                    error_measures[class_i,4] = f1Score
+116|                                    error_measures[class_i,5] = tp_plus_fn
+117|                            }
+118|                            print("class    \tprecision\trecall  \tf1-score\tnum_true_labels\n" + toString(error_measures, decimal=7, sep="\t"))
+119|                    }
+120|            }
+121|            # Compute validation loss & accuracy
+122|            if(iter  %% 500 == 0) {
+123|                    loss = 0
+124|                    accuracy = 0
+125|                    validation_loss = 0
+126|                    validation_accuracy = 0
+127|                    for(iVal in 1:num_iters_per_epoch) {
+128|                            beg = ((iVal-1) * BATCH_SIZE) %% num_validation + 1; end = min(beg + BATCH_SIZE - 1, num_validation); Xb = X_val[beg:end,]; yb = y_val[beg:end,];
+129|                            # Perform forward pass
+130|                            [out3,ignoreHout_3,ignoreWout_3] = conv2d_builtin::forward(Xb,conv1_weight,conv1_bias,1,28,28,5,5,1,1,2,2)
+131|                            out4 = relu::forward(out3)
+132|                            [out5,ignoreHout_5,ignoreWout_5] = max_pool2d_builtin::forward(out4,32,28,28,2,2,2,2,0,0)
+133|                            [out6,ignoreHout_6,ignoreWout_6] = conv2d_builtin::forward(out5,conv2_weight,conv2_bias,32,14,14,5,5,1,1,2,2)
+134|                            out7 = relu::forward(out6)
+135|                            [out8,ignoreHout_8,ignoreWout_8] = max_pool2d_builtin::forward(out7,64,14,14,2,2,2,2,0,0)
+136|                            out9 = affine::forward(out8,ip1_weight,ip1_bias)
+137|                            out10 = relu::forward(out9)
+138|                            [out11,mask11] = dropout::forward(out10,0.5,-1)
+139|                            out12 = affine::forward(out11,ip2_weight,ip2_bias)
+140|                            out13 = softmax::forward(out12)
+141|                            tmp_loss = cross_entropy_loss::forward(out13,yb)
+142|                            loss = loss + tmp_loss
+143|                            true_yb = rowIndexMax(yb)
+144|                            predicted_yb = rowIndexMax(out13)
+145|                            accuracy = mean(predicted_yb == true_yb)*100
+146|                            validation_loss = validation_loss + loss
+147|                            validation_accuracy = validation_accuracy + accuracy
+148|                    }
+149|                    validation_accuracy = validation_accuracy / num_iters_per_epoch
+150|                    print("Iter:" + iter + ", validation loss:" + validation_loss + ", validation accuracy:" + validation_accuracy)
+151|            }
+152|    }
+153|    # Learning rate
+154|    lr = (0.009999999776482582 * 0.949999988079071^e)
+155|}
+
+Iter:100, training loss:0.24014199350958168, training accuracy:87.5
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       3.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+3.0000000       0.8888889       0.8888889       0.8888889       9.0000000
+4.0000000       0.7500000       0.7500000       0.7500000       4.0000000
+5.0000000       0.7500000       1.0000000       0.8571429       3.0000000
+6.0000000       0.8333333       1.0000000       0.9090909       5.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+8.0000000       0.8571429       0.7500000       0.8000000       8.0000000
+9.0000000       1.0000000       0.5714286       0.7272727       7.0000000
+10.0000000      0.7272727       0.8888889       0.8000000       9.0000000
+
+Iter:200, training loss:0.09555593867171894, training accuracy:98.4375
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       10.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       3.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+7.0000000       1.0000000       0.6666667       0.8000000       3.0000000
+8.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+9.0000000       0.8571429       1.0000000       0.9230769       6.0000000
+10.0000000      1.0000000       1.0000000       1.0000000       3.0000000
+
+Iter:300, training loss:0.058686794512570216, training accuracy:98.4375
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+6.0000000       1.0000000       0.8750000       0.9333333       8.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+8.0000000       1.0000000       1.0000000       1.0000000       2.0000000
+9.0000000       0.8888889       1.0000000       0.9411765       8.0000000
+10.0000000      1.0000000       1.0000000       1.0000000       8.0000000
+
+Iter:400, training loss:0.08742103541529415, training accuracy:96.875
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+2.0000000       0.8000000       1.0000000       0.8888889       8.0000000
+3.0000000       1.0000000       0.8333333       0.9090909       6.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+8.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+10.0000000      1.0000000       0.9230769       0.9600000       13.0000000
+
+Iter:500, training loss:0.05873836245880005, training accuracy:98.4375
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       3.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+6.0000000       1.0000000       0.8571429       0.9230769       7.0000000
+7.0000000       0.8571429       1.0000000       0.9230769       6.0000000
+8.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       10.0000000
+10.0000000      1.0000000       1.0000000       1.0000000       5.0000000
+
+Iter:500, validation loss:260.1580978627665, validation accuracy:96.43954918032787
+Iter:600, training loss:0.07584116043829209, training accuracy:98.4375
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+8.0000000       1.0000000       0.9230769       0.9600000       13.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+10.0000000      0.8333333       1.0000000       0.9090909       5.0000000
+
+Iter:700, training loss:0.07973166944626336, training accuracy:98.4375
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       10.0000000
+8.0000000       0.8000000       1.0000000       0.8888889       4.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+10.0000000      1.0000000       0.9166667       0.9565217       12.0000000
+
+Iter:800, training loss:0.0063778595034221855, training accuracy:100.0
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+8.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       2.0000000
+10.0000000      1.0000000       1.0000000       1.0000000       6.0000000
+
+Iter:900, training loss:0.019673112167879484, training accuracy:100.0
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       3.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       3.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       10.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+8.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       12.0000000
+10.0000000      1.0000000       1.0000000       1.0000000       7.0000000
+
+Iter:1000, training loss:0.06137978002508307, training accuracy:96.875
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       8.0000000
+4.0000000       0.8333333       0.8333333       0.8333333       6.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       5.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       10.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       3.0000000
+8.0000000       0.8888889       0.8888889       0.8888889       9.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+10.0000000      1.0000000       1.0000000       1.0000000       4.0000000
+
+Iter:1000, validation loss:238.62301345198944, validation accuracy:97.02868852459017
+Iter:1100, training loss:0.023325103696013115, training accuracy:100.0
+class           precision       recall          f1-score        num_true_labels
+1.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+2.0000000       1.0000000       1.0000000       1.0000000       10.0000000
+3.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+4.0000000       1.0000000       1.0000000       1.0000000       4.0000000
+5.0000000       1.0000000       1.0000000       1.0000000       2.0000000
+6.0000000       1.0000000       1.0000000       1.0000000       10.0000000
+7.0000000       1.0000000       1.0000000       1.0000000       7.0000000
+8.0000000       1.0000000       1.0000000       1.0000000       6.0000000
+9.0000000       1.0000000       1.0000000       1.0000000       9.0000000
+10.0000000      1.0000000       1.0000000       1.0000000       6.0000000
+...
 ```

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/docs/python-reference.md
----------------------------------------------------------------------
diff --git a/docs/python-reference.md b/docs/python-reference.md
index 7de3fb0..119c1d0 100644
--- a/docs/python-reference.md
+++ b/docs/python-reference.md
@@ -406,6 +406,10 @@ model.transform(df_test)
 </div>
 </div>
 
+Please note that when training using the mllearn API (i.e. `model.fit(X_df)`), SystemML
+expects that the labels have already been converted to 1-based values.
+This avoids unnecessary decoding overhead for large datasets if the label column has already been decoded.
+For the scikit-learn API, there is no such requirement.
 
 The table below describes the parameter available for mllearn algorithms:
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/scripts/nn/examples/caffe2dml/models/imagenet/labels.txt
----------------------------------------------------------------------
diff --git a/scripts/nn/examples/caffe2dml/models/imagenet/labels.txt b/scripts/nn/examples/caffe2dml/models/imagenet/labels.txt
new file mode 100644
index 0000000..4cd9f00
--- /dev/null
+++ b/scripts/nn/examples/caffe2dml/models/imagenet/labels.txt
@@ -0,0 +1,1000 @@
+1,"tench, Tinca tinca"
+2,"goldfish, Carassius auratus"
+3,"great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias"
+4,"tiger shark, Galeocerdo cuvieri"
+5,"hammerhead, hammerhead shark"
+6,"electric ray, crampfish, numbfish, torpedo"
+7,"stingray"
+8,"cock"
+9,"hen"
+10,"ostrich, Struthio camelus"
+11,"brambling, Fringilla montifringilla"
+12,"goldfinch, Carduelis carduelis"
+13,"house finch, linnet, Carpodacus mexicanus"
+14,"junco, snowbird"
+15,"indigo bunting, indigo finch, indigo bird, Passerina cyanea"
+16,"robin, American robin, Turdus migratorius"
+17,"bulbul"
+18,"jay"
+19,"magpie"
+20,"chickadee"
+21,"water ouzel, dipper"
+22,"kite"
+23,"bald eagle, American eagle, Haliaeetus leucocephalus"
+24,"vulture"
+25,"great grey owl, great gray owl, Strix nebulosa"
+26,"European fire salamander, Salamandra salamandra"
+27,"common newt, Triturus vulgaris"
+28,"eft"
+29,"spotted salamander, Ambystoma maculatum"
+30,"axolotl, mud puppy, Ambystoma mexicanum"
+31,"bullfrog, Rana catesbeiana"
+32,"tree frog, tree-frog"
+33,"tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui"
+34,"loggerhead, loggerhead turtle, Caretta caretta"
+35,"leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea"
+36,"mud turtle"
+37,"terrapin"
+38,"box turtle, box tortoise"
+39,"banded gecko"
+40,"common iguana, iguana, Iguana iguana"
+41,"American chameleon, anole, Anolis carolinensis"
+42,"whiptail, whiptail lizard"
+43,"agama"
+44,"frilled lizard, Chlamydosaurus kingi"
+45,"alligator lizard"
+46,"Gila monster, Heloderma suspectum"
+47,"green lizard, Lacerta viridis"
+48,"African chameleon, Chamaeleo chamaeleon"
+49,"Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis"
+50,"African crocodile, Nile crocodile, Crocodylus niloticus"
+51,"American alligator, Alligator mississipiensis"
+52,"triceratops"
+53,"thunder snake, worm snake, Carphophis amoenus"
+54,"ringneck snake, ring-necked snake, ring snake"
+55,"hognose snake, puff adder, sand viper"
+56,"green snake, grass snake"
+57,"king snake, kingsnake"
+58,"garter snake, grass snake"
+59,"water snake"
+60,"vine snake"
+61,"night snake, Hypsiglena torquata"
+62,"boa constrictor, Constrictor constrictor"
+63,"rock python, rock snake, Python sebae"
+64,"Indian cobra, Naja naja"
+65,"green mamba"
+66,"sea snake"
+67,"horned viper, cerastes, sand viper, horned asp, Cerastes cornutus"
+68,"diamondback, diamondback rattlesnake, Crotalus adamanteus"
+69,"sidewinder, horned rattlesnake, Crotalus cerastes"
+70,"trilobite"
+71,"harvestman, daddy longlegs, Phalangium opilio"
+72,"scorpion"
+73,"black and gold garden spider, Argiope aurantia"
+74,"barn spider, Araneus cavaticus"
+75,"garden spider, Aranea diademata"
+76,"black widow, Latrodectus mactans"
+77,"tarantula"
+78,"wolf spider, hunting spider"
+79,"tick"
+80,"centipede"
+81,"black grouse"
+82,"ptarmigan"
+83,"ruffed grouse, partridge, Bonasa umbellus"
+84,"prairie chicken, prairie grouse, prairie fowl"
+85,"peacock"
+86,"quail"
+87,"partridge"
+88,"African grey, African gray, Psittacus erithacus"
+89,"macaw"
+90,"sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita"
+91,"lorikeet"
+92,"coucal"
+93,"bee eater"
+94,"hornbill"
+95,"hummingbird"
+96,"jacamar"
+97,"toucan"
+98,"drake"
+99,"red-breasted merganser, Mergus serrator"
+100,"goose"
+101,"black swan, Cygnus atratus"
+102,"tusker"
+103,"echidna, spiny anteater, anteater"
+104,"platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus"
+105,"wallaby, brush kangaroo"
+106,"koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus"
+107,"wombat"
+108,"jellyfish"
+109,"sea anemone, anemone"
+110,"brain coral"
+111,"flatworm, platyhelminth"
+112,"nematode, nematode worm, roundworm"
+113,"conch"
+114,"snail"
+115,"slug"
+116,"sea slug, nudibranch"
+117,"chiton, coat-of-mail shell, sea cradle, polyplacophore"
+118,"chambered nautilus, pearly nautilus, nautilus"
+119,"Dungeness crab, Cancer magister"
+120,"rock crab, Cancer irroratus"
+121,"fiddler crab"
+122,"king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica"
+123,"American lobster, Northern lobster, Maine lobster, Homarus americanus"
+124,"spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish"
+125,"crayfish, crawfish, crawdad, crawdaddy"
+126,"hermit crab"
+127,"isopod"
+128,"white stork, Ciconia ciconia"
+129,"black stork, Ciconia nigra"
+130,"spoonbill"
+131,"flamingo"
+132,"little blue heron, Egretta caerulea"
+133,"American egret, great white heron, Egretta albus"
+134,"bittern"
+135,"crane"
+136,"limpkin, Aramus pictus"
+137,"European gallinule, Porphyrio porphyrio"
+138,"American coot, marsh hen, mud hen, water hen, Fulica americana"
+139,"bustard"
+140,"ruddy turnstone, Arenaria interpres"
+141,"red-backed sandpiper, dunlin, Erolia alpina"
+142,"redshank, Tringa totanus"
+143,"dowitcher"
+144,"oystercatcher, oyster catcher"
+145,"pelican"
+146,"king penguin, Aptenodytes patagonica"
+147,"albatross, mollymawk"
+148,"grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus"
+149,"killer whale, killer, orca, grampus, sea wolf, Orcinus orca"
+150,"dugong, Dugong dugon"
+151,"sea lion"
+152,"Chihuahua"
+153,"Japanese spaniel"
+154,"Maltese dog, Maltese terrier, Maltese"
+155,"Pekinese, Pekingese, Peke"
+156,"Shih-Tzu"
+157,"Blenheim spaniel"
+158,"papillon"
+159,"toy terrier"
+160,"Rhodesian ridgeback"
+161,"Afghan hound, Afghan"
+162,"basset, basset hound"
+163,"beagle"
+164,"bloodhound, sleuthhound"
+165,"bluetick"
+166,"black-and-tan coonhound"
+167,"Walker hound, Walker foxhound"
+168,"English foxhound"
+169,"redbone"
+170,"borzoi, Russian wolfhound"
+171,"Irish wolfhound"
+172,"Italian greyhound"
+173,"whippet"
+174,"Ibizan hound, Ibizan Podenco"
+175,"Norwegian elkhound, elkhound"
+176,"otterhound, otter hound"
+177,"Saluki, gazelle hound"
+178,"Scottish deerhound, deerhound"
+179,"Weimaraner"
+180,"Staffordshire bullterrier, Staffordshire bull terrier"
+181,"American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier"
+182,"Bedlington terrier"
+183,"Border terrier"
+184,"Kerry blue terrier"
+185,"Irish terrier"
+186,"Norfolk terrier"
+187,"Norwich terrier"
+188,"Yorkshire terrier"
+189,"wire-haired fox terrier"
+190,"Lakeland terrier"
+191,"Sealyham terrier, Sealyham"
+192,"Airedale, Airedale terrier"
+193,"cairn, cairn terrier"
+194,"Australian terrier"
+195,"Dandie Dinmont, Dandie Dinmont terrier"
+196,"Boston bull, Boston terrier"
+197,"miniature schnauzer"
+198,"giant schnauzer"
+199,"standard schnauzer"
+200,"Scotch terrier, Scottish terrier, Scottie"
+201,"Tibetan terrier, chrysanthemum dog"
+202,"silky terrier, Sydney silky"
+203,"soft-coated wheaten terrier"
+204,"West Highland white terrier"
+205,"Lhasa, Lhasa apso"
+206,"flat-coated retriever"
+207,"curly-coated retriever"
+208,"golden retriever"
+209,"Labrador retriever"
+210,"Chesapeake Bay retriever"
+211,"German short-haired pointer"
+212,"vizsla, Hungarian pointer"
+213,"English setter"
+214,"Irish setter, red setter"
+215,"Gordon setter"
+216,"Brittany spaniel"
+217,"clumber, clumber spaniel"
+218,"English springer, English springer spaniel"
+219,"Welsh springer spaniel"
+220,"cocker spaniel, English cocker spaniel, cocker"
+221,"Sussex spaniel"
+222,"Irish water spaniel"
+223,"kuvasz"
+224,"schipperke"
+225,"groenendael"
+226,"malinois"
+227,"briard"
+228,"kelpie"
+229,"komondor"
+230,"Old English sheepdog, bobtail"
+231,"Shetland sheepdog, Shetland sheep dog, Shetland"
+232,"collie"
+233,"Border collie"
+234,"Bouvier des Flandres, Bouviers des Flandres"
+235,"Rottweiler"
+236,"German shepherd, German shepherd dog, German police dog, alsatian"
+237,"Doberman, Doberman pinscher"
+238,"miniature pinscher"
+239,"Greater Swiss Mountain dog"
+240,"Bernese mountain dog"
+241,"Appenzeller"
+242,"EntleBucher"
+243,"boxer"
+244,"bull mastiff"
+245,"Tibetan mastiff"
+246,"French bulldog"
+247,"Great Dane"
+248,"Saint Bernard, St Bernard"
+249,"Eskimo dog, husky"
+250,"malamute, malemute, Alaskan malamute"
+251,"Siberian husky"
+252,"dalmatian, coach dog, carriage dog"
+253,"affenpinscher, monkey pinscher, monkey dog"
+254,"basenji"
+255,"pug, pug-dog"
+256,"Leonberg"
+257,"Newfoundland, Newfoundland dog"
+258,"Great Pyrenees"
+259,"Samoyed, Samoyede"
+260,"Pomeranian"
+261,"chow, chow chow"
+262,"keeshond"
+263,"Brabancon griffon"
+264,"Pembroke, Pembroke Welsh corgi"
+265,"Cardigan, Cardigan Welsh corgi"
+266,"toy poodle"
+267,"miniature poodle"
+268,"standard poodle"
+269,"Mexican hairless"
+270,"timber wolf, grey wolf, gray wolf, Canis lupus"
+271,"white wolf, Arctic wolf, Canis lupus tundrarum"
+272,"red wolf, maned wolf, Canis rufus, Canis niger"
+273,"coyote, prairie wolf, brush wolf, Canis latrans"
+274,"dingo, warrigal, warragal, Canis dingo"
+275,"dhole, Cuon alpinus"
+276,"African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus"
+277,"hyena, hyaena"
+278,"red fox, Vulpes vulpes"
+279,"kit fox, Vulpes macrotis"
+280,"Arctic fox, white fox, Alopex lagopus"
+281,"grey fox, gray fox, Urocyon cinereoargenteus"
+282,"tabby, tabby cat"
+283,"tiger cat"
+284,"Persian cat"
+285,"Siamese cat, Siamese"
+286,"Egyptian cat"
+287,"cougar, puma, catamount, mountain lion, painter, panther, Felis concolor"
+288,"lynx, catamount"
+289,"leopard, Panthera pardus"
+290,"snow leopard, ounce, Panthera uncia"
+291,"jaguar, panther, Panthera onca, Felis onca"
+292,"lion, king of beasts, Panthera leo"
+293,"tiger, Panthera tigris"
+294,"cheetah, chetah, Acinonyx jubatus"
+295,"brown bear, bruin, Ursus arctos"
+296,"American black bear, black bear, Ursus americanus, Euarctos americanus"
+297,"ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus"
+298,"sloth bear, Melursus ursinus, Ursus ursinus"
+299,"mongoose"
+300,"meerkat, mierkat"
+301,"tiger beetle"
+302,"ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle"
+303,"ground beetle, carabid beetle"
+304,"long-horned beetle, longicorn, longicorn beetle"
+305,"leaf beetle, chrysomelid"
+306,"dung beetle"
+307,"rhinoceros beetle"
+308,"weevil"
+309,"fly"
+310,"bee"
+311,"ant, emmet, pismire"
+312,"grasshopper, hopper"
+313,"cricket"
+314,"walking stick, walkingstick, stick insect"
+315,"cockroach, roach"
+316,"mantis, mantid"
+317,"cicada, cicala"
+318,"leafhopper"
+319,"lacewing, lacewing fly"
+320,"dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk"
+321,"damselfly"
+322,"admiral"
+323,"ringlet, ringlet butterfly"
+324,"monarch, monarch butterfly, milkweed butterfly, Danaus plexippus"
+325,"cabbage butterfly"
+326,"sulphur butterfly, sulfur butterfly"
+327,"lycaenid, lycaenid butterfly"
+328,"starfish, sea star"
+329,"sea urchin"
+330,"sea cucumber, holothurian"
+331,"wood rabbit, cottontail, cottontail rabbit"
+332,"hare"
+333,"Angora, Angora rabbit"
+334,"hamster"
+335,"porcupine, hedgehog"
+336,"fox squirrel, eastern fox squirrel, Sciurus niger"
+337,"marmot"
+338,"beaver"
+339,"guinea pig, Cavia cobaya"
+340,"sorrel"
+341,"zebra"
+342,"hog, pig, grunter, squealer, Sus scrofa"
+343,"wild boar, boar, Sus scrofa"
+344,"warthog"
+345,"hippopotamus, hippo, river horse, Hippopotamus amphibius"
+346,"ox"
+347,"water buffalo, water ox, Asiatic buffalo, Bubalus bubalis"
+348,"bison"
+349,"ram, tup"
+350,"bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis"
+351,"ibex, Capra ibex"
+352,"hartebeest"
+353,"impala, Aepyceros melampus"
+354,"gazelle"
+355,"Arabian camel, dromedary, Camelus dromedarius"
+356,"llama"
+357,"weasel"
+358,"mink"
+359,"polecat, fitch, foulmart, foumart, Mustela putorius"
+360,"black-footed ferret, ferret, Mustela nigripes"
+361,"otter"
+362,"skunk, polecat, wood pussy"
+363,"badger"
+364,"armadillo"
+365,"three-toed sloth, ai, Bradypus tridactylus"
+366,"orangutan, orang, orangutang, Pongo pygmaeus"
+367,"gorilla, Gorilla gorilla"
+368,"chimpanzee, chimp, Pan troglodytes"
+369,"gibbon, Hylobates lar"
+370,"siamang, Hylobates syndactylus, Symphalangus syndactylus"
+371,"guenon, guenon monkey"
+372,"patas, hussar monkey, Erythrocebus patas"
+373,"baboon"
+374,"macaque"
+375,"langur"
+376,"colobus, colobus monkey"
+377,"proboscis monkey, Nasalis larvatus"
+378,"marmoset"
+379,"capuchin, ringtail, Cebus capucinus"
+380,"howler monkey, howler"
+381,"titi, titi monkey"
+382,"spider monkey, Ateles geoffroyi"
+383,"squirrel monkey, Saimiri sciureus"
+384,"Madagascar cat, ring-tailed lemur, Lemur catta"
+385,"indri, indris, Indri indri, Indri brevicaudatus"
+386,"Indian elephant, Elephas maximus"
+387,"African elephant, Loxodonta africana"
+388,"lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens"
+389,"giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca"
+390,"barracouta, snoek"
+391,"eel"
+392,"coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch"
+393,"rock beauty, Holocanthus tricolor"
+394,"anemone fish"
+395,"sturgeon"
+396,"gar, garfish, garpike, billfish, Lepisosteus osseus"
+397,"lionfish"
+398,"puffer, pufferfish, blowfish, globefish"
+399,"abacus"
+400,"abaya"
+401,"academic gown, academic robe, judge's robe"
+402,"accordion, piano accordion, squeeze box"
+403,"acoustic guitar"
+404,"aircraft carrier, carrier, flattop, attack aircraft carrier"
+405,"airliner"
+406,"airship, dirigible"
+407,"altar"
+408,"ambulance"
+409,"amphibian, amphibious vehicle"
+410,"analog clock"
+411,"apiary, bee house"
+412,"apron"
+413,"ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin"
+414,"assault rifle, assault gun"
+415,"backpack, back pack, knapsack, packsack, rucksack, haversack"
+416,"bakery, bakeshop, bakehouse"
+417,"balance beam, beam"
+418,"balloon"
+419,"ballpoint, ballpoint pen, ballpen, Biro"
+420,"Band Aid"
+421,"banjo"
+422,"bannister, banister, balustrade, balusters, handrail"
+423,"barbell"
+424,"barber chair"
+425,"barbershop"
+426,"barn"
+427,"barometer"
+428,"barrel, cask"
+429,"barrow, garden cart, lawn cart, wheelbarrow"
+430,"baseball"
+431,"basketball"
+432,"bassinet"
+433,"bassoon"
+434,"bathing cap, swimming cap"
+435,"bath towel"
+436,"bathtub, bathing tub, bath, tub"
+437,"beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon"
+438,"beacon, lighthouse, beacon light, pharos"
+439,"beaker"
+440,"bearskin, busby, shako"
+441,"beer bottle"
+442,"beer glass"
+443,"bell cote, bell cot"
+444,"bib"
+445,"bicycle-built-for-two, tandem bicycle, tandem"
+446,"bikini, two-piece"
+447,"binder, ring-binder"
+448,"binoculars, field glasses, opera glasses"
+449,"birdhouse"
+450,"boathouse"
+451,"bobsled, bobsleigh, bob"
+452,"bolo tie, bolo, bola tie, bola"
+453,"bonnet, poke bonnet"
+454,"bookcase"
+455,"bookshop, bookstore, bookstall"
+456,"bottlecap"
+457,"bow"
+458,"bow tie, bow-tie, bowtie"
+459,"brass, memorial tablet, plaque"
+460,"brassiere, bra, bandeau"
+461,"breakwater, groin, groyne, mole, bulwark, seawall, jetty"
+462,"breastplate, aegis, egis"
+463,"broom"
+464,"bucket, pail"
+465,"buckle"
+466,"bulletproof vest"
+467,"bullet train, bullet"
+468,"butcher shop, meat market"
+469,"cab, hack, taxi, taxicab"
+470,"caldron, cauldron"
+471,"candle, taper, wax light"
+472,"cannon"
+473,"canoe"
+474,"can opener, tin opener"
+475,"cardigan"
+476,"car mirror"
+477,"carousel, carrousel, merry-go-round, roundabout, whirligig"
+478,"carpenter's kit, tool kit"
+479,"carton"
+480,"car wheel"
+481,"cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM"
+482,"cassette"
+483,"cassette player"
+484,"castle"
+485,"catamaran"
+486,"CD player"
+487,"cello, violoncello"
+488,"cellular telephone, cellular phone, cellphone, cell, mobile phone"
+489,"chain"
+490,"chainlink fence"
+491,"chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour"
+492,"chain saw, chainsaw"
+493,"chest"
+494,"chiffonier, commode"
+495,"chime, bell, gong"
+496,"china cabinet, china closet"
+497,"Christmas stocking"
+498,"church, church building"
+499,"cinema, movie theater, movie theatre, movie house, picture palace"
+500,"cleaver, meat cleaver, chopper"
+501,"cliff dwelling"
+502,"cloak"
+503,"clog, geta, patten, sabot"
+504,"cocktail shaker"
+505,"coffee mug"
+506,"coffeepot"
+507,"coil, spiral, volute, whorl, helix"
+508,"combination lock"
+509,"computer keyboard, keypad"
+510,"confectionery, confectionary, candy store"
+511,"container ship, containership, container vessel"
+512,"convertible"
+513,"corkscrew, bottle screw"
+514,"cornet, horn, trumpet, trump"
+515,"cowboy boot"
+516,"cowboy hat, ten-gallon hat"
+517,"cradle"
+518,"crane"
+519,"crash helmet"
+520,"crate"
+521,"crib, cot"
+522,"Crock Pot"
+523,"croquet ball"
+524,"crutch"
+525,"cuirass"
+526,"dam, dike, dyke"
+527,"desk"
+528,"desktop computer"
+529,"dial telephone, dial phone"
+530,"diaper, nappy, napkin"
+531,"digital clock"
+532,"digital watch"
+533,"dining table, board"
+534,"dishrag, dishcloth"
+535,"dishwasher, dish washer, dishwashing machine"
+536,"disk brake, disc brake"
+537,"dock, dockage, docking facility"
+538,"dogsled, dog sled, dog sleigh"
+539,"dome"
+540,"doormat, welcome mat"
+541,"drilling platform, offshore rig"
+542,"drum, membranophone, tympan"
+543,"drumstick"
+544,"dumbbell"
+545,"Dutch oven"
+546,"electric fan, blower"
+547,"electric guitar"
+548,"electric locomotive"
+549,"entertainment center"
+550,"envelope"
+551,"espresso maker"
+552,"face powder"
+553,"feather boa, boa"
+554,"file, file cabinet, filing cabinet"
+555,"fireboat"
+556,"fire engine, fire truck"
+557,"fire screen, fireguard"
+558,"flagpole, flagstaff"
+559,"flute, transverse flute"
+560,"folding chair"
+561,"football helmet"
+562,"forklift"
+563,"fountain"
+564,"fountain pen"
+565,"four-poster"
+566,"freight car"
+567,"French horn, horn"
+568,"frying pan, frypan, skillet"
+569,"fur coat"
+570,"garbage truck, dustcart"
+571,"gasmask, respirator, gas helmet"
+572,"gas pump, gasoline pump, petrol pump, island dispenser"
+573,"goblet"
+574,"go-kart"
+575,"golf ball"
+576,"golfcart, golf cart"
+577,"gondola"
+578,"gong, tam-tam"
+579,"gown"
+580,"grand piano, grand"
+581,"greenhouse, nursery, glasshouse"
+582,"grille, radiator grille"
+583,"grocery store, grocery, food market, market"
+584,"guillotine"
+585,"hair slide"
+586,"hair spray"
+587,"half track"
+588,"hammer"
+589,"hamper"
+590,"hand blower, blow dryer, blow drier, hair dryer, hair drier"
+591,"hand-held computer, hand-held microcomputer"
+592,"handkerchief, hankie, hanky, hankey"
+593,"hard disc, hard disk, fixed disk"
+594,"harmonica, mouth organ, harp, mouth harp"
+595,"harp"
+596,"harvester, reaper"
+597,"hatchet"
+598,"holster"
+599,"home theater, home theatre"
+600,"honeycomb"
+601,"hook, claw"
+602,"hoopskirt, crinoline"
+603,"horizontal bar, high bar"
+604,"horse cart, horse-cart"
+605,"hourglass"
+606,"iPod"
+607,"iron, smoothing iron"
+608,"jack-o'-lantern"
+609,"jean, blue jean, denim"
+610,"jeep, landrover"
+611,"jersey, T-shirt, tee shirt"
+612,"jigsaw puzzle"
+613,"jinrikisha, ricksha, rickshaw"
+614,"joystick"
+615,"kimono"
+616,"knee pad"
+617,"knot"
+618,"lab coat, laboratory coat"
+619,"ladle"
+620,"lampshade, lamp shade"
+621,"laptop, laptop computer"
+622,"lawn mower, mower"
+623,"lens cap, lens cover"
+624,"letter opener, paper knife, paperknife"
+625,"library"
+626,"lifeboat"
+627,"lighter, light, igniter, ignitor"
+628,"limousine, limo"
+629,"liner, ocean liner"
+630,"lipstick, lip rouge"
+631,"Loafer"
+632,"lotion"
+633,"loudspeaker, speaker, speaker unit, loudspeaker system, speaker system"
+634,"loupe, jeweler's loupe"
+635,"lumbermill, sawmill"
+636,"magnetic compass"
+637,"mailbag, postbag"
+638,"mailbox, letter box"
+639,"maillot"
+640,"maillot, tank suit"
+641,"manhole cover"
+642,"maraca"
+643,"marimba, xylophone"
+644,"mask"
+645,"matchstick"
+646,"maypole"
+647,"maze, labyrinth"
+648,"measuring cup"
+649,"medicine chest, medicine cabinet"
+650,"megalith, megalithic structure"
+651,"microphone, mike"
+652,"microwave, microwave oven"
+653,"military uniform"
+654,"milk can"
+655,"minibus"
+656,"miniskirt, mini"
+657,"minivan"
+658,"missile"
+659,"mitten"
+660,"mixing bowl"
+661,"mobile home, manufactured home"
+662,"Model T"
+663,"modem"
+664,"monastery"
+665,"monitor"
+666,"moped"
+667,"mortar"
+668,"mortarboard"
+669,"mosque"
+670,"mosquito net"
+671,"motor scooter, scooter"
+672,"mountain bike, all-terrain bike, off-roader"
+673,"mountain tent"
+674,"mouse, computer mouse"
+675,"mousetrap"
+676,"moving van"
+677,"muzzle"
+678,"nail"
+679,"neck brace"
+680,"necklace"
+681,"nipple"
+682,"notebook, notebook computer"
+683,"obelisk"
+684,"oboe, hautboy, hautbois"
+685,"ocarina, sweet potato"
+686,"odometer, hodometer, mileometer, milometer"
+687,"oil filter"
+688,"organ, pipe organ"
+689,"oscilloscope, scope, cathode-ray oscilloscope, CRO"
+690,"overskirt"
+691,"oxcart"
+692,"oxygen mask"
+693,"packet"
+694,"paddle, boat paddle"
+695,"paddlewheel, paddle wheel"
+696,"padlock"
+697,"paintbrush"
+698,"pajama, pyjama, pj's, jammies"
+699,"palace"
+700,"panpipe, pandean pipe, syrinx"
+701,"paper towel"
+702,"parachute, chute"
+703,"parallel bars, bars"
+704,"park bench"
+705,"parking meter"
+706,"passenger car, coach, carriage"
+707,"patio, terrace"
+708,"pay-phone, pay-station"
+709,"pedestal, plinth, footstall"
+710,"pencil box, pencil case"
+711,"pencil sharpener"
+712,"perfume, essence"
+713,"Petri dish"
+714,"photocopier"
+715,"pick, plectrum, plectron"
+716,"pickelhaube"
+717,"picket fence, paling"
+718,"pickup, pickup truck"
+719,"pier"
+720,"piggy bank, penny bank"
+721,"pill bottle"
+722,"pillow"
+723,"ping-pong ball"
+724,"pinwheel"
+725,"pirate, pirate ship"
+726,"pitcher, ewer"
+727,"plane, carpenter's plane, woodworking plane"
+728,"planetarium"
+729,"plastic bag"
+730,"plate rack"
+731,"plow, plough"
+732,"plunger, plumber's helper"
+733,"Polaroid camera, Polaroid Land camera"
+734,"pole"
+735,"police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria"
+736,"poncho"
+737,"pool table, billiard table, snooker table"
+738,"pop bottle, soda bottle"
+739,"pot, flowerpot"
+740,"potter's wheel"
+741,"power drill"
+742,"prayer rug, prayer mat"
+743,"printer"
+744,"prison, prison house"
+745,"projectile, missile"
+746,"projector"
+747,"puck, hockey puck"
+748,"punching bag, punch bag, punching ball, punchball"
+749,"purse"
+750,"quill, quill pen"
+751,"quilt, comforter, comfort, puff"
+752,"racer, race car, racing car"
+753,"racket, racquet"
+754,"radiator"
+755,"radio, wireless"
+756,"radio telescope, radio reflector"
+757,"rain barrel"
+758,"recreational vehicle, RV, R.V."
+759,"reel"
+760,"reflex camera"
+761,"refrigerator, icebox"
+762,"remote control, remote"
+763,"restaurant, eating house, eating place, eatery"
+764,"revolver, six-gun, six-shooter"
+765,"rifle"
+766,"rocking chair, rocker"
+767,"rotisserie"
+768,"rubber eraser, rubber, pencil eraser"
+769,"rugby ball"
+770,"rule, ruler"
+771,"running shoe"
+772,"safe"
+773,"safety pin"
+774,"saltshaker, salt shaker"
+775,"sandal"
+776,"sarong"
+777,"sax, saxophone"
+778,"scabbard"
+779,"scale, weighing machine"
+780,"school bus"
+781,"schooner"
+782,"scoreboard"
+783,"screen, CRT screen"
+784,"screw"
+785,"screwdriver"
+786,"seat belt, seatbelt"
+787,"sewing machine"
+788,"shield, buckler"
+789,"shoe shop, shoe-shop, shoe store"
+790,"shoji"
+791,"shopping basket"
+792,"shopping cart"
+793,"shovel"
+794,"shower cap"
+795,"shower curtain"
+796,"ski"
+797,"ski mask"
+798,"sleeping bag"
+799,"slide rule, slipstick"
+800,"sliding door"
+801,"slot, one-armed bandit"
+802,"snorkel"
+803,"snowmobile"
+804,"snowplow, snowplough"
+805,"soap dispenser"
+806,"soccer ball"
+807,"sock"
+808,"solar dish, solar collector, solar furnace"
+809,"sombrero"
+810,"soup bowl"
+811,"space bar"
+812,"space heater"
+813,"space shuttle"
+814,"spatula"
+815,"speedboat"
+816,"spider web, spider's web"
+817,"spindle"
+818,"sports car, sport car"
+819,"spotlight, spot"
+820,"stage"
+821,"steam locomotive"
+822,"steel arch bridge"
+823,"steel drum"
+824,"stethoscope"
+825,"stole"
+826,"stone wall"
+827,"stopwatch, stop watch"
+828,"stove"
+829,"strainer"
+830,"streetcar, tram, tramcar, trolley, trolley car"
+831,"stretcher"
+832,"studio couch, day bed"
+833,"stupa, tope"
+834,"submarine, pigboat, sub, U-boat"
+835,"suit, suit of clothes"
+836,"sundial"
+837,"sunglass"
+838,"sunglasses, dark glasses, shades"
+839,"sunscreen, sunblock, sun blocker"
+840,"suspension bridge"
+841,"swab, swob, mop"
+842,"sweatshirt"
+843,"swimming trunks, bathing trunks"
+844,"swing"
+845,"switch, electric switch, electrical switch"
+846,"syringe"
+847,"table lamp"
+848,"tank, army tank, armored combat vehicle, armoured combat vehicle"
+849,"tape player"
+850,"teapot"
+851,"teddy, teddy bear"
+852,"television, television system"
+853,"tennis ball"
+854,"thatch, thatched roof"
+855,"theater curtain, theatre curtain"
+856,"thimble"
+857,"thresher, thrasher, threshing machine"
+858,"throne"
+859,"tile roof"
+860,"toaster"
+861,"tobacco shop, tobacconist shop, tobacconist"
+862,"toilet seat"
+863,"torch"
+864,"totem pole"
+865,"tow truck, tow car, wrecker"
+866,"toyshop"
+867,"tractor"
+868,"trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi"
+869,"tray"
+870,"trench coat"
+871,"tricycle, trike, velocipede"
+872,"trimaran"
+873,"tripod"
+874,"triumphal arch"
+875,"trolleybus, trolley coach, trackless trolley"
+876,"trombone"
+877,"tub, vat"
+878,"turnstile"
+879,"typewriter keyboard"
+880,"umbrella"
+881,"unicycle, monocycle"
+882,"upright, upright piano"
+883,"vacuum, vacuum cleaner"
+884,"vase"
+885,"vault"
+886,"velvet"
+887,"vending machine"
+888,"vestment"
+889,"viaduct"
+890,"violin, fiddle"
+891,"volleyball"
+892,"waffle iron"
+893,"wall clock"
+894,"wallet, billfold, notecase, pocketbook"
+895,"wardrobe, closet, press"
+896,"warplane, military plane"
+897,"washbasin, handbasin, washbowl, lavabo, wash-hand basin"
+898,"washer, automatic washer, washing machine"
+899,"water bottle"
+900,"water jug"
+901,"water tower"
+902,"whiskey jug"
+903,"whistle"
+904,"wig"
+905,"window screen"
+906,"window shade"
+907,"Windsor tie"
+908,"wine bottle"
+909,"wing"
+910,"wok"
+911,"wooden spoon"
+912,"wool, woolen, woollen"
+913,"worm fence, snake fence, snake-rail fence, Virginia fence"
+914,"wreck"
+915,"yawl"
+916,"yurt"
+917,"web site, website, internet site, site"
+918,"comic book"
+919,"crossword puzzle, crossword"
+920,"street sign"
+921,"traffic light, traffic signal, stoplight"
+922,"book jacket, dust cover, dust jacket, dust wrapper"
+923,"menu"
+924,"plate"
+925,"guacamole"
+926,"consomme"
+927,"hot pot, hotpot"
+928,"trifle"
+929,"ice cream, icecream"
+930,"ice lolly, lolly, lollipop, popsicle"
+931,"French loaf"
+932,"bagel, beigel"
+933,"pretzel"
+934,"cheeseburger"
+935,"hotdog, hot dog, red hot"
+936,"mashed potato"
+937,"head cabbage"
+938,"broccoli"
+939,"cauliflower"
+940,"zucchini, courgette"
+941,"spaghetti squash"
+942,"acorn squash"
+943,"butternut squash"
+944,"cucumber, cuke"
+945,"artichoke, globe artichoke"
+946,"bell pepper"
+947,"cardoon"
+948,"mushroom"
+949,"Granny Smith"
+950,"strawberry"
+951,"orange"
+952,"lemon"
+953,"fig"
+954,"pineapple, ananas"
+955,"banana"
+956,"jackfruit, jak, jack"
+957,"custard apple"
+958,"pomegranate"
+959,"hay"
+960,"carbonara"
+961,"chocolate sauce, chocolate syrup"
+962,"dough"
+963,"meat loaf, meatloaf"
+964,"pizza, pizza pie"
+965,"potpie"
+966,"burrito"
+967,"red wine"
+968,"espresso"
+969,"cup"
+970,"eggnog"
+971,"alp"
+972,"bubble"
+973,"cliff, drop, drop-off"
+974,"coral reef"
+975,"geyser"
+976,"lakeside, lakeshore"
+977,"promontory, headland, head, foreland"
+978,"sandbar, sand bar"
+979,"seashore, coast, seacoast, sea-coast"
+980,"valley, vale"
+981,"volcano"
+982,"ballplayer, baseball player"
+983,"groom, bridegroom"
+984,"scuba diver"
+985,"rapeseed"
+986,"daisy"
+987,"yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum"
+988,"corn"
+989,"acorn"
+990,"hip, rose hip, rosehip"
+991,"buckeye, horse chestnut, conker"
+992,"coral fungus"
+993,"agaric"
+994,"gyromitra"
+995,"stinkhorn, carrion fungus"
+996,"earthstar"
+997,"hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa"
+998,"bolete"
+999,"ear, spike, capitulum"
+1000,"toilet tissue, toilet paper, bathroom tissue"

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_deploy.proto
----------------------------------------------------------------------
diff --git a/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_deploy.proto b/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_deploy.proto
new file mode 100644
index 0000000..c2c11d3
--- /dev/null
+++ b/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_deploy.proto
@@ -0,0 +1,414 @@
+name: "VGG_ILSVRC_19_layer"
+layer {
+    name: "data"
+    type: "Input"
+    top: "data"
+    input_param { shape: { dim: 1 dim: 3 dim: 224 dim: 224 } }
+}
+layer {
+  name: "conv1_1"
+  type: "Convolution"
+  bottom: "data"
+  top: "conv1_1"
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv1_1"
+  top: "relu1_1"
+  name: "relu1_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu1_1"
+  top: "conv1_2"
+  name: "conv1_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv1_2"
+  top: "relu1_2"
+  name: "relu1_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu1_2"
+  top: "pool1"
+  name: "pool1"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool1"
+  top: "conv2_1"
+  name: "conv2_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv2_1"
+  top: "relu2_1"
+  name: "relu2_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu2_1"
+  top: "conv2_2"
+  name: "conv2_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv2_2"
+  top: "relu2_2"
+  name: "relu2_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu2_2"
+  top: "pool2"
+  name: "pool2"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool2"
+  top: "conv3_1"
+  name: "conv3_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_1"
+  top: "relu3_1"
+  name: "relu3_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_1"
+  top: "conv3_2"
+  name: "conv3_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_2"
+  top: "relu3_2"
+  name: "relu3_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_2"
+  top: "conv3_3"
+  name: "conv3_3"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_3"
+  top: "relu3_3"
+  name: "relu3_3"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_3"
+  top: "conv3_4"
+  name: "conv3_4"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_4"
+  top: "relu3_4"
+  name: "relu3_4"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_4"
+  top: "pool3"
+  name: "pool3"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool3"
+  top: "conv4_1"
+  name: "conv4_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_1"
+  top: "relu4_1"
+  name: "relu4_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_1"
+  top: "conv4_2"
+  name: "conv4_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_2"
+  top: "relu4_2"
+  name: "relu4_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_2"
+  top: "conv4_3"
+  name: "conv4_3"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_3"
+  top: "relu4_3"
+  name: "relu4_3"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_3"
+  top: "conv4_4"
+  name: "conv4_4"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_4"
+  top: "relu4_4"
+  name: "relu4_4"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_4"
+  top: "pool4"
+  name: "pool4"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool4"
+  top: "conv5_1"
+  name: "conv5_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_1"
+  top: "relu5_1"
+  name: "relu5_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_1"
+  top: "conv5_2"
+  name: "conv5_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_2"
+  top: "relu5_2"
+  name: "relu5_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_2"
+  top: "conv5_3"
+  name: "conv5_3"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_3"
+  top: "relu5_3"
+  name: "relu5_3"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_3"
+  top: "conv5_4"
+  name: "conv5_4"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_4"
+  top: "relu5_4"
+  name: "relu5_4"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_4"
+  top: "pool5"
+  name: "pool5"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool5"
+  top: "fc6"
+  name: "fc6"
+  type: "InnerProduct"
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  bottom: "fc6"
+  top: "relu6"
+  name: "relu6"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu6"
+  top: "drop6"
+  name: "drop6"
+  type: "Dropout"
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layer {
+  bottom: "drop6"
+  top: "fc7"
+  name: "fc7"
+  type: "InnerProduct"
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  bottom: "fc7"
+  top: "relu7"
+  name: "relu7"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu7"
+  top: "drop7"
+  name: "drop7"
+  type: "Dropout"
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layer {
+  bottom: "drop7"
+  top: "fc8"
+  name: "fc8"
+  type: "InnerProduct"
+  inner_product_param {
+    num_output: 1000
+  }
+}
+layer {
+  bottom: "fc8"
+  top: "prob"
+  name: "prob"
+  type: "Softmax"
+}
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_network.proto
----------------------------------------------------------------------
diff --git a/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_network.proto b/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_network.proto
new file mode 100644
index 0000000..e0ec91f
--- /dev/null
+++ b/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_network.proto
@@ -0,0 +1,422 @@
+name: "VGG_ILSVRC_19_layer"
+layer {
+  name: "data"
+  type: "Data"
+  top: "data"
+  top: "label"
+  include {
+    phase: TRAIN
+  }
+  data_param {
+    source: "imagenet_train"
+    batch_size: 64
+    backend: LMDB
+  }
+}
+layer {
+  name: "conv1_1"
+  type: "Convolution"
+  bottom: "data"
+  top: "conv1_1"
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv1_1"
+  top: "relu1_1"
+  name: "relu1_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu1_1"
+  top: "conv1_2"
+  name: "conv1_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 64
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv1_2"
+  top: "relu1_2"
+  name: "relu1_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu1_2"
+  top: "pool1"
+  name: "pool1"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool1"
+  top: "conv2_1"
+  name: "conv2_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv2_1"
+  top: "relu2_1"
+  name: "relu2_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu2_1"
+  top: "conv2_2"
+  name: "conv2_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 128
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv2_2"
+  top: "relu2_2"
+  name: "relu2_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu2_2"
+  top: "pool2"
+  name: "pool2"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool2"
+  top: "conv3_1"
+  name: "conv3_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_1"
+  top: "relu3_1"
+  name: "relu3_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_1"
+  top: "conv3_2"
+  name: "conv3_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_2"
+  top: "relu3_2"
+  name: "relu3_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_2"
+  top: "conv3_3"
+  name: "conv3_3"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_3"
+  top: "relu3_3"
+  name: "relu3_3"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_3"
+  top: "conv3_4"
+  name: "conv3_4"
+  type: "Convolution"
+  convolution_param {
+    num_output: 256
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv3_4"
+  top: "relu3_4"
+  name: "relu3_4"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu3_4"
+  top: "pool3"
+  name: "pool3"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool3"
+  top: "conv4_1"
+  name: "conv4_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_1"
+  top: "relu4_1"
+  name: "relu4_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_1"
+  top: "conv4_2"
+  name: "conv4_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_2"
+  top: "relu4_2"
+  name: "relu4_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_2"
+  top: "conv4_3"
+  name: "conv4_3"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_3"
+  top: "relu4_3"
+  name: "relu4_3"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_3"
+  top: "conv4_4"
+  name: "conv4_4"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv4_4"
+  top: "relu4_4"
+  name: "relu4_4"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu4_4"
+  top: "pool4"
+  name: "pool4"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool4"
+  top: "conv5_1"
+  name: "conv5_1"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_1"
+  top: "relu5_1"
+  name: "relu5_1"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_1"
+  top: "conv5_2"
+  name: "conv5_2"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_2"
+  top: "relu5_2"
+  name: "relu5_2"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_2"
+  top: "conv5_3"
+  name: "conv5_3"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_3"
+  top: "relu5_3"
+  name: "relu5_3"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_3"
+  top: "conv5_4"
+  name: "conv5_4"
+  type: "Convolution"
+  convolution_param {
+    num_output: 512
+    pad: 1
+    kernel_size: 3
+    stride: 1
+  }
+}
+layer {
+  bottom: "conv5_4"
+  top: "relu5_4"
+  name: "relu5_4"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu5_4"
+  top: "pool5"
+  name: "pool5"
+  type: "Pooling"
+  pooling_param {
+    pool: MAX
+    kernel_size: 2
+    stride: 2
+  }
+}
+layer {
+  bottom: "pool5"
+  top: "fc6"
+  name: "fc6"
+  type: "InnerProduct"
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  bottom: "fc6"
+  top: "relu6"
+  name: "relu6"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu6"
+  top: "drop6"
+  name: "drop6"
+  type: "Dropout"
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layer {
+  bottom: "drop6"
+  top: "fc7"
+  name: "fc7"
+  type: "InnerProduct"
+  inner_product_param {
+    num_output: 4096
+  }
+}
+layer {
+  bottom: "fc7"
+  top: "relu7"
+  name: "relu7"
+  type: "ReLU"
+}
+layer {
+  bottom: "relu7"
+  top: "drop7"
+  name: "drop7"
+  type: "Dropout"
+  dropout_param {
+    dropout_ratio: 0.5
+  }
+}
+layer {
+  bottom: "drop7"
+  top: "fc8"
+  name: "fc8"
+  type: "InnerProduct"
+  inner_product_param {
+    num_output: 1000
+  }
+}
+layer {
+  bottom: "fc8"
+  top: "prob"
+  name: "prob"
+  type: "SoftmaxWithLoss"
+}
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_solver.proto
----------------------------------------------------------------------
diff --git a/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_solver.proto b/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_solver.proto
new file mode 100644
index 0000000..2206ad7
--- /dev/null
+++ b/scripts/nn/examples/caffe2dml/models/imagenet/vgg19/VGG_ILSVRC_19_layers_solver.proto
@@ -0,0 +1,14 @@
+net: "VGG_ILSVRC_19_layers_network.proto"
+test_iter: 915
+test_interval: 1000
+# lr for fine-tuning should be lower than when starting from scratch
+base_lr: 0.001
+lr_policy: "exp"
+gamma: 0.1
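+# decrease lr each 20000 iterations (stepsize is what the "step" lr_policy uses; NOTE(review): lr_policy above is "exp" - confirm which is intended)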
+stepsize: 20000
+display: 20
+max_iter: 600000
+momentum: 0.9
+weight_decay: 0.0005
+solver_mode: CPU

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/scripts/nn/layers/conv2d_transpose.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/conv2d_transpose.dml b/scripts/nn/layers/conv2d_transpose.dml
index bdc5090..b652c12 100644
--- a/scripts/nn/layers/conv2d_transpose.dml
+++ b/scripts/nn/layers/conv2d_transpose.dml
@@ -236,4 +236,3 @@ init_bilinear = function(int C, int K)
 
   b = matrix(0, rows=C, cols=1)
 }
-

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/scripts/nn/layers/softmax2d.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/softmax2d.dml b/scripts/nn/layers/softmax2d.dml
index 0207ac4..610df79 100644
--- a/scripts/nn/layers/softmax2d.dml
+++ b/scripts/nn/layers/softmax2d.dml
@@ -111,4 +111,3 @@ backward = function(matrix[double] dprobs, matrix[double] scores, int C)
   dscores_C_NHW = t(dscores_NHW_C)
   dscores = util::transpose_NCHW_to_CNHW(dscores_C_NHW, N)
 }
-

http://git-wip-us.apache.org/repos/asf/systemml/blob/978d4de4/scripts/nn/util.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/util.dml b/scripts/nn/util.dml
index ba345c5..807d7ba 100644
--- a/scripts/nn/util.dml
+++ b/scripts/nn/util.dml
@@ -42,6 +42,32 @@ channel_sums = function(matrix[double] X, int C, int Hin, int Win)
   out = rowSums(matrix(colSums(X), rows=C, cols=Hin*Win))  # shape (C, 1)
 }
 
+predict_class = function(matrix[double] Prob, int C, int H, int W) return (matrix[double] Prediction) {
+  /*
+   * Computes the class labels from the probabilities.
+   *
+   * Inputs:
+   *  - Prob: Input probabilities, one example per row; each row is reshaped to (C, H*W) unless H and W are both 1.
+   *  - C: Number of output labels.
+   *  - H: Input height.
+   *  - W: Input width.
+   *
+   * Outputs:
+   *  - Prediction: Class labels (one-based), of shape (N, H*W) where N = nrow(Prob).
+   */
+  if(H == 1 & W == 1) {
+    Prediction = rowIndexMax(Prob); # index of the max entry in each row; assuming one-based label mapping
+  }
+  else {
+    N = nrow(Prob);
+    Prediction = matrix(0, rows=N, cols=H*W);
+    parfor(n in 1:N) {
+      Prob1 = matrix(Prob[n,], rows=C, cols=H*W);
+      Prediction[n,] = t(rowIndexMax(t(Prob1))); # max over the C rows for each of the H*W columns; assuming one-based label mapping
+    }
+  }
+}
+
 im2col = function(matrix[double] img, int Hin, int Win, int Hf, int Wf, int strideh, int stridew)
     return (matrix[double] img_cols) {
   /*