You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/07/21 17:24:20 UTC
systemml git commit: [SYSTEMML-540] Allow mllearn models to load the model eagerly.

Repository: systemml
Updated Branches:
  refs/heads/master eee35e984 -> 1f5b14dda


[SYSTEMML-540] Allow mllearn models to load the model eagerly.

- This simplifies performance debugging of training and scoring.

Closes #574.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/1f5b14dd
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/1f5b14dd
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/1f5b14dd

Branch: refs/heads/master
Commit: 1f5b14dda5ee231a37bd1935b92ba8212132355b
Parents: eee35e9
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Fri Jul 21 10:22:04 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Fri Jul 21 10:22:57 2017 -0700

----------------------------------------------------------------------
 src/main/python/systemml/mllearn/estimators.py       | 15 +++++++++------
 .../apache/sysml/api/ml/BaseSystemMLClassifier.scala | 11 ++++++++++-
 2 files changed, 19 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/1f5b14dd/src/main/python/systemml/mllearn/estimators.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/estimators.py b/src/main/python/systemml/mllearn/estimators.py
index 17e5f37..1ec3628 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -366,7 +366,7 @@ class BaseSystemMLClassifier(BaseSystemMLEstimator):
                 self.labelMap[int(keys[i])] = values[i]
             # self.encode(classes) # Giving incorrect results
         
-    def load(self, weights, sep='/'):
+    def load(self, weights, sep='/', eager=False):
         """
         Load a pretrained model. 
 
@@ -374,9 +374,10 @@ class BaseSystemMLClassifier(BaseSystemMLEstimator):
         ----------
         weights: directory whether learned weights are stored
         sep: seperator to use (default: '/')
+        eager: load the model eagerly. This flag should only be used for debugging purposes. (default: False)
         """
         self.weights = weights
-        self.model.load(self.sc._jsc, weights, sep)
+        self.model.load(self.sc._jsc, weights, sep, eager)
         self.loadLabels(weights + '/labels.txt')
         
     def save(self, outputDir, format='binary', sep='/'):
@@ -421,7 +422,7 @@ class BaseSystemMLRegressor(BaseSystemMLEstimator):
         """
         return r2_score(y, self.predict(X), multioutput='variance_weighted')
         
-    def load(self, weights=None, sep='/'):
+    def load(self, weights=None, sep='/', eager=False):
         """
         Load a pretrained model. 
 
@@ -429,9 +430,10 @@ class BaseSystemMLRegressor(BaseSystemMLEstimator):
         ----------
         weights: directory whether learned weights are stored (default: None)
         sep: seperator to use (default: '/')
+        eager: load the model eagerly. This flag should only be used for debugging purposes. (default: False)
         """
         self.weights = weights
-        self.model.load(self.sc._jsc, weights, sep)
+        self.model.load(self.sc._jsc, weights, sep, eager)
 
     def save(self, outputDir, format='binary', sep='/'):
         """
@@ -764,7 +766,7 @@ class Caffe2DML(BaseSystemMLClassifier):
         if tensorboard_log_dir is not None:
             self.estimator.setTensorBoardLogDir(tensorboard_log_dir)
 
-    def load(self, weights=None, sep='/', ignore_weights=None):
+    def load(self, weights=None, sep='/', ignore_weights=None, eager=False):
         """
         Load a pretrained model. 
 
@@ -773,11 +775,12 @@ class Caffe2DML(BaseSystemMLClassifier):
         weights: directory whether learned weights are stored (default: None)
         sep: seperator to use (default: '/')
         ignore_weights: names of layers to not read from the weights directory (list of string, default:None)
+        eager: load the model eagerly. This flag should only be used for debugging purposes. (default: False)
         """
         self.weights = weights
         self.estimator.setInput("$weights", str(weights))
         self.model = self.sc._jvm.org.apache.sysml.api.dl.Caffe2DMLModel(self.estimator)
-        self.model.load(self.sc._jsc, weights, sep)
+        self.model.load(self.sc._jsc, weights, sep, eager)
         self.loadLabels(weights + '/labels.txt')
         if ignore_weights is not None:
             self.estimator.setWeightsToIgnore(ignore_weights)

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f5b14dd/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
----------------------------------------------------------------------
diff --git a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
index 3559a40..8b4817f 100644
--- a/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
+++ b/src/main/scala/org/apache/sysml/api/ml/BaseSystemMLClassifier.scala
@@ -36,6 +36,7 @@ import org.apache.spark.sql._
 import org.apache.sysml.api.mlcontext.MLContext.ExplainLevel
 import java.util.HashMap
 import scala.collection.JavaConversions._
+import java.util.Random
 
 
 /****************************************************
@@ -162,12 +163,20 @@ trait BaseSystemMLEstimatorModel extends BaseSystemMLEstimatorOrModel {
   def baseEstimator():BaseSystemMLEstimator
   def modelVariables():List[String]
   // self.model.load(self.sc._jsc, weights, format, sep)
-  def load(sc:JavaSparkContext, outputDir:String, sep:String):Unit = {
+  def load(sc:JavaSparkContext, outputDir:String, sep:String, eager:Boolean=false):Unit = {
   	val dmlScript = new StringBuilder
   	dmlScript.append("print(\"Loading the model from " + outputDir + "...\")\n")
+  	val tmpSum = "tmp_sum_var" + Math.abs((new Random()).nextInt())
+  	if(eager)
+  	  dmlScript.append(tmpSum + " = 0\n")
 		for(varName <- modelVariables) {
 			dmlScript.append(varName + " = read(\"" + outputDir + sep + varName + ".mtx\")\n")
+			if(eager)
+			  dmlScript.append(tmpSum + " = " + tmpSum + " + 0.001*mean(" + varName + ")\n")
 		}
+  	if(eager) {
+  	  dmlScript.append("if(" + tmpSum + " > 0) { print(\"Loaded the model\"); } else {  print(\"Loaded the model.\"); }")
+  	}
   	val script = dml(dmlScript.toString)
 		for(varName <- modelVariables) {
 			script.out(varName)