You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/07/13 04:32:12 UTC

systemml git commit: [MINOR] Improved JMLC memory profiling, command line arg -mem

Repository: systemml
Updated Branches:
  refs/heads/master 8af7a9ea3 -> 6bb136911


[MINOR] Improved JMLC memory profiling, command line arg -mem

Closes #797.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/6bb13691
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/6bb13691
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/6bb13691

Branch: refs/heads/master
Commit: 6bb136911b290b8c75ee16ea7da199bcc9dd0ba6
Parents: 8af7a9e
Author: Anthony Thomas <ah...@ibm.com>
Authored: Thu Jul 12 21:30:22 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Jul 12 21:30:23 2018 -0700

----------------------------------------------------------------------
 docs/jmlc.md                                    |   8 +-
 .../java/org/apache/sysml/api/DMLScript.java    | 148 ++++++++-----------
 .../org/apache/sysml/api/jmlc/Connection.java   |   2 +-
 .../controlprogram/LocalVariableMap.java        |   2 +-
 .../runtime/controlprogram/ProgramBlock.java    |   2 +-
 .../controlprogram/caching/CacheableData.java   |  14 +-
 .../context/ExecutionContext.java               |   3 +
 .../context/SparkExecutionContext.java          |   3 +
 .../java/org/apache/sysml/utils/Statistics.java |  38 +----
 9 files changed, 79 insertions(+), 141 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/docs/jmlc.md
----------------------------------------------------------------------
diff --git a/docs/jmlc.md b/docs/jmlc.md
index a703d01..08d1688 100644
--- a/docs/jmlc.md
+++ b/docs/jmlc.md
@@ -55,12 +55,8 @@ JMLC can be configured to gather runtime statistics, as in the MLContext API, by
 method with a value of `true`. JMLC can also be configured to gather statistics on the memory used by matrices and
 frames in the DML script. To enable collection of memory statistics, call Connection's `gatherMemStats()` method
 with a value of `true`. When finegrained statistics are enabled in `SystemML.conf`, JMLC will also report the variables
-in the DML script which used the most memory. By default, the memory use reported will be an overestimte of the actual
-memory required to run the program. When finegrained statistics are enabled, JMLC will gather more accurate statistics
-by keeping track of garbage collection events and reducing the memory estimate accordingly. The most accurate way to
-determine the memory required by a script is to run the script in a single thread and enable finegrained statistics.
-
-An example showing how to enable statistics in JMLC is presented in the section below.
+in the DML script which used the most memory. An example showing how to enable statistics in JMLC is presented in the
+section below.
 
 ---
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/api/DMLScript.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/DMLScript.java b/src/main/java/org/apache/sysml/api/DMLScript.java
index 9ecd83b..227c14b 100644
--- a/src/main/java/org/apache/sysml/api/DMLScript.java
+++ b/src/main/java/org/apache/sysml/api/DMLScript.java
@@ -97,11 +97,11 @@ import org.apache.sysml.yarn.DMLYarnClientProxy;
 public class DMLScript 
 {	
 	public enum RUNTIME_PLATFORM { 
-		HADOOP, 	    // execute all matrix operations in MR
+		HADOOP,         // execute all matrix operations in MR
 		SINGLE_NODE,    // execute all matrix operations in CP
 		HYBRID,         // execute matrix operations in CP or MR
 		HYBRID_SPARK,   // execute matrix operations in CP or Spark
-		SPARK			// execute matrix operations in Spark
+		SPARK           // execute matrix operations in Spark
 	}
 	
 	/**
@@ -130,7 +130,8 @@ public class DMLScript
 		public String               configFile    = null;             // Path to config file if default config and default config is to be overriden
 		public boolean              clean         = false;            // Whether to clean up all SystemML working directories (FS, DFS)
 		public boolean              stats         = false;            // Whether to record and print the statistics
-		public int                  statsCount    = 10;	              // Default statistics count
+		public int                  statsCount    = 10;               // Default statistics count
+		public boolean              memStats      = false;            // max memory statistics
 		public Explain.ExplainType  explainType   = Explain.ExplainType.NONE;  // Whether to print the "Explain" and if so, what type
 		public DMLScript.RUNTIME_PLATFORM execMode = OptimizerUtils.getDefaultExecutionMode();  // Execution mode standalone, MR, Spark or a hybrid
 		public boolean              gpu           = false;            // Whether to use the GPU
@@ -146,28 +147,29 @@ public class DMLScript
 		@Override
 		public String toString() {
 			return "DMLOptions{" +
-							"argVals=" + argVals +
-							", configFile='" + configFile + '\'' +
-							", clean=" + clean +
-							", stats=" + stats +
-							", statsCount=" + statsCount +
-							", explainType=" + explainType +
-							", execMode=" + execMode +
-							", gpu=" + gpu +
-							", forceGPU=" + forceGPU +
-							", debug=" + debug +
-							", scriptType=" + scriptType +
-							", filePath='" + filePath + '\'' +
-							", script='" + script + '\'' +
-							", help=" + help +
-							'}';
+				"argVals=" + argVals +
+				", configFile='" + configFile + '\'' +
+				", clean=" + clean +
+				", stats=" + stats +
+				", statsCount=" + statsCount +
+				", memStats=" + memStats +
+				", explainType=" + explainType +
+				", execMode=" + execMode +
+				", gpu=" + gpu +
+				", forceGPU=" + forceGPU +
+				", debug=" + debug +
+				", scriptType=" + scriptType +
+				", filePath='" + filePath + '\'' +
+				", script='" + script + '\'' +
+				", help=" + help +
+				'}';
 		}
 	}
 
 	public static RUNTIME_PLATFORM  rtplatform          = DMLOptions.defaultOptions.execMode;    // the execution mode
 	public static boolean           STATISTICS          = DMLOptions.defaultOptions.stats;       // whether to print statistics
 	public static boolean           FINEGRAINED_STATISTICS  = false;                             // whether to print fine-grained statistics
-	public static boolean           JMLC_MEMORY_STATISTICS = false;                              // whether to gather memory use stats in JMLC
+	public static boolean           JMLC_MEM_STATISTICS = false;                                 // whether to gather memory use stats in JMLC
 	public static int               STATISTICS_COUNT    = DMLOptions.defaultOptions.statsCount;  // statistics maximum heavy hitter count
 	public static int               STATISTICS_MAX_WRAP_LEN = 30;                                // statistics maximum wrap length
 	public static boolean           ENABLE_DEBUG_MODE   = DMLOptions.defaultOptions.debug;       // debug mode
@@ -315,6 +317,7 @@ public class DMLScript
 				}
 			}
 		}
+		dmlOptions.memStats = line.hasOption("mem");
 
 		dmlOptions.clean = line.hasOption("clean");
 
@@ -390,9 +393,11 @@ public class DMLScript
 		Option cleanOpt = OptionBuilder.withDescription("cleans up all SystemML working directories (FS, DFS); all other flags are ignored in this mode. \n")
 						.create("clean");
 		Option statsOpt = OptionBuilder.withArgName("count")
-						.withDescription("monitors and reports caching/recompilation statistics; heavy hitter <count> is 10 unless overridden; default off")
+						.withDescription("monitors and reports summary execution statistics; heavy hitter <count> is 10 unless overridden; default off")
 						.hasOptionalArg()
 						.create("stats");
+		Option memOpt = OptionBuilder.withDescription("monitors and reports max memory consumption in CP; default off")
+						.create("mem");
 		Option explainOpt = OptionBuilder.withArgName("level")
 						.withDescription("explains plan levels; can be 'hops' / 'runtime'[default] / 'recompile_hops' / 'recompile_runtime'")
 						.hasOptionalArg()
@@ -436,6 +441,7 @@ public class DMLScript
 		options.addOption(configOpt);
 		options.addOption(cleanOpt);
 		options.addOption(statsOpt);
+		options.addOption(memOpt);
 		options.addOption(explainOpt);
 		options.addOption(execOpt);
 		options.addOption(gpuOpt);
@@ -465,17 +471,15 @@ public class DMLScript
 		{
 			DMLOptions dmlOptions = parseCLArguments(args, options);
 
-			// String[] scriptArgs = null; //optional script arguments
-			// boolean namedScriptArgs = false;
-
-			STATISTICS        = dmlOptions.stats;
-			STATISTICS_COUNT  = dmlOptions.statsCount;
-			USE_ACCELERATOR   = dmlOptions.gpu;
-			FORCE_ACCELERATOR = dmlOptions.forceGPU;
-			EXPLAIN           = dmlOptions.explainType;
-			ENABLE_DEBUG_MODE = dmlOptions.debug;
-			SCRIPT_TYPE       = dmlOptions.scriptType;
-			rtplatform        = dmlOptions.execMode;
+			STATISTICS          = dmlOptions.stats;
+			STATISTICS_COUNT    = dmlOptions.statsCount;
+			JMLC_MEM_STATISTICS = dmlOptions.memStats;
+			USE_ACCELERATOR     = dmlOptions.gpu;
+			FORCE_ACCELERATOR   = dmlOptions.forceGPU;
+			EXPLAIN             = dmlOptions.explainType;
+			ENABLE_DEBUG_MODE   = dmlOptions.debug;
+			SCRIPT_TYPE         = dmlOptions.scriptType;
+			rtplatform          = dmlOptions.execMode;
 
 			String fnameOptConfig = dmlOptions.configFile;
 			boolean isFile = dmlOptions.filePath != null;
@@ -517,35 +521,26 @@ public class DMLScript
 			else {
 				execute(dmlScriptStr, fnameOptConfig, argVals, args, SCRIPT_TYPE);
 			}
-
 		}
-		catch(AlreadySelectedException e)
-		{
+		catch(AlreadySelectedException e) {
 			System.err.println("Mutually exclusive options were selected. " + e.getMessage());
 			HelpFormatter formatter = new HelpFormatter();
 			formatter.printHelp( "systemml", options );
 			return false;
 		}
-		catch(org.apache.commons.cli.ParseException e)
-		{
+		catch(org.apache.commons.cli.ParseException e) {
 			System.err.println(e.getMessage());
 			HelpFormatter formatter = new HelpFormatter();
 			formatter.printHelp( "systemml", options );
 		}
-		catch (ParseException pe) {
-			throw pe;
-		}
-		catch (DMLScriptException e) {
-			//rethrow DMLScriptException to propagate stop call
+		catch (ParseException | DMLScriptException e) {
 			throw e;
 		}
-		catch(Exception ex)
-		{
+		catch(Exception ex) {
 			LOG.error("Failed to execute DML script.", ex);
 			throw new DMLException(ex);
 		}
-		finally
-		{
+		finally {
 			//reset runtime platform and visualize flag
 			rtplatform = oldrtplatform;
 			EXPLAIN = oldexplain;
@@ -626,20 +621,15 @@ public class DMLScript
 	}
 
 	
-	private static void setLoggingProperties( Configuration conf )
-	{
+	private static void setLoggingProperties( Configuration conf ) {
 		String debug = conf.get("systemml.logging");
-		
 		if (debug == null)
 			debug = System.getProperty("systemml.logging");
-		
 		if (debug != null){
-			if (debug.equalsIgnoreCase("debug")){
+			if (debug.equalsIgnoreCase("debug"))
 				Logger.getLogger("org.apache.sysml").setLevel((Level) Level.DEBUG);
-			}
-			else if (debug.equalsIgnoreCase("trace")){
+			else if (debug.equalsIgnoreCase("trace"))
 				Logger.getLogger("org.apache.sysml").setLevel((Level) Level.TRACE);
-			}
 		}
 	}
 	
@@ -825,7 +815,7 @@ public class DMLScript
 		//init caching (incl set active)
 		LocalFileUtils.createWorkingDirectory();
 		CacheableData.initCaching();
-						
+		
 		//reset statistics (required if multiple scripts executed in one JVM)
 		Statistics.resetNoOfExecutedJobs();
 		if( STATISTICS )
@@ -871,9 +861,8 @@ public class DMLScript
 				+ MRConfigurationNames.DFS_PERMISSIONS_ENABLED + " = " + perm );
 
 		//print warning if permission issues possible
-		if( flagDiffUser && ( flagLocalFS || flagSecurity ) )
-		{
-			LOG.warn("Cannot run map/reduce tasks as user '"+userName+"'. Using tasktracker group '"+ttGroupName+"'."); 		 
+		if( flagDiffUser && ( flagLocalFS || flagSecurity ) ) {
+			LOG.warn("Cannot run map/reduce tasks as user '"+userName+"'. Using tasktracker group '"+ttGroupName+"'.");
 		}
 	}
 	
@@ -895,25 +884,19 @@ public class DMLScript
 		//this implementation does not create job specific sub directories)
 		JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
 		if( InfrastructureAnalyzer.isLocalMode(job) ) {
-			try 
-			{
-				LocalFileUtils.deleteFileIfExists( DMLConfig.LOCAL_MR_MODE_STAGING_DIR + //staging dir (for local mode only) 
-					                                   dirSuffix  );	
-				LocalFileUtils.deleteFileIfExists( MRJobConfiguration.getLocalWorkingDirPrefix(job) + //local dir
-		                                               dirSuffix );
-				MapReduceTool.deleteFileIfExistOnHDFS( MRJobConfiguration.getSystemWorkingDirPrefix(job) + //system dir
-													   dirSuffix  );
-				MapReduceTool.deleteFileIfExistOnHDFS( MRJobConfiguration.getStagingWorkingDirPrefix(job) + //staging dir
-								                       dirSuffix  );
+			try {
+				LocalFileUtils.deleteFileIfExists( DMLConfig.LOCAL_MR_MODE_STAGING_DIR + dirSuffix );
+				LocalFileUtils.deleteFileIfExists( MRJobConfiguration.getLocalWorkingDirPrefix(job) + dirSuffix );
+				MapReduceTool.deleteFileIfExistOnHDFS( MRJobConfiguration.getSystemWorkingDirPrefix(job) + dirSuffix );
+				MapReduceTool.deleteFileIfExistOnHDFS( MRJobConfiguration.getStagingWorkingDirPrefix(job) + dirSuffix );
 			}
-			catch(Exception ex)
-			{
+			catch(Exception ex) {
 				//we give only a warning because those directories are written by the mapred deamon 
 				//and hence, execution can still succeed
 				LOG.warn("Unable to cleanup hadoop working dirs: "+ex.getMessage());
 			}
-		}			
-			
+		}
+		
 		//3) cleanup systemml-internal working dirs
 		CacheableData.cleanupCacheDir(); //might be local/hdfs
 		LocalFileUtils.cleanupWorkingDirectory();
@@ -924,12 +907,10 @@ public class DMLScript
 	// private internal helper functionalities
 	////////
 
-	private static void printInvocationInfo(String fnameScript, String fnameOptConfig, Map<String,String> argVals)
-	{		
+	private static void printInvocationInfo(String fnameScript, String fnameOptConfig, Map<String,String> argVals) {
 		LOG.debug("****** args to DML Script ******\n" + "UUID: " + getUUID() + "\n" + "SCRIPT PATH: " + fnameScript + "\n" 
-	                + "RUNTIME: " + rtplatform + "\n" + "BUILTIN CONFIG: " + DMLConfig.DEFAULT_SYSTEMML_CONFIG_FILEPATH + "\n"
-	                + "OPTIONAL CONFIG: " + fnameOptConfig + "\n");
-
+			+ "RUNTIME: " + rtplatform + "\n" + "BUILTIN CONFIG: " + DMLConfig.DEFAULT_SYSTEMML_CONFIG_FILEPATH + "\n"
+			+ "OPTIONAL CONFIG: " + fnameOptConfig + "\n");
 		if( !argVals.isEmpty() ) {
 			LOG.debug("Script arguments are: \n");
 			for (int i=1; i<= argVals.size(); i++)
@@ -937,27 +918,23 @@ public class DMLScript
 		}
 	}
 	
-	private static void printStartExecInfo(String dmlScriptString)
-	{
+	private static void printStartExecInfo(String dmlScriptString) {
 		LOG.info("BEGIN DML run " + getDateTime());
 		LOG.debug("DML script: \n" + dmlScriptString);
-		
 		if (rtplatform == RUNTIME_PLATFORM.HADOOP || rtplatform == RUNTIME_PLATFORM.HYBRID) {
 			String hadoop_home = System.getenv("HADOOP_HOME");
 			LOG.info("HADOOP_HOME: " + hadoop_home);
 		}
 	}
 	
-	private static String getDateTime() 
-	{
+	private static String getDateTime() {
 		DateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy HH:mm:ss");
 		Date date = new Date();
 		return dateFormat.format(date);
 	}
 
 	private static void cleanSystemMLWorkspace() {
-		try
-		{
+		try {
 			//read the default config
 			DMLConfig conf = DMLConfig.readConfigurationFile(null);
 			
@@ -974,9 +951,8 @@ public class DMLScript
 			if( localtmp != null )
 				LocalFileUtils.cleanupRcWorkingDirectory(localtmp);
 		}
-		catch(Exception ex)
-		{
+		catch(Exception ex) {
 			throw new DMLException("Failed to run SystemML workspace cleanup.", ex);
 		}
 	}
-}  
+}

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/api/jmlc/Connection.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/api/jmlc/Connection.java b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
index 550b1c6..ea521e5 100644
--- a/src/main/java/org/apache/sysml/api/jmlc/Connection.java
+++ b/src/main/java/org/apache/sysml/api/jmlc/Connection.java
@@ -194,7 +194,7 @@ public class Connection implements Closeable
 	 */
 	public void gatherMemStats(boolean stats) {
 		DMLScript.STATISTICS = stats || DMLScript.STATISTICS;
-		DMLScript.JMLC_MEMORY_STATISTICS = stats;
+		DMLScript.JMLC_MEM_STATISTICS = stats;
 	}
 	
 	/**

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java b/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
index dd8d7d2..2081aae 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
@@ -137,7 +137,7 @@ public class LocalVariableMap implements Cloneable
 			if( !dict.containsKey(hash) && e.getValue() instanceof CacheableData ) {
 				dict.put(hash, e.getValue());
 				double size = ((CacheableData<?>) e.getValue()).getDataSize();
-				if ((DMLScript.JMLC_MEMORY_STATISTICS) && (DMLScript.FINEGRAINED_STATISTICS))
+				if (DMLScript.JMLC_MEM_STATISTICS && DMLScript.FINEGRAINED_STATISTICS)
 					Statistics.maintainCPHeavyHittersMem(e.getKey(), size);
 				total += size;
 			}

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
index b7476ae..72b5051 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ProgramBlock.java
@@ -259,7 +259,7 @@ public class ProgramBlock implements ParseInfo
 				Statistics.maintainCPHeavyHitters(
 					tmp.getExtendedOpcode(), System.nanoTime()-t0);
 			}
-			if ((DMLScript.JMLC_MEMORY_STATISTICS) && (DMLScript.FINEGRAINED_STATISTICS))
+			if (DMLScript.JMLC_MEM_STATISTICS && DMLScript.FINEGRAINED_STATISTICS)
 				ec.getVariables().getPinnedDataSize();
 
 			// optional trace information (instruction and runtime)

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
index 5b1c26b..6ce1cd3 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
@@ -487,6 +487,8 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 		if( DMLScript.STATISTICS ){
 			long t1 = System.nanoTime();
 			CacheStatistics.incrementAcquireMTime(t1-t0);
+			if (DMLScript.JMLC_MEM_STATISTICS)
+				Statistics.addCPMemObject(System.identityHashCode(this), getDataSize());
 		}
 		
 		return ret;
@@ -504,9 +506,6 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 		
 		setDirty(true);
 		_isAcquireFromEmpty = false;
-
-		if (DMLScript.JMLC_MEMORY_STATISTICS)
-			Statistics.addCPMemObject(newData);
 		
 		//set references to new data
 		if (newData == null)
@@ -573,11 +572,6 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 				}
 				_requiresLocalWrite = false;
 			}
-
-			if ((DMLScript.JMLC_MEMORY_STATISTICS) && (this._data != null)) {
-				int hash = System.identityHashCode(this._data);
-				Statistics.removeCPMemObject(hash);
-			}
 			
 			//create cache
 			createCache();
@@ -608,10 +602,6 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 			  ||(_data!=null && !isCachingActive()) )) //additional condition for JMLC
 			freeEvictedBlob();
 
-		if ((DMLScript.JMLC_MEMORY_STATISTICS) && (this._data != null)) {
-			int hash = System.identityHashCode(this._data);
-			Statistics.removeCPMemObject(hash);
-		}
 		// clear the in-memory data
 		_data = null;
 		clearCache();

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
index 77598bf..d0d0b08 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
@@ -59,6 +59,7 @@ import org.apache.sysml.runtime.matrix.data.OutputInfo;
 import org.apache.sysml.runtime.matrix.data.Pair;
 import org.apache.sysml.runtime.util.MapReduceTool;
 import org.apache.sysml.utils.GPUStatistics;
+import org.apache.sysml.utils.Statistics;
 
 
 public class ExecutionContext {
@@ -600,6 +601,8 @@ public class ExecutionContext {
 	}
 	
 	public void cleanupCacheableData(CacheableData<?> mo) {
+		if (DMLScript.JMLC_MEM_STATISTICS)
+			Statistics.removeCPMemObject(System.identityHashCode(mo));
 		//early abort w/o scan of symbol table if no cleanup required
 		boolean fileExists = (mo.isHDFSFileExists() && mo.getFileName() != null);
 		if( !CacheableData.isCachingActive() && !fileExists )

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
index 4196ef8..a3c7677 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/SparkExecutionContext.java
@@ -1097,6 +1097,9 @@ public class SparkExecutionContext extends ExecutionContext
 		//and hence is transparently used by rmvar instructions and other users. The
 		//core difference is the lineage-based cleanup of RDD and broadcast variables.
 
+		if (DMLScript.JMLC_MEM_STATISTICS)
+			Statistics.removeCPMemObject(System.identityHashCode(mo));
+
 		if( !mo.isCleanupEnabled() )
 			return;
 		

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bb13691/src/main/java/org/apache/sysml/utils/Statistics.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/utils/Statistics.java b/src/main/java/org/apache/sysml/utils/Statistics.java
index 7c16375..8f0d853 100644
--- a/src/main/java/org/apache/sysml/utils/Statistics.java
+++ b/src/main/java/org/apache/sysml/utils/Statistics.java
@@ -22,7 +22,6 @@ package org.apache.sysml.utils;
 import java.lang.management.CompilationMXBean;
 import java.lang.management.GarbageCollectorMXBean;
 import java.lang.management.ManagementFactory;
-import java.lang.ref.SoftReference;
 import java.text.DecimalFormat;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -36,7 +35,6 @@ import java.util.concurrent.atomic.LongAdder;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.OptimizerUtils;
-import org.apache.sysml.runtime.controlprogram.caching.CacheBlock;
 import org.apache.sysml.runtime.controlprogram.caching.CacheStatistics;
 import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext;
 import org.apache.sysml.runtime.instructions.Instruction;
@@ -81,12 +79,6 @@ public class Statistics
 	private static final ConcurrentHashMap<String,Double> _cpMemObjs = new ConcurrentHashMap<>();
 	private static final ConcurrentHashMap<Integer,Double> _currCPMemObjs = new ConcurrentHashMap<>();
 
-	// this hash map maintains soft references to the cache blocks in memory. It is periodically scanned to check for
-	// objects which have been garbage collected. This enables more accurate memory statistics. Relying on rmvar
-	// instructions to determine when an object has been de-allocated results in a substantial underestimate to memory
-	// use by the program since garbage collection will not occur immediately.
-	private static final ConcurrentHashMap<Integer,SoftReference<CacheBlock>> _liveObjects = new ConcurrentHashMap<>();
-
 	//JVM stats (low frequency updates)
 	private static long jitCompileTime = 0; //in milli sec
 	private static long jvmGCTime = 0; //in milli sec
@@ -601,33 +593,11 @@ public class Statistics
 		return opcode;
 	}
 
-	public static void addCPMemObject(CacheBlock data) {
-		int hash = System.identityHashCode(data);
-		double sizeof = data.getInMemorySize();
-
+	public static void addCPMemObject(int hash, double sizeof) {
 		double sizePrev = _currCPMemObjs.getOrDefault(hash, 0.0);
 		_currCPMemObjs.put(hash, sizeof);
 		sizeofPinnedObjects.add(sizeof - sizePrev);
-		if (DMLScript.FINEGRAINED_STATISTICS)
-			_liveObjects.putIfAbsent(hash, new SoftReference<>(data));
 		maintainMemMaxStats();
-		checkForDeadBlocks();
-	}
-
-	/**
-	 * If finegrained statistics are enabled searches through a map of soft references to find objects
-	 * which have been garbage collected. This results in more accurate statistics on memory use but
-	 * introduces overhead so is only enabled with finegrained stats and when running in JMLC
-	 */
-	public static void checkForDeadBlocks() {
-		if (!DMLScript.FINEGRAINED_STATISTICS)
-			return;
-		for (Entry<Integer,SoftReference<CacheBlock>> e : _liveObjects.entrySet()) {
-			if (e.getValue().get() == null) {
-				removeCPMemObject(e.getKey());
-				_liveObjects.remove(e.getKey());
-			}
-		}
 	}
 
 	/**
@@ -994,8 +964,8 @@ public class Statistics
 			sb.append("Cache hits (Mem, WB, FS, HDFS):\t" + CacheStatistics.displayHits() + ".\n");
 			sb.append("Cache writes (WB, FS, HDFS):\t" + CacheStatistics.displayWrites() + ".\n");
 			sb.append("Cache times (ACQr/m, RLS, EXP):\t" + CacheStatistics.displayTime() + " sec.\n");
-			if (DMLScript.JMLC_MEMORY_STATISTICS)
-				sb.append("Max size of objects in CP memory:\t" + byteCountToDisplaySize(getSizeofPinnedObjects()) + " ("  + getNumPinnedObjects() + " total objects)" + "\n");
+			if (DMLScript.JMLC_MEM_STATISTICS)
+				sb.append("Max size of live objects:\t" + byteCountToDisplaySize(getSizeofPinnedObjects()) + " ("  + getNumPinnedObjects() + " total objects)" + "\n");
 			sb.append("HOP DAGs recompiled (PRED, SB):\t" + getHopRecompiledPredDAGs() + "/" + getHopRecompiledSBDAGs() + ".\n");
 			sb.append("HOP DAGs recompile time:\t" + String.format("%.3f", ((double)getHopRecompileTime())/1000000000) + " sec.\n");
 			if( getFunRecompiles()>0 ) {
@@ -1047,7 +1017,7 @@ public class Statistics
 			sb.append("Total JVM GC time:\t\t" + ((double)getJVMgcTime())/1000 + " sec.\n");
 			LibMatrixDNN.appendStatistics(sb);
 			sb.append("Heavy hitter instructions:\n" + getHeavyHitters(maxHeavyHitters));
-			if ((DMLScript.JMLC_MEMORY_STATISTICS) && (DMLScript.FINEGRAINED_STATISTICS))
+			if (DMLScript.JMLC_MEM_STATISTICS && DMLScript.FINEGRAINED_STATISTICS)
 				sb.append("Heavy hitter objects:\n" + getCPHeavyHittersMem(maxHeavyHitters));
 		}