You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/04/20 08:46:01 UTC

[2/7] systemml git commit: [SYSTEMML-2258] Fine tuning and cleanup bufferpool (thread contention)

[SYSTEMML-2258] Fine tuning and cleanup bufferpool (thread contention)

This patch makes some additional improvements to the buffer pool
primitives acquireRead, acquireModify, and release, which help further
reduce unnecessary thread contention on shared objects. As a by-product,
this also improves the scope of related stats reporting and cleans up
these primitives to simplify their maintenance.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/5590513d
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/5590513d
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/5590513d

Branch: refs/heads/master
Commit: 5590513d7d4ec8acad218bee7f2c3239c13042eb
Parents: 9a08915
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Apr 19 19:27:26 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Apr 19 19:27:26 2018 -0700

----------------------------------------------------------------------
 .../sysml/debug/DMLDebuggerFunctions.java       |  15 +-
 .../apache/sysml/hops/cost/CostEstimator.java   |   3 +-
 .../controlprogram/caching/CacheableData.java   | 192 +++++--------------
 .../context/ExecutionContext.java               |  34 ++--
 .../parfor/opt/OptimizerRuleBased.java          |   2 +-
 5 files changed, 80 insertions(+), 166 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/5590513d/src/main/java/org/apache/sysml/debug/DMLDebuggerFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/debug/DMLDebuggerFunctions.java b/src/main/java/org/apache/sysml/debug/DMLDebuggerFunctions.java
index d32d0e5..c34272f 100644
--- a/src/main/java/org/apache/sysml/debug/DMLDebuggerFunctions.java
+++ b/src/main/java/org/apache/sysml/debug/DMLDebuggerFunctions.java
@@ -31,6 +31,7 @@ import org.apache.sysml.lops.Lop;
 import org.apache.sysml.parser.Expression.DataType;
 import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.controlprogram.caching.CacheableData.CacheStatus;
 import org.apache.sysml.runtime.instructions.Instruction;
 import org.apache.sysml.runtime.instructions.MRJobInstruction;
 import org.apache.sysml.runtime.instructions.cp.BreakPointInstruction;
@@ -291,7 +292,7 @@ public class DMLDebuggerFunctions {
 						
 						try {
 							mo = (MatrixObject) variables.get(varname);
-							if (mo.getStatusAsString().equals("EMPTY") && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
+							if (mo.getStatus()==CacheStatus.EMPTY && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
 								//TODO @jlugoma Need to add functionality to bring and display a block. 
 								System.err.println("ERROR: Matrix dimensions are too large to fit in main memory.");
 								return;
@@ -367,7 +368,7 @@ public class DMLDebuggerFunctions {
 				if (variables.get(varname).getDataType() == DataType.MATRIX) {
 					try {
 						MatrixObject mo = (MatrixObject) variables.get(varname);
-						if (mo.getStatusAsString().equals("EMPTY") && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
+						if (mo.getStatus()==CacheStatus.EMPTY && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
 							//TODO @jlugoma Need to add functionality to bring and display a block. 
 							System.err.println("ERROR: DML matrix/vector dimensions are too large to fit in main memory.");
 							return;
@@ -417,7 +418,7 @@ public class DMLDebuggerFunctions {
 					double cellValue;
 					try {
 						MatrixObject mo = (MatrixObject) variables.get(varname);
-						if (mo.getStatusAsString().equals("EMPTY") && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
+						if (mo.getStatus()==CacheStatus.EMPTY && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
 							//TODO @jlugoma Need to add functionality to bring and display a block. 
 							System.err.println("ERROR: DML matrix/vector dimensions are too large to fit in main memory.");
 							return;
@@ -464,14 +465,16 @@ public class DMLDebuggerFunctions {
 					double updatedCellValue;
 					try {
 						MatrixObject mo = (MatrixObject) variables.get(varname);
-						if (mo.getStatusAsString().equals("EMPTY") && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
+						if (mo.getStatus()==CacheStatus.EMPTY && (OptimizerUtils.estimateSizeExactSparsity(mo.getNumRows(), mo.getNumColumns(), mo.getSparsity()) > OptimizerUtils.getLocalMemBudget())) {
 							//TODO @jlugoma Need to add functionality to bring and display a block. 
 							System.err.println("ERROR: DML matrix/vector dimensions are too large to fit in main memory.");
 							return;
-						}						
-						MatrixBlock mb = mo.acquireModify();
+						}
+						MatrixBlock mb = mo.acquireRead();
+						mo.release();
 						mb.setValue(rowIndex, columnIndex, value);
 						updatedCellValue = mb.getValue(rowIndex, columnIndex);
+						mo.acquireModify(mb);
 						mo.release();
 					} catch (Exception e) {
 						System.err.println("Error processing DML matrix variable "+varname+". Certain matrix operations are disabled due to memory constraints or read-only restrictions.");

http://git-wip-us.apache.org/repos/asf/systemml/blob/5590513d/src/main/java/org/apache/sysml/hops/cost/CostEstimator.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/cost/CostEstimator.java b/src/main/java/org/apache/sysml/hops/cost/CostEstimator.java
index 37e588f..05c6906 100644
--- a/src/main/java/org/apache/sysml/hops/cost/CostEstimator.java
+++ b/src/main/java/org/apache/sysml/hops/cost/CostEstimator.java
@@ -39,6 +39,7 @@ import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
 import org.apache.sysml.runtime.controlprogram.Program;
 import org.apache.sysml.runtime.controlprogram.ProgramBlock;
 import org.apache.sysml.runtime.controlprogram.WhileProgramBlock;
+import org.apache.sysml.runtime.controlprogram.caching.CacheableData.CacheStatus;
 import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
 import org.apache.sysml.runtime.instructions.Instruction;
 import org.apache.sysml.runtime.instructions.InstructionUtils;
@@ -223,7 +224,7 @@ public abstract class CostEstimator
 				int brlen = mc.getRowsPerBlock();
 				int bclen = mc.getColsPerBlock();
 				long nnz = mc.getNonZeros();
-				boolean inmem = mo.getStatusAsString().equals("CACHED");
+				boolean inmem = mo.getStatus()==CacheStatus.CACHED;
 				vs = new VarStats(rlen, clen, brlen, bclen, nnz, inmem);
 			}
 			else //scalar

http://git-wip-us.apache.org/repos/asf/systemml/blob/5590513d/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
index 6e63284..3682fe1 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
@@ -40,7 +40,6 @@ import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.controlprogram.caching.LazyWriteBuffer.RPolicy;
 import org.apache.sysml.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
-import org.apache.sysml.runtime.instructions.cp.CPInstruction;
 import org.apache.sysml.runtime.instructions.cp.Data;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUObject;
@@ -53,11 +52,9 @@ import org.apache.sysml.runtime.matrix.MetaDataNumItemsByEachReducer;
 import org.apache.sysml.runtime.matrix.MetaData;
 import org.apache.sysml.runtime.matrix.data.FileFormatProperties;
 import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.matrix.data.OutputInfo;
 import org.apache.sysml.runtime.util.LocalFileUtils;
 import org.apache.sysml.runtime.util.MapReduceTool;
-import org.apache.sysml.utils.GPUStatistics;
 
 
 /**
@@ -103,7 +100,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	 * <code>CACHED</code>:   The data blob is in main memory, and nobody is using nor referencing it. 
 	 * There is always an persistent recovery object for it
 	 **/
-	protected enum CacheStatus {
+	public enum CacheStatus {
 		EMPTY, 
 		READ, 
 		MODIFY, 
@@ -115,7 +112,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	private static volatile boolean _activeFlag = false;
 	
 	/** Global sequence for generating unique ids. */
-	private static IDSequence _seq = null;   
+	private static IDSequence _seq = null;
 
 	// Global eviction path and prefix (prefix used for isolation purposes)
 	public static String cacheEvictionLocalFilePath = null; //set during init
@@ -246,6 +243,10 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	public boolean isCleanupEnabled() {
 		return _cleanupFlag;
 	}
+	
+	public CacheStatus getStatus() {
+		return _cacheStatus;
+	}
 
 	public boolean isHDFSFileExists() {
 		return _hdfsFileExists;
@@ -411,18 +412,15 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 				}
 			}
 		}
-
+		
 		//read data from HDFS/RDD if required
 		//(probe data for cache_nowrite / jvm_reuse)
-		if( isEmpty(true) && _data==null )
-		{
-			try
-			{
+		if( _data==null && isEmpty(true) ) {
+			try {
 				if( DMLScript.STATISTICS )
 					CacheStatistics.incrementHDFSHits();
 				
-				if( getRDDHandle()==null || getRDDHandle().allowsShortCircuitRead() )
-				{
+				if( getRDDHandle()==null || getRDDHandle().allowsShortCircuitRead() ) {
 					//check filename
 					if( _hdfsFileName == null )
 						throw new DMLRuntimeException("Cannot read matrix for empty filename.");
@@ -433,8 +431,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 					//mark for initial local write despite read operation
 					_requiresLocalWrite = CACHING_WRITE_CACHE_ON_READ;
 				}
-				else
-				{
+				else {
 					//read matrix from rdd (incl execute pending rdd operations)
 					MutableBoolean writeStatus = new MutableBoolean();
 					_data = readBlobFromRDD( getRDDHandle(), writeStatus );
@@ -449,92 +446,48 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 			catch (IOException e) {
 				throw new DMLRuntimeException("Reading of " + _hdfsFileName + " ("+hashCode()+") failed.", e);
 			}
-			
 			_isAcquireFromEmpty = true;
 		}
-		else if( DMLScript.STATISTICS ) {
-			if( _data!=null )
-				CacheStatistics.incrementMemHits();
+		else if( _data!=null && DMLScript.STATISTICS ) {
+			CacheStatistics.incrementMemHits();
 		}
 		
 		//cache status maintenance
 		acquire( false, _data==null );
 		return _data;
 	}
-
+	
 	/**
-	 * Acquires the exclusive "write" lock for a thread that wants to change cache block
-	 * cell values.  Produces the reference to the cache block, restores the cache block
-	 * to main memory, reads from HDFS if needed.
-	 * 
+	 * Acquires the exclusive "write" lock for a thread that wants to throw away the
+	 * old cache block data and link up with new cache block data. Abandons the old data
+	 * without reading it and sets the new data reference.
+
 	 * In-Status:  EMPTY, EVICTABLE, EVICTED;
 	 * Out-Status: MODIFY.
 	 * 
+	 * @param newData new data
 	 * @return cacheable data
 	 */
-	public synchronized T acquireModify() 
-	{
-		//TODO remove after debugger (as only consumer) has been removed, because
-		//recent features such as gpu data transfers are not yet integrated
+	public T acquireModify(T newData) {
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
 		
-		if ( !isAvailableToModify() )
-			throw new DMLRuntimeException("MatrixObject not available to modify.");
+		//core internal acquire (synchronized per object)
+		T ret = acquireModifyIntern(newData);
 		
-		//get object from cache
-		if( _data == null )
-			getCache();
-
-		//read data from HDFS if required
-		if( isEmpty(true) && _data == null )
-		{
-			//check filename
-			if( _hdfsFileName == null )
-				throw new DMLRuntimeException("Cannot read matrix for empty filename.");
-			
-			//load data
-			try {
-				_data = readBlobFromHDFS( _hdfsFileName );
-			}
-			catch (IOException e) {
-				throw new DMLRuntimeException("Reading of " + _hdfsFileName + " ("+hashCode()+") failed.", e);
-			}
-		}
-
-		//cache status maintenance
-		acquire( true, _data==null );
-		updateStatusPinned(true);
-		setDirty(true);
-		_isAcquireFromEmpty = false;
+		//update thread-local status (after pin but outside the
+		//critical section of accessing a shared object)
+		if( !isBelowCachingThreshold() )
+			updateStatusPinned(true);
 		
 		if( DMLScript.STATISTICS ){
 			long t1 = System.nanoTime();
 			CacheStatistics.incrementAcquireMTime(t1-t0);
 		}
 		
-		return _data;
-	}
-	
-	public T acquireModify(T newData) {
-		return acquireModify(newData, null);
+		return ret;
 	}
 	
-	/**
-	 * Acquires the exclusive "write" lock for a thread that wants to throw away the
-	 * old cache block data and link up with new cache block data. Abandons the old data
-	 * without reading it and sets the new data reference.
-
-	 * In-Status:  EMPTY, EVICTABLE, EVICTED;
-	 * Out-Status: MODIFY.
-	 * 
-	 * @param newData new data
-	 * @param opcode extended instruction opcode
-	 * @return cacheable data
-	 */
-	public synchronized T acquireModify(T newData, String opcode)
-	{
-		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-		
+	private synchronized T acquireModifyIntern(T newData) {
 		if (! isAvailableToModify ())
 			throw new DMLRuntimeException("CacheableData not available to modify.");
 		
@@ -550,29 +503,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 		//set references to new data
 		if (newData == null)
 			throw new DMLRuntimeException("acquireModify with empty cache block.");
-		_data = newData;
-		if( !isBelowCachingThreshold() )
-			updateStatusPinned(true);
-		
-		if( DMLScript.STATISTICS ){
-			long t1 = System.nanoTime();
-			CacheStatistics.incrementAcquireMTime(t1-t0);
-			if(DMLScript.FINEGRAINED_STATISTICS && opcode != null) {
-				if(_data instanceof MatrixBlock) {
-					MatrixBlock currObject = (MatrixBlock)_data;
-					if(currObject.isInSparseFormat())
-						GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_ACQ_MODIFY_SPARSE_MB, t1-t0);
-					else
-						GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_ACQ_MODIFY_DENSE_MB, t1-t0);
-				}
-			}
-		}
-		
-		return _data;
-	}
-	
-	public void release() {
-		release(null);
+		return _data = newData;
 	}
 	
 	/**
@@ -586,7 +517,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	 * Out-Status: READ(-1), EVICTABLE, EMPTY.
 	 * 
 	 */
-	public void release(String opcode) {
+	public void release() {
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
 		
 		//update thread-local status (before unpin but outside
@@ -595,7 +526,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 			updateStatusPinned(false);
 		
 		//core internal release (synchronized per object)
-		releaseIntern(opcode);
+		releaseIntern();
 		
 		if( DMLScript.STATISTICS ){
 			long t1 = System.nanoTime();
@@ -603,8 +534,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 		}
 	}
 	
-	private synchronized void releaseIntern(String opcode)
-	{
+	private synchronized void releaseIntern() {
 		boolean write = false;
 		if ( isModify() ) {
 			//set flags for write
@@ -613,11 +543,11 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 			
 			//update meta data
 			refreshMetaData();
+			
+			//compact empty in-memory block 
+			_data.compactEmptyBlock();
 		}
 		
-		//compact empty in-memory block 
-		_data.compactEmptyBlock();
-		
 		//cache status maintenance (pass cacheNoWrite flag)
 		release(_isAcquireFromEmpty && !_requiresLocalWrite);
 		
@@ -626,17 +556,9 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 			&& !isBelowCachingThreshold() ) //min size for caching
 		{
 			if( write || _requiresLocalWrite ) {
-				//evict blob
 				String filePath = getCacheFilePathAndName();
 				try {
-					long t1 = DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
-					
-					int numEvicted = LazyWriteBuffer.writeBlock(filePath, _data);
-					
-					if(DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS && opcode != null) {
-						long t2 = DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
-						GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_RELEASE_BUFF_WRITE, t2-t1, numEvicted);
-					}
+					LazyWriteBuffer.writeBlock(filePath, _data);
 				}
 				catch (Exception e) {
 					throw new DMLRuntimeException("Eviction to local path " + filePath + " ("+hashCode()+") failed.", e);
@@ -666,7 +588,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 			return; // do nothing
 		if( !isAvailableToModify() )
 			throw new DMLRuntimeException("CacheableData (" + getDebugName() + ") not available to "
-					+ "modify. Status = " + getStatusAsString() + ".");
+					+ "modify. Status = " + _cacheStatus.name() + ".");
 		
 		// clear existing WB / FS representation (but prevent unnecessary probes)
 		if( !(isEmpty(true)||(_data!=null && isBelowCachingThreshold()) 
@@ -810,7 +732,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 				throw new DMLRuntimeException("Export to " + fName + " failed.", e);
 			}
 			finally {
-				release(opcode);
+				release();
 			}
 		}
 		else if( pWrite ) // pwrite with same output format
@@ -1078,7 +1000,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 		}
 
 		if( LOG.isTraceEnabled() )
-			LOG.trace("Acquired lock on " + this.getDebugName() + ", status: " + this.getStatusAsString() );		
+			LOG.trace("Acquired lock on " + getDebugName() + ", status: " + _cacheStatus.name() );
 	}
 
 	
@@ -1114,7 +1036,7 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 		}
 		
 		if( LOG.isTraceEnabled() )
-			LOG.trace("Released lock on " + this.getDebugName() + ", status: " + this.getStatusAsString());
+			LOG.trace("Released lock on " + getDebugName() + ", status: " + _cacheStatus.name());
 		
 	}
 
@@ -1125,16 +1047,9 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	//  ***                                            ***
 	//  **************************************************
 	
-	
-	public String getStatusAsString() {
-		return _cacheStatus.toString();
-	}
-
 	public boolean isCached(boolean inclCachedNoWrite) {
-		if( inclCachedNoWrite )
-			return (_cacheStatus == CacheStatus.CACHED || _cacheStatus == CacheStatus.CACHED_NOWRITE);
-		else
-			return (_cacheStatus == CacheStatus.CACHED);
+		return _cacheStatus == CacheStatus.CACHED
+			|| (inclCachedNoWrite && _cacheStatus == CacheStatus.CACHED_NOWRITE);
 	}
 	
 	public void setEmptyStatus() {
@@ -1142,10 +1057,8 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	}
 	
 	protected boolean isEmpty(boolean inclCachedNoWrite) {
-		if( inclCachedNoWrite )
-			return (_cacheStatus == CacheStatus.EMPTY || _cacheStatus == CacheStatus.CACHED_NOWRITE);
-		else
-			return (_cacheStatus == CacheStatus.EMPTY);
+		return _cacheStatus == CacheStatus.EMPTY
+			|| (inclCachedNoWrite && _cacheStatus == CacheStatus.CACHED_NOWRITE);
 	}
 	
 	protected boolean isModify() {
@@ -1170,22 +1083,19 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	}
 	
 	protected void removeOneRead(boolean doesBlobExist, boolean cacheNoWrite) {
-		_numReadThreads --;					
+		_numReadThreads --;
 		if (_numReadThreads == 0) {
 			if( cacheNoWrite )
 				_cacheStatus = (doesBlobExist ? 
-						CacheStatus.CACHED_NOWRITE : CacheStatus.EMPTY);
+					CacheStatus.CACHED_NOWRITE : CacheStatus.EMPTY);
 			else
 				_cacheStatus = (doesBlobExist ? 
-						CacheStatus.CACHED : CacheStatus.EMPTY);
+					CacheStatus.CACHED : CacheStatus.EMPTY);
 		}
 	}
 	
 	protected boolean isAvailableToRead() {
-		return (   _cacheStatus == CacheStatus.EMPTY 
-				|| _cacheStatus == CacheStatus.CACHED
-				|| _cacheStatus == CacheStatus.CACHED_NOWRITE
-				|| _cacheStatus == CacheStatus.READ);
+		return (_cacheStatus != CacheStatus.MODIFY);
 	}
 	
 	protected boolean isAvailableToModify() {
@@ -1206,7 +1116,8 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	 * referenced cache block.  
 	 */
 	protected void createCache( ) {
-		_cache = new SoftReference<>( _data );
+		if( _cache == null || _cache.get() == null )
+			_cache = new SoftReference<>( _data );
 	}
 
 	/**
@@ -1214,9 +1125,8 @@ public abstract class CacheableData<T extends CacheBlock> extends Data
 	 * and subsequently clears the cache soft reference if existing.
 	 */
 	protected void getCache() {
-		if( _cache !=null ) {
+		if( _cache != null ) {
 			_data = _cache.get();
-			clearCache();
 		}
 	}
 	

http://git-wip-us.apache.org/repos/asf/systemml/blob/5590513d/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
index 9c6dc5b..dfc3dc5 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
@@ -380,21 +380,19 @@ public class ExecutionContext {
 	 * 
 	 * @param varName variable name
 	 */
+	public void releaseMatrixInput(String varName) {
+		getMatrixObject(varName).release();
+	}
+	
 	public void releaseMatrixInput(String varName, String opcode) {
 		long t1 = opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
-		MatrixObject mo = getMatrixObject(varName);
-		mo.release(opcode);
+		releaseMatrixInput(varName);
 		if(opcode != null && DMLScript.STATISTICS && DMLScript.FINEGRAINED_STATISTICS) {
 			long t2 = System.nanoTime();
 			GPUStatistics.maintainCPMiscTimes(opcode, CPInstruction.MISC_TIMER_RELEASE_INPUT_MB, t2-t1);
 		}
 	}
 	
-	public void releaseMatrixInput(String varName) {
-		MatrixObject mo = getMatrixObject(varName);
-		mo.release(null);
-	}
-	
 	public void releaseMatrixInputForGPUInstruction(String varName) {
 		MatrixObject mo = getMatrixObject(varName);
 		mo.getGPUObject(getGPUContext(0)).releaseInput();
@@ -450,25 +448,27 @@ public class ExecutionContext {
 	}
 	
 	public void setMatrixOutput(String varName, MatrixBlock outputData) {
-		setMatrixOutput(varName, outputData, null);
-	}
-
-	public void setMatrixOutput(String varName, MatrixBlock outputData, String opcode) {
 		MatrixObject mo = getMatrixObject(varName);
-		mo.acquireModify(outputData, opcode);
-		mo.release(opcode);
+		mo.acquireModify(outputData);
+		mo.release();
 		setVariable(varName, mo);
 	}
+	
+	public void setMatrixOutput(String varName, MatrixBlock outputData, String opcode) {
+		setMatrixOutput(varName, outputData);
+	}
 
-	public void setMatrixOutput(String varName, MatrixBlock outputData, UpdateType flag, String opcode) {
+	public void setMatrixOutput(String varName, MatrixBlock outputData, UpdateType flag) {
 		if( flag.isInPlace() ) {
 			//modify metadata to carry update status
 			MatrixObject mo = getMatrixObject(varName);
 			mo.setUpdateType( flag );
 		}
-		
-		//default case
-		setMatrixOutput(varName, outputData, opcode);
+		setMatrixOutput(varName, outputData);
+	}
+	
+	public void setMatrixOutput(String varName, MatrixBlock outputData, UpdateType flag, String opcode) {
+		setMatrixOutput(varName, outputData, flag);
 	}
 
 	public void setFrameOutput(String varName, FrameBlock outputData) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/5590513d/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
index 91124a0..e13f2a7 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/parfor/opt/OptimizerRuleBased.java
@@ -2039,7 +2039,7 @@ public class OptimizerRuleBased extends Optimizer
 				//replace existing matrix object with empty matrix
 				MatrixObject mo = (MatrixObject)dat;
 				ec.cleanupCacheableData(mo);
-				ec.setMatrixOutput(rvar._name, new MatrixBlock((int)mo.getNumRows(), (int)mo.getNumColumns(),false), null);
+				ec.setMatrixOutput(rvar._name, new MatrixBlock((int)mo.getNumRows(), (int)mo.getNumColumns(),false));
 				
 				//keep track of cleaned result variables
 				cleanedVars.add(rvar);