You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2018/08/02 07:01:58 UTC

systemml git commit: [SYSTEMML-2481] Rework cleanup of lists and matrices/frames in lists

Repository: systemml
Updated Branches:
  refs/heads/master 51154f17b -> aed66df13


[SYSTEMML-2481] Rework cleanup of lists and matrices/frames in lists

This patch contains a major rework of the cleanup of lists as well as
any cacheable data (i.e., matrices and frames) in list objects.
Specifically, we now ensure full consistent to the behavior without
lists, which prevents missing cleanups and thus unnecessary evictions.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/aed66df1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/aed66df1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/aed66df1

Branch: refs/heads/master
Commit: aed66df1360f5a74833bd64d457e26050b35164b
Parents: 51154f1
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Aug 2 00:03:42 2018 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Aug 2 00:03:42 2018 -0700

----------------------------------------------------------------------
 .../controlprogram/LocalVariableMap.java        |  4 +-
 .../controlprogram/ParForProgramBlock.java      |  4 +-
 .../context/ExecutionContext.java               | 51 ++++++++++++++++----
 .../cp/FunctionCallCPInstruction.java           | 10 ++--
 .../runtime/instructions/cp/ListObject.java     | 11 +++--
 .../cp/ParameterizedBuiltinCPInstruction.java   |  4 +-
 .../cp/ScalarBuiltinNaryCPInstruction.java      |  5 +-
 .../instructions/cp/VariableCPInstruction.java  | 19 +++-----
 8 files changed, 68 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java b/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
index 62ae3d0..842a40a 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/LocalVariableMap.java
@@ -31,6 +31,7 @@ import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
 import org.apache.sysml.runtime.util.ProgramConverter;
 import org.apache.sysml.runtime.controlprogram.parfor.util.IDSequence;
 import org.apache.sysml.runtime.instructions.cp.Data;
+import org.apache.sysml.runtime.instructions.cp.ListObject;
 import org.apache.sysml.utils.Statistics;
 
 /**
@@ -113,7 +114,8 @@ public class LocalVariableMap implements Cloneable
 	}
 
 	public boolean hasReferences( Data d ) {
-		return localMap.containsValue(d);
+		return localMap.values().stream().anyMatch(e -> (e instanceof ListObject) ?
+			((ListObject)e).getData().contains(d) : e == d);
 	}
 	
 	public void setRegisteredOutputs(HashSet<String> outputs) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
index 85c4c31..58be1ae 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/ParForProgramBlock.java
@@ -1684,8 +1684,8 @@ public class ParForProgramBlock extends ForProgramBlock
 					
 					//cleanup existing var
 					Data exdata = ec.removeVariable(var._name);
-					if( exdata != null && exdata != outNew && exdata instanceof MatrixObject )
-						ec.cleanupCacheableData((MatrixObject)exdata);
+					if( exdata != null && exdata != outNew )
+						ec.cleanupDataObject(exdata);
 					
 					//cleanup of intermediate result variables
 					cleanWorkerResultVariables( ec, out, in );

http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
index 88ec092..5740174 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
@@ -539,21 +539,40 @@ public class ExecutionContext {
 	 */
 	public boolean[] pinVariables(List<String> varList) 
 	{
+		//analyze list variables
+		int nlist = 0;
+		int nlistItems = 0;
+		for( int i=0; i<varList.size(); i++ ) {
+			Data dat = _variables.get(varList.get(i));
+			if( dat instanceof ListObject ) {
+				nlistItems += ((ListObject)dat).getNumCacheableData();
+				nlist++;
+			}
+		}
+		
 		//2-pass approach since multiple vars might refer to same matrix object
-		boolean[] varsState = new boolean[varList.size()];
+		boolean[] varsState = new boolean[varList.size()-nlist+nlistItems];
 		
 		//step 1) get current information
-		for( int i=0; i<varList.size(); i++ ) {
+		for( int i=0, pos=0; i<varList.size(); i++ ) {
 			Data dat = _variables.get(varList.get(i));
-			if( dat instanceof MatrixObject )
-				varsState[i] = ((MatrixObject)dat).isCleanupEnabled();
+			if( dat instanceof CacheableData<?>  )
+				varsState[pos++] = ((CacheableData<?>)dat).isCleanupEnabled();
+			else if( dat instanceof ListObject )
+				for( Data dat2 : ((ListObject)dat).getData() )
+					if( dat2 instanceof CacheableData<?> )
+						varsState[pos++] = ((CacheableData<?>)dat2).isCleanupEnabled();
 		}
 		
 		//step 2) pin variables
 		for( int i=0; i<varList.size(); i++ ) {
 			Data dat = _variables.get(varList.get(i));
-			if( dat instanceof MatrixObject )
-				((MatrixObject)dat).enableCleanup(false); 
+			if( dat instanceof CacheableData<?> )
+				((CacheableData<?>)dat).enableCleanup(false);
+			else if( dat instanceof ListObject )
+				for( Data dat2 : ((ListObject)dat).getData() )
+					if( dat2 instanceof CacheableData<?> )
+						((CacheableData<?>)dat2).enableCleanup(false);
 		}
 		
 		return varsState;
@@ -576,10 +595,14 @@ public class ExecutionContext {
 	 * @param varsState variable state
 	 */
 	public void unpinVariables(List<String> varList, boolean[] varsState) {
-		for( int i=0; i<varList.size(); i++ ) {
+		for( int i=0, pos=0; i<varList.size(); i++ ) {
 			Data dat = _variables.get(varList.get(i));
-			if( dat instanceof MatrixObject )
-				((MatrixObject)dat).enableCleanup(varsState[i]);
+			if( dat instanceof CacheableData<?> )
+				((CacheableData<?>)dat).enableCleanup(varsState[pos++]);
+			else if( dat instanceof ListObject )
+				for( Data dat2 : ((ListObject)dat).getData() )
+					if( dat2 instanceof CacheableData<?> )
+						((CacheableData<?>)dat2).enableCleanup(varsState[pos++]);
 		}
 	}
 	
@@ -608,6 +631,16 @@ public class ExecutionContext {
 		return ret;
 	}
 	
+	public final void cleanupDataObject(Data dat) {
+		if( dat == null ) return;
+		if ( dat instanceof CacheableData )
+			cleanupCacheableData( (CacheableData<?>)dat );
+		else if( dat instanceof ListObject )
+			for( Data dat2 : ((ListObject)dat).getData() )
+				if( dat2 instanceof CacheableData<?> )
+					cleanupCacheableData( (CacheableData<?>)dat2 );
+	}
+	
 	public void cleanupCacheableData(CacheableData<?> mo) {
 		if (DMLScript.JMLC_MEM_STATISTICS)
 			Statistics.removeCPMemObject(System.identityHashCode(mo));

http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java
index cd741fe..8e47fce 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/FunctionCallCPInstruction.java
@@ -32,7 +32,6 @@ import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.DMLScriptException;
 import org.apache.sysml.runtime.controlprogram.FunctionProgramBlock;
 import org.apache.sysml.runtime.controlprogram.LocalVariableMap;
-import org.apache.sysml.runtime.controlprogram.caching.CacheableData;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysml.runtime.controlprogram.context.ExecutionContextFactory;
 import org.apache.sysml.runtime.instructions.Instruction;
@@ -181,9 +180,7 @@ public class FunctionCallCPInstruction extends CPInstruction {
 			if( expectRetVars.contains(varName) )
 				continue;
 			//cleanup unexpected return values to avoid leaks
-			Data var = fn_ec.removeVariable(varName);
-			if( var instanceof CacheableData )
-				fn_ec.cleanupCacheableData((CacheableData<?>)var);
+			fn_ec.cleanupDataObject(fn_ec.removeVariable(varName));
 		}
 		
 		// Unpin the pinned variables
@@ -200,9 +197,8 @@ public class FunctionCallCPInstruction extends CPInstruction {
 
 			//cleanup existing data bound to output variable name
 			Data exdata = ec.removeVariable(boundVarName);
-			if ( exdata != null && exdata instanceof CacheableData && exdata != boundValue ) {
-				ec.cleanupCacheableData( (CacheableData<?>)exdata );
-			}
+			if( exdata != boundValue )
+				ec.cleanupDataObject(exdata);
 			
 			//add/replace data in symbol table
 			ec.setVariable(boundVarName, boundValue);

http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java
index 1468c76..8834514 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ListObject.java
@@ -34,17 +34,18 @@ public class ListObject extends Data {
 	private final List<Data> _data;
 	private boolean[] _dataState = null;
 	private List<String> _names = null;
+	private int _nCacheable;
 	
 	public ListObject(List<Data> data) {
-		super(DataType.LIST, ValueType.UNKNOWN);
-		_data = data;
-		_names = null;
+		this(data, null);
 	}
 
 	public ListObject(List<Data> data, List<String> names) {
 		super(DataType.LIST, ValueType.UNKNOWN);
 		_data = data;
 		_names = names;
+		_nCacheable = (int) data.stream().filter(
+			d -> d instanceof CacheableData).count();
 	}
 	
 	public ListObject(ListObject that) {
@@ -66,6 +67,10 @@ public class ListObject extends Data {
 		return _data.size();
 	}
 	
+	public int getNumCacheableData() {
+		return _nCacheable;
+	}
+	
 	public List<String> getNames() {
 		return _names;
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
index b2506b8..4435738 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ParameterizedBuiltinCPInstruction.java
@@ -344,9 +344,7 @@ public class ParameterizedBuiltinCPInstruction extends ComputationCPInstruction
 			
 			//create list object over all inputs
 			ListObject list = new ListObject(data, names);
-			
-			//disable cleanup of individual objects and store cleanup state
-			list.setStatus(ec.pinVariables(new ArrayList<>(params.values())));
+			list.setStatus(new boolean[params.size()]);
 			
 			ec.setVariable(output.getName(), list);
 		}

http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
index 6acef43..c3363da 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/ScalarBuiltinNaryCPInstruction.java
@@ -97,10 +97,7 @@ public class ScalarBuiltinNaryCPInstruction extends BuiltinNaryCPInstruction {
 			
 			//create list object over all inputs
 			ListObject list = new ListObject(data);
-			
-			//disable cleanup of individual objects and store cleanup state
-			list.setStatus(ec.pinVariables(Arrays.stream(inputs)
-				.map(in -> in.getName()).collect(Collectors.toList())));
+			list.setStatus(new boolean[data.size()]);
 			
 			ec.setVariable(output.getName(), list);
 		}

http://git-wip-us.apache.org/repos/asf/systemml/blob/aed66df1/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
index 4da1a17..b425707 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/VariableCPInstruction.java
@@ -726,8 +726,8 @@ public class VariableCPInstruction extends CPInstruction {
 			// example: mvvar tempA A
 			
 			// get source variable 
-			Data srcData = ec.getVariable(getInput1().getName());		
-				
+			Data srcData = ec.getVariable(getInput1().getName());
+			
 			if ( srcData == null ) {
 				throw new DMLRuntimeException("Unexpected error: could not find a data object "
 					+ "for variable name:" + getInput1().getName() + ", while processing instruction ");
@@ -738,9 +738,8 @@ public class VariableCPInstruction extends CPInstruction {
 				Data tgt = ec.removeVariable(getInput2().getName());
 					
 				//cleanup matrix data on fs/hdfs (if necessary)
-				if ( tgt != null && tgt instanceof CacheableData ) {
-					ec.cleanupCacheableData((CacheableData<?>) tgt);
-				}
+				if( tgt != null )
+					ec.cleanupDataObject(tgt);
 			}
 			
 			// do the actual move
@@ -788,9 +787,8 @@ public class VariableCPInstruction extends CPInstruction {
 		Data input2_data = ec.removeVariable(getInput2().getName());
 		
 		//cleanup matrix data on fs/hdfs (if necessary)
-		if ( input2_data != null && input2_data instanceof CacheableData ) {
-			ec.cleanupCacheableData((CacheableData<?>) input2_data);
-		}
+		if( input2_data != null )
+			ec.cleanupDataObject(input2_data);
 		
 		// do the actual copy!
 		ec.setVariable(getInput2().getName(), dd);
@@ -844,9 +842,8 @@ public class VariableCPInstruction extends CPInstruction {
 		// remove variable from symbol table
 		Data dat = ec.removeVariable(varname);
 		//cleanup matrix data on fs/hdfs (if necessary)
-		if ( dat != null && dat instanceof CacheableData ) {
-			ec.cleanupCacheableData((CacheableData<?>) dat);
-		}
+		if( dat != null )
+			ec.cleanupDataObject(dat);
 	}
 	
 	/**