You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2020/04/09 16:55:45 UTC

[systemml] branch master updated: [SYSTEMDS-331] Extended lineage-based reuse (caching of scalars)

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git


The following commit(s) were added to refs/heads/master by this push:
     new 6c94556  [SYSTEMDS-331] Extended lineage-based reuse (caching of scalars)
6c94556 is described below

commit 6c9455678b2c38a41db741270a86812b05ee77ca
Author: arnabp <ar...@tugraz.at>
AuthorDate: Thu Apr 9 17:45:12 2020 +0200

    [SYSTEMDS-331] Extended lineage-based reuse (caching of scalars)
    
    - This patch contains lineage caching support for scalar objects. This
    enables instruction level and multi-level reuse of
    operations/functions/statementblocks producing at least one scalar
    output. This patch improves multi-level cache hits.
    - Furthermore, this adds a new option `-reuse_multilevel` to enable
    multi-level reuse.
    - This patch also fixes few bugs and enhances reusable instructions
    list.
    - Additional fix for lineage cache reset to avoid endless loops on
    eviction in sequences of tests
    
    Closes #876.
---
 docs/Tasks.txt                                     |  12 +
 src/main/java/org/apache/sysds/api/DMLOptions.java |   2 +
 .../runtime/controlprogram/BasicProgramBlock.java  |   5 +-
 .../runtime/instructions/cp/BooleanObject.java     |   5 +
 .../runtime/instructions/cp/DoubleObject.java      |   5 +
 .../sysds/runtime/instructions/cp/IntObject.java   |   5 +
 .../runtime/instructions/cp/ScalarObject.java      |   3 +
 .../runtime/instructions/cp/StringObject.java      |   5 +
 .../org/apache/sysds/runtime/lineage/Lineage.java  |   1 +
 .../apache/sysds/runtime/lineage/LineageCache.java | 255 ++++++++++++++-------
 .../sysds/runtime/lineage/LineageCacheConfig.java  |   8 +-
 .../runtime/lineage/LineageCacheStatistics.java    |   4 +-
 .../sysds/runtime/lineage/LineageCodegenItem.java  |   4 +
 .../apache/sysds/runtime/lineage/LineageMap.java   |   8 +-
 .../sysds/runtime/lineage/LineageParser.java       |   6 +-
 .../sysds/runtime/lineage/LineageRewriteReuse.java |  77 ++-----
 .../sysds/runtime/lineage/LineageTokenizer.java    |   2 +-
 .../test/functions/lineage/FullReuseTest.java      |   7 +
 .../functions/lineage/FunctionFullReuseTest.java   |   2 +-
 .../test/functions/lineage/SBFullReuseTest.java    |   2 +-
 src/test/scripts/functions/lineage/FullReuse4.dml  |  34 +++
 21 files changed, 296 insertions(+), 156 deletions(-)

diff --git a/docs/Tasks.txt b/docs/Tasks.txt
index 5fd6749..42741da 100644
--- a/docs/Tasks.txt
+++ b/docs/Tasks.txt
@@ -236,6 +236,18 @@ SYSTEMDS-320 Merge SystemDS into Apache SystemML                      OK
  * 321 Merge histories of SystemDS and SystemML                       OK
  * 322 Change global package names                                    OK
  * 323 Fix licenses and notice file                                   OK 
+ 
+SYSTEMDS-330 Lineage Tracing, Reuse and Integration
+ * 331 Cache and reuse scalar outputs (instruction and multi-level)   OK
+ * 332 Parfor integration with multi-level reuse 
+ * 333 Use exact execution time for cost based eviction
+ 
+SYSTEMDS-340 Compiler Assisted Lineage Caching and Reuse
+ * 341 Finalize unmarking of loop dependent operations
+ * 342 Mark functions as last-use to enable early eviction
+ * 343 Identify equal last level HOPs to ensure SB-level reuse
+ * 344 Unmark functions/SBs containing non-determinism for caching
+ * 345 Compiler assisted cache configuration
 
 Others:
  * Break append instruction to cbind and rbind 
diff --git a/src/main/java/org/apache/sysds/api/DMLOptions.java b/src/main/java/org/apache/sysds/api/DMLOptions.java
index 7eca8ab..b9972a3 100644
--- a/src/main/java/org/apache/sysds/api/DMLOptions.java
+++ b/src/main/java/org/apache/sysds/api/DMLOptions.java
@@ -120,6 +120,8 @@ public class DMLOptions {
 							dmlOptions.linReuseType = ReuseCacheType.REUSE_FULL;
 						else if (lineageType.equalsIgnoreCase("reuse_partial"))
 							dmlOptions.linReuseType = ReuseCacheType.REUSE_PARTIAL;
+						else if (lineageType.equalsIgnoreCase("reuse_multilevel"))
+							dmlOptions.linReuseType = ReuseCacheType.REUSE_MULTILEVEL;
 						else if (lineageType.equalsIgnoreCase("reuse_hybrid"))
 							dmlOptions.linReuseType = ReuseCacheType.REUSE_HYBRID;
 						else if (lineageType.equalsIgnoreCase("none"))
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/BasicProgramBlock.java b/src/main/java/org/apache/sysds/runtime/controlprogram/BasicProgramBlock.java
index 2a9e281..1f52a75 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/BasicProgramBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/BasicProgramBlock.java
@@ -28,6 +28,7 @@ import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysds.runtime.instructions.Instruction;
 import org.apache.sysds.runtime.lineage.LineageCache;
+import org.apache.sysds.runtime.lineage.LineageCacheConfig;
 import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;
 import org.apache.sysds.runtime.lineage.LineageCacheStatistics;
 import org.apache.sysds.runtime.lineage.LineageItem;
@@ -107,7 +108,9 @@ public class BasicProgramBlock extends ProgramBlock
 		
 		//statement-block-level, lineage-based reuse
 		LineageItem[] liInputs = null;
-		if (_sb != null && !ReuseCacheType.isNone()) {
+		if (_sb != null 
+			&& !ReuseCacheType.isNone()
+			&& LineageCacheConfig.getCacheType().isMultilevelReuse()) {
 			String name = "SB" + _sb.getSBID();
 			liInputs = LineageItemUtils.getLineageItemInputstoSB(_sb.getInputstoSB(), ec);
 			if( LineageCache.reuse(_sb.getOutputsofSB(), _sb.getOutputsofSB().size(), liInputs, name, ec) ) {
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/BooleanObject.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/BooleanObject.java
index 35f5705..c285761 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/BooleanObject.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/BooleanObject.java
@@ -62,4 +62,9 @@ public class BooleanObject extends ScalarObject
 	public Object getValue(){
 		return _value;
 	}
+
+	@Override
+	public int getSize() {
+		return 16 + 8;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/DoubleObject.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/DoubleObject.java
index 0fc0b9f..f7327ca 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/DoubleObject.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/DoubleObject.java
@@ -62,4 +62,9 @@ public class DoubleObject extends ScalarObject
 	public String getDebugName() {
 		return null;
 	}
+	
+	@Override
+	public int getSize() {
+		return 16 + Double.SIZE/Byte.SIZE;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/IntObject.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/IntObject.java
index 0d0a743..84a0934 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/IntObject.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/IntObject.java
@@ -58,4 +58,9 @@ public class IntObject extends ScalarObject
 	public Object getValue(){
 		return _value;
 	}
+	
+	@Override
+	public int getSize() {
+		return 16 + Integer.SIZE/Byte.SIZE;
+	}
 }
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarObject.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarObject.java
index e114b97..f91ac80 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarObject.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/ScalarObject.java
@@ -38,6 +38,9 @@ public abstract class ScalarObject extends Data
 
 	public abstract String getStringValue();
 	
+	//TODO: Get the actual sizes by using profilers (JOL/Instrumentation) and hardcode in here.
+	public abstract int getSize();
+	
 	public String getLanguageSpecificStringValue() {
 		return getStringValue();
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/StringObject.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/StringObject.java
index d5c01cc..00d0037 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/StringObject.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/StringObject.java
@@ -59,6 +59,11 @@ public class StringObject extends ScalarObject
 	public Object getValue(){
 		return _value;
 	}
+	
+	@Override
+	public int getSize() {
+		return 16 + _value.length() * 1;
+	}
 
 	public static void checkMaxStringLength( long len ) {
 		if( len > MAX_STRING_SIZE ) {
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java b/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
index 9e61981..2b2c45a 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
@@ -123,6 +123,7 @@ public class Lineage {
 	public static void resetInternalState() {
 		LineageItem.resetIDSequence();
 		LineageCache.resetCache();
+		LineageCacheStatistics.reset();
 	}
 	
 	public static void setLinReusePartial() {
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
index 1e07522..789b9f7 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
@@ -31,7 +31,6 @@ import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
 import org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
 import org.apache.sysds.runtime.instructions.CPInstructionParser;
 import org.apache.sysds.runtime.instructions.Instruction;
-import org.apache.sysds.runtime.instructions.cp.BinaryMatrixMatrixCPInstruction;
 import org.apache.sysds.runtime.instructions.cp.CPInstruction.CPType;
 import org.apache.sysds.runtime.instructions.cp.ComputationCPInstruction;
 import org.apache.sysds.runtime.instructions.cp.Data;
@@ -56,7 +55,7 @@ public class LineageCache {
 	private static final Map<LineageItem, Entry> _cache = new HashMap<>();
 	private static final Map<LineageItem, SpilledItem> _spillList = new HashMap<>();
 	private static final HashSet<LineageItem> _removelist = new HashSet<>();
-	private static final double CACHE_FRAC = 0.05; // 5% of JVM mem
+	private static final double CACHE_FRAC = 0.05; // 5% of JVM heap size
 	private static final long CACHE_LIMIT; //limit in bytes
 	private static String outdir = null;
 	private static long _cachesize = 0;
@@ -93,62 +92,62 @@ public class LineageCache {
 				//create a placeholder if no reuse to avoid redundancy
 				//(e.g., concurrent threads that try to start the computation)
 				if(!reuse && isMarkedForCaching(inst, ec))
-					putIntern(item, null, 0);
+					putIntern(item, null, null, 0);
 			}
 		}
 		
 		return reuse;
 	}
 	
-	public static MatrixBlock reuse(LineageItem item) {
+	public static Entry reuse(LineageItem item) {
 		if (ReuseCacheType.isNone())
 			return null;
 
-		MatrixBlock d = null;
+		Entry e = null;
 		synchronized( _cache ) {
 			if (LineageCache.probe(item)) 
-				d = LineageCache.get(item);
+				e = LineageCache.get(item);
 			else
 				//create a placeholder if no reuse to avoid redundancy
 				//(e.g., concurrent threads that try to start the computation)
-				putIntern(item, null, 0);
+				putIntern(item, null, null, 0);
 				//FIXME: parfor - every thread gets different function names
 		}
-		return d;
+		return e;
 	}
 	
 	public static boolean reuse(List<String> outputs, int numOutputs, LineageItem[] liInputs, String name, ExecutionContext ec)
 	{
-		if( ReuseCacheType.isNone() )
+		if( ReuseCacheType.isNone() || !LineageCacheConfig.getCacheType().isMultilevelReuse())
 			return false;
 
 		boolean reuse = (numOutputs != 0);
+		HashMap<String, Data> funcOutputs = new HashMap<>();
+		HashMap<String, LineageItem> funcLIs = new HashMap<>();
 		for (int i=0; i<numOutputs; i++) {
 			String opcode = name + String.valueOf(i+1);
 			LineageItem li = new LineageItem(outputs.get(i), opcode, liInputs);
-			MatrixBlock cachedValue = LineageCache.reuse(li); 
+			Entry cachedValue = LineageCache.reuse(li); 
 			//TODO: handling of recursive calls
 			
-			if (cachedValue != null) {
+			if (cachedValue != null && !cachedValue.isNullVal()) {
 				String boundVarName = outputs.get(i);
+				Data boundValue = null;
 				//convert to matrix object
-				MetaDataFormat md = new MetaDataFormat(cachedValue.getDataCharacteristics(), 
-						OutputInfo.BinaryCellOutputInfo, InputInfo.BinaryCellInputInfo);
-				MatrixObject boundValue = new MatrixObject(ValueType.FP64, boundVarName, md);
-				boundValue.acquireModify(cachedValue);
-				boundValue.release();
-
-				//cleanup existing data bound to output variable name
-				Data exdata = ec.removeVariable(boundVarName);
-				if( exdata != boundValue)
-					ec.cleanupDataObject(exdata);
+				if (cachedValue.isMatrixValue()) {
+					MetaDataFormat md = new MetaDataFormat(cachedValue.getMBValue().getDataCharacteristics(), 
+							OutputInfo.BinaryCellOutputInfo, InputInfo.BinaryCellInputInfo);
+					boundValue = new MatrixObject(ValueType.FP64, boundVarName, md);
+					((MatrixObject)boundValue).acquireModify(cachedValue.getMBValue());
+					((MatrixObject)boundValue).release();
+				}
+				else
+					boundValue = cachedValue.getSOValue();
 
-				//add/replace data in symbol table
-				ec.setVariable(boundVarName, boundValue);
+				funcOutputs.put(boundVarName, boundValue);
 				
-				// map original lineage of function return to the calling site
 				LineageItem orig = _cache.get(li)._origItem; //FIXME: synchronize
-				ec.getLineage().set(boundVarName, orig);
+				funcLIs.put(boundVarName, orig);
 			}
 			else {
 				// if one output cannot be reused, we need to execute the function
@@ -157,6 +156,19 @@ public class LineageCache {
 				reuse = false;
 			}
 		}
+		
+		if (reuse) {
+			funcOutputs.forEach((var, val) -> {
+				//cleanup existing data bound to output variable name
+				Data exdata = ec.removeVariable(var);
+				if( exdata != val)
+					ec.cleanupDataObject(exdata);
+				//add/replace data in symbol table
+				ec.setVariable(var, val);
+			});
+			//map original lineage items return to the calling site
+			funcLIs.forEach((var, li) -> ec.getLineage().set(var, li));
+		}
 		return reuse;
 	}
 	
@@ -164,9 +176,10 @@ public class LineageCache {
 	public static void put(Instruction inst, ExecutionContext ec) {
 		if (inst instanceof ComputationCPInstruction && isReusable(inst, ec) ) {
 			LineageItem item = ((LineageTraceable) inst).getLineageItems(ec)[0];
+			//This method is called only to put matrix value
 			MatrixObject mo = ec.getMatrixObject(((ComputationCPInstruction) inst).output);
 			synchronized( _cache ) {
-				putIntern(item, mo.acquireReadAndRelease(), getRecomputeEstimate(inst, ec));
+				putIntern(item, mo.acquireReadAndRelease(), null, getRecomputeEstimate(inst, ec));
 			}
 		}
 	}
@@ -177,14 +190,18 @@ public class LineageCache {
 		if (inst instanceof ComputationCPInstruction && isReusable(inst, ec) ) {
 			if (!isMarkedForCaching(inst, ec)) return;
 			LineageItem item = ((LineageTraceable) inst).getLineageItems(ec)[0];
-			MatrixObject mo = ec.getMatrixObject(((ComputationCPInstruction) inst).output);
-			MatrixBlock value = mo.acquireReadAndRelease();
-			_cache.get(item).setValue(value, getRecomputeEstimate(inst, ec)); //outside sync to prevent deadlocks
+			//MatrixObject mo = ec.getMatrixObject(((ComputationCPInstruction) inst).output);
+			Data data = ec.getVariable(((ComputationCPInstruction) inst).output);
+			MatrixObject mo = data instanceof MatrixObject ? (MatrixObject)data : null;
+			ScalarObject so = data instanceof ScalarObject ? (ScalarObject)data : null;
+			MatrixBlock Mval = mo != null ? mo.acquireReadAndRelease() : null;
+			_cache.get(item).setValue(Mval, so, getRecomputeEstimate(inst, ec)); //outside sync to prevent deadlocks
+			long size = _cache.get(item).getSize();
 			
 			synchronized( _cache ) {
-				if( !isBelowThreshold(value) ) 
-					makeSpace(value);
-				updateSize(value, true);
+				if( !isBelowThreshold(size) ) 
+					makeSpace(size);
+				updateSize(size, true);
 			}
 		}
 	}
@@ -193,15 +210,20 @@ public class LineageCache {
 		if (ReuseCacheType.isNone())
 			return;
 		if (LineageCache.probe(probeItem)) {
-			MatrixBlock value = LineageCache.get(probeItem);
+			Entry oe = LineageCache.get(probeItem);
 			Entry e = _cache.get(item);
-			e.setValue(value, 0); //TODO: compute estimate for function
+			//TODO: compute estimate for function
+			if (oe.isMatrixValue())
+				e.setValue(oe.getMBValue(), null, 0); 
+			else
+				e.setValue(null, oe.getSOValue(), 0);
 			e._origItem = probeItem; 
 
+			long size = oe.getSize();
 			synchronized( _cache ) {
-				if(!isBelowThreshold(value)) 
-					makeSpace(value);
-				updateSize(value, true);
+				if(!isBelowThreshold(size)) 
+					makeSpace(size);
+				updateSize(size, true);
 			}
 		}
 		else
@@ -211,7 +233,7 @@ public class LineageCache {
 
 	public static void putValue(List<String> outputs, int numOutputs, LineageItem[] liInputs, String name, ExecutionContext ec)
 	{
-		if( ReuseCacheType.isNone() )
+		if( ReuseCacheType.isNone() || !LineageCacheConfig.getCacheType().isMultilevelReuse())
 			return;
 
 		HashMap<LineageItem, LineageItem> FuncLIMap = new HashMap<>();
@@ -221,13 +243,11 @@ public class LineageCache {
 			LineageItem li = new LineageItem(outputs.get(i), opcode, liInputs);
 			String boundVarName = outputs.get(i);
 			LineageItem boundLI = ec.getLineage().get(boundVarName);
-			Data boundValue = ec.getVariable(boundVarName);
 			if (boundLI != null)
 				boundLI.resetVisitStatus();
 			if (boundLI == null 
 				|| !LineageCache.probe(li)
-				|| LineageItemUtils.containsRandDataGen(new HashSet<>(Arrays.asList(liInputs)), boundLI)
-				|| boundValue instanceof ScalarObject) { //TODO: cache scalar objects
+				|| LineageItemUtils.containsRandDataGen(new HashSet<>(Arrays.asList(liInputs)), boundLI)) {
 				AllOutputsCacheable = false;
 			}
 			FuncLIMap.put(li, boundLI);
@@ -243,22 +263,23 @@ public class LineageCache {
 		return;
 	}
 	
-	private static void putIntern(LineageItem key, MatrixBlock value, double compcost) {
+	private static void putIntern(LineageItem key, MatrixBlock Mval, ScalarObject Sval, double compcost) {
 		if (_cache.containsKey(key))
 			//can come here if reuse_partial option is enabled
 			return; 
 			//throw new DMLRuntimeException("Redundant lineage caching detected: "+inst);
 		
 		// Create a new entry.
-		Entry newItem = new Entry(key, value, compcost);
+		Entry newItem = new Entry(key, Mval, Sval, compcost);
 		
 		// Make space by removing or spilling LRU entries.
-		if( value != null ) {
-			if( value.getInMemorySize() > CACHE_LIMIT )
+		if( Mval != null || Sval != null ) {
+			long size = newItem.getSize();
+			if( size > CACHE_LIMIT )
 				return; //not applicable
-			if( !isBelowThreshold(value) ) 
-				makeSpace(value);
-			updateSize(value, true);
+			if( !isBelowThreshold(size) ) 
+				makeSpace(size);
+			updateSize(size, true);
 		}
 		
 		// Place the entry at head position.
@@ -282,6 +303,9 @@ public class LineageCache {
 		_spillList.clear();
 		_head = null;
 		_end = null;
+		// reset cache size, otherwise the cache clear leads to unusable 
+		// space which means evictions could run into endless loops
+		_cachesize = 0;
 		if (DMLScript.STATISTICS)
 			_removelist.clear();
 	}
@@ -289,14 +313,17 @@ public class LineageCache {
 
 	private static boolean fullReuse (LineageItem item, ComputationCPInstruction inst, ExecutionContext ec) {
 		if (LineageCache.probe(item)) {
-			MatrixBlock d = LineageCache.get(item);
-			ec.setMatrixOutput(inst.output.getName(), d);
+			Entry e = LineageCache.get(item);
+			if (e.isMatrixValue())
+				ec.setMatrixOutput(inst.output.getName(), e.getMBValue());
+			else
+				ec.setScalarOutput(inst.output.getName(), e.getSOValue());
 			return true;
 		}
 		return false;
 	}
 	
-	protected static MatrixBlock get(LineageItem key) {
+	protected static Entry get(LineageItem key) {
 		// This method is called only when entry is present either in cache or in local FS.
 		if (_cache.containsKey(key)) {
 			// Read and put the entry at head.
@@ -305,7 +332,7 @@ public class LineageCache {
 			setHead(e);
 			if (DMLScript.STATISTICS)
 				LineageCacheStatistics.incrementMemHits();
-			return e.getValue();
+			return e;
 		}
 		else
 			return readFromLocalFS(key);
@@ -315,10 +342,13 @@ public class LineageCache {
 		// TODO: Move this to the new class LineageCacheConfig and extend
 		return inst.getOpcode().equalsIgnoreCase("tsmm")
 				|| inst.getOpcode().equalsIgnoreCase("ba+*")
-				|| ((inst.getOpcode().equalsIgnoreCase("*") 
-				|| inst.getOpcode().equalsIgnoreCase("/")) &&
-					inst instanceof BinaryMatrixMatrixCPInstruction) //TODO support scalar
+				|| inst.getOpcode().equalsIgnoreCase("*") 
+				|| inst.getOpcode().equalsIgnoreCase("/")
+				|| inst.getOpcode().equalsIgnoreCase("+")
+				|| inst.getOpcode().equalsIgnoreCase("nrow")
+				|| inst.getOpcode().equalsIgnoreCase("ncol")
 				|| inst.getOpcode().equalsIgnoreCase("rightIndex")
+				|| inst.getOpcode().equalsIgnoreCase("leftIndex")
 				|| inst.getOpcode().equalsIgnoreCase("groupedagg")
 				|| inst.getOpcode().equalsIgnoreCase("r'")
 				|| (inst.getOpcode().equalsIgnoreCase("append") && isVectorAppend(inst, ec))
@@ -339,42 +369,56 @@ public class LineageCache {
 		if (!LineageCacheConfig.getCompAssRW())
 			return true;
 
-		MatrixObject mo = ec.getMatrixObject(((ComputationCPInstruction)inst).output);
-		//limit this to full reuse as partial reuse is applicable even for loop dependent operation
-		boolean marked = (LineageCacheConfig.getCacheType() == ReuseCacheType.REUSE_FULL  && !mo.isMarked()) ? false : true; 
-		return marked;
+		if (((ComputationCPInstruction)inst).output.isMatrix()) {
+			MatrixObject mo = ec.getMatrixObject(((ComputationCPInstruction)inst).output);
+			//limit this to full reuse as partial reuse is applicable even for loop dependent operation
+			boolean marked = (LineageCacheConfig.getCacheType() == ReuseCacheType.REUSE_FULL  
+					&& !mo.isMarked()) ? false : true; 
+			return marked;
+		}
+		else
+			return true;
 	}
 	
 	//---------------- CACHE SPACE MANAGEMENT METHODS -----------------
 	
-	private static boolean isBelowThreshold(MatrixBlock value) {
-		return ((value.getInMemorySize() + _cachesize) <= CACHE_LIMIT);
+	private static boolean isBelowThreshold(long spaceNeeded) {
+		return ((spaceNeeded + _cachesize) <= CACHE_LIMIT);
 	}
 	
-	private static void makeSpace(MatrixBlock value) {
-		double valSize = value.getInMemorySize();
+	private static void makeSpace(long spaceNeeded) {
 		// cost based eviction
-		while ((valSize+_cachesize) > CACHE_LIMIT)
+		while ((spaceNeeded +_cachesize) > CACHE_LIMIT)
 		{
 			if (_cache.get(_end._key).isNullVal()) {
-				setEnd2Head(_end);  // Must be null function entry. Move to next.
+				//Must be a null function/SB placeholder entry. This 
+				//function is currently being executed. Skip and continue.
+				setEnd2Head(_end);
 				continue;
 			}
-				
-			double reduction = _cache.get(_end._key).getValue().getInMemorySize();
-			if (_cache.get(_end._key)._compEst > getDiskSpillEstimate() 
-					&& LineageCacheConfig.isSetSpill())
-				spillToLocalFS(); // If re-computation is more expensive, spill data to disk.
+			
+			double reduction = _cache.get(_end._key).getSize();
+			if (_cache.get(_end._key).isMatrixValue()) { //spill matrix blocks only
+				if (_cache.get(_end._key)._compEst > getDiskSpillEstimate() 
+						&& LineageCacheConfig.isSetSpill())
+					spillToLocalFS(); // If re-computation is more expensive, spill data to disk.
+			}
 
+			if (_cache.get(_end._key)._compEst == 0) {
+				//Must be a function/SB/scalar entry. Move to next.
+				//FIXME: Remove this logic after implementing new eviction logic.
+				setEnd2Head(_end);  
+				continue;
+			}
 			removeEntry(reduction);
 		} 
 	}
 	
-	private static void updateSize(MatrixBlock value, boolean addspace) {
+	private static void updateSize(long space, boolean addspace) {
 		if (addspace)
-			_cachesize += value.getInMemorySize();
+			_cachesize += space;
 		else
-			_cachesize -= value.getInMemorySize();
+			_cachesize -= space;
 	}
 
 	//---------------- COSTING RELATED METHODS -----------------
@@ -382,7 +426,7 @@ public class LineageCache {
 	private static double getDiskSpillEstimate() {
 		// This includes sum of writing to and reading from disk
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
-		MatrixBlock mb = _cache.get(_end._key).getValue();
+		MatrixBlock mb = _cache.get(_end._key).getMBValue();
 		long r = mb.getNumRows();
 		long c = mb.getNumColumns();
 		long nnz = mb.getNonZeros();
@@ -395,6 +439,10 @@ public class LineageCache {
 	}
 	
 	private static double getRecomputeEstimate(Instruction inst, ExecutionContext ec) {
+		if (!((ComputationCPInstruction)inst).output.isMatrix()
+			|| (((ComputationCPInstruction)inst).input1 != null && !((ComputationCPInstruction)inst).input1.isMatrix()))
+			return 0; //this method will be deprecated. No need to support scalar
+
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
 		double nflops = 0;
 		String instop= inst.getOpcode().contains("spoof") ? "spoof" : inst.getOpcode();
@@ -464,7 +512,7 @@ public class LineageCache {
 				long nnz1 = mo1.getNnz();
 				double s1 = OptimizerUtils.getSparsity(r1, c1, nnz1);
 				boolean lsparse = MatrixBlock.evalSparseFormatInMemory(r1, c1, nnz1);
-				if (inst.getOpcode().equalsIgnoreCase("rightIndex"))
+				//if (inst.getOpcode().equalsIgnoreCase("rightIndex"))
 					nflops = 1.0 * (lsparse ? r1 * c1 * s1 : r1 * c1); //FIXME
 				break;
 			}
@@ -545,7 +593,7 @@ public class LineageCache {
 		}
 		String outfile = outdir+"/"+_cache.get(_end._key)._key.getId();
 		try {
-			LocalFileUtils.writeMatrixBlockToLocal(outfile, _cache.get(_end._key).getValue());
+			LocalFileUtils.writeMatrixBlockToLocal(outfile, _cache.get(_end._key).getMBValue());
 		} catch (IOException e) {
 			throw new DMLRuntimeException ("Write to " + outfile + " failed.", e);
 		}
@@ -558,7 +606,7 @@ public class LineageCache {
 		_spillList.put(_end._key, new SpilledItem(outfile, _end._compEst));
 	}
 	
-	private static MatrixBlock readFromLocalFS(LineageItem key) {
+	private static Entry readFromLocalFS(LineageItem key) {
 		long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
 		MatrixBlock mb = null;
 		// Read from local FS
@@ -569,14 +617,14 @@ public class LineageCache {
 		}
 		// Restore to cache
 		LocalFileUtils.deleteFileIfExists(_spillList.get(key)._outfile, true);
-		putIntern(key, mb, _spillList.get(key)._compEst);
+		putIntern(key, mb, null, _spillList.get(key)._compEst);
 		_spillList.remove(key);
 		if (DMLScript.STATISTICS) {
 			long t1 = System.nanoTime();
 			LineageCacheStatistics.incrementFSReadTime(t1-t0);
 			LineageCacheStatistics.incrementFSHits();
 		}
-		return mb;
+		return _cache.get(key);
 	}
 
 	//------------------ LINKEDLIST MAINTENANCE METHODS -------------------
@@ -623,41 +671,72 @@ public class LineageCache {
 		_cache.remove(key);
 	}
 	
-	private static class Entry {
+	static class Entry {
 		private final LineageItem _key;
-		private MatrixBlock _val;
+		private MatrixBlock _MBval;
+		private ScalarObject _SOval;
 		double _compEst;
 		private Entry _prev;
 		private Entry _next;
 		private LineageItem _origItem;
 		
-		public Entry(LineageItem key, MatrixBlock value, double computecost) {
+		public Entry(LineageItem key, MatrixBlock Mval, ScalarObject Sval, double computecost) {
 			_key = key;
-			_val = value;
+			_MBval = Mval;
+			_SOval = Sval;
 			_compEst = computecost;
 			_origItem = null;
 		}
 
-		public synchronized MatrixBlock getValue() {
+		public synchronized MatrixBlock getMBValue() {
 			try {
 				//wait until other thread completes operation
 				//in order to avoid redundant computation
-				while( _val == null ) {
+				while( _MBval == null ) {
 					wait();
 				}
-				return _val;
+				return _MBval;
+			}
+			catch( InterruptedException ex ) {
+				throw new DMLRuntimeException(ex);
+			}
+		}
+
+		public synchronized ScalarObject getSOValue() {
+			try {
+				//wait until other thread completes operation
+				//in order to avoid redundant computation
+				while( _SOval == null ) {
+					wait();
+				}
+				return _SOval;
 			}
 			catch( InterruptedException ex ) {
 				throw new DMLRuntimeException(ex);
 			}
 		}
 		
+		public synchronized long getSize() {
+			return ((_MBval != null ? _MBval.getInMemorySize() : 0) + (_SOval != null ? _SOval.getSize() : 0));
+		}
+		
 		public boolean isNullVal() {
-			return(_val == null);
+			return(_MBval == null && _SOval == null);
+		}
+		
+		public boolean isMatrixValue() {
+			return(_MBval != null);
 		}
 		
 		public synchronized void setValue(MatrixBlock val, double compEst) {
-			_val = val;
+			_MBval = val;
+			_compEst = compEst;
+			notifyAll();
+		}
+
+		public synchronized void setValue(MatrixBlock mval, ScalarObject so, double compEst) {
+			_MBval = mval;
+			_SOval = so;
 			_compEst = compEst;
 			notifyAll();
 		}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
index 2615cdb..e4ce09b 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
@@ -27,14 +27,18 @@ public class LineageCacheConfig {
 	public enum ReuseCacheType {
 		REUSE_FULL,
 		REUSE_PARTIAL,
+		REUSE_MULTILEVEL,
 		REUSE_HYBRID,
 		NONE;
 		public boolean isFullReuse() {
-			return this == REUSE_FULL || this == REUSE_HYBRID;
+			return this == REUSE_FULL || this == REUSE_MULTILEVEL || this == REUSE_HYBRID;
 		}
 		public boolean isPartialReuse() {
 			return this == REUSE_PARTIAL || this == REUSE_HYBRID;
 		}
+		public boolean isMultilevelReuse() {
+			return this == REUSE_MULTILEVEL || this == REUSE_HYBRID;
+		}
 		public static boolean isNone() {
 			return DMLScript.LINEAGE_REUSE == null
 				|| DMLScript.LINEAGE_REUSE == NONE;
@@ -102,7 +106,7 @@ public class LineageCacheConfig {
 	public static boolean isSetSpill() {
 		return _allowSpill;
 	}
-	
+
 	public static ReuseCacheType getCacheType() {
 		return _cacheType;
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java
index f3fdd20..98ad75e 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheStatistics.java
@@ -94,7 +94,7 @@ public class LineageCacheStatistics {
 	}
 
 	public static void incrementPRewrites() {
-		// Number of times written in local FS.
+		// Number of partial rewrites.
 		_numRewrites.increment();
 	}
 
@@ -119,7 +119,7 @@ public class LineageCacheStatistics {
 	}
 
 	public static void incrementPRewriteTime(long delta) {
-		// Total time spent executing lineage rewrites.
+		// Total time spent compiling lineage rewrites.
 		_ctimeRewrite.add(delta);
 	}
 
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCodegenItem.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageCodegenItem.java
index 688a78c..5f24c89 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCodegenItem.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCodegenItem.java
@@ -25,6 +25,10 @@ import java.util.Map;
 public class LineageCodegenItem {
 	private static Map<String, LineageItem> _codegentraces = new HashMap<>();
 
+	public static void reset() {
+		_codegentraces.clear();
+	}
+	
 	public static LineageItem setCodegenLTrace(String classname, LineageItem li) {
 		return _codegentraces.put(classname, li);
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
index 56fb725..0c4947e 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
@@ -36,14 +36,16 @@ import java.util.Map;
 
 public class LineageMap {
 	
-	private Map<String, LineageItem> _traces = new HashMap<>();
-	private Map<String, LineageItem> _literals = new HashMap<>();
+	private final Map<String, LineageItem> _traces;
+	private final Map<String, LineageItem> _literals;
 	
 	public LineageMap() {
-	
+		_traces = new HashMap<>();
+		_literals = new HashMap<>();
 	}
 	
 	public LineageMap(LineageMap that) {
+		this();
 		_traces.putAll(that._traces);
 		_literals.putAll(that._literals);
 	}
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageParser.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageParser.java
index e30dba7..d6b9ab8 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageParser.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageParser.java
@@ -31,9 +31,9 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 
-public class LineageParser {
-	
-	public static LineageTokenizer lineageTraceTokenizer = new LineageTokenizer();
+public class LineageParser
+{
+	public final static LineageTokenizer lineageTraceTokenizer = new LineageTokenizer();
 	
 	static {
 		lineageTraceTokenizer.add("\\(");
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java
index 8a31f3b..fb5a21f 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageRewriteReuse.java
@@ -100,7 +100,11 @@ public class LineageRewriteReuse
 
 		//put the result into the cache
 		LineageCache.put(curr, ec);
-		DMLScript.EXPLAIN = et;
+		DMLScript.EXPLAIN = et; //TODO can't change this here
+		
+		//cleanup execution context
+		lrwec.getVariables().removeAll();
+		
 		return true;
 	}
 	
@@ -195,11 +199,6 @@ public class LineageRewriteReuse
 		BinaryOp lrwHop = HopRewriteUtils.createBinary(lastRes, tsmm_lr, OpOp2.PLUS);
 		DataOp lrwWrite = HopRewriteUtils.createTransientWrite(LR_VAR, lrwHop);
 
-		if (DMLScript.STATISTICS) {
-			LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
-			LineageCacheStatistics.incrementPRewrites();
-		}
-		
 		// generate runtime instructions
 		LOG.debug("LINEAGE REWRITE rewriteTsmmRbind APPLIED");
 		ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
@@ -262,11 +261,6 @@ public class LineageRewriteReuse
 		NaryOp lrwHop = HopRewriteUtils.createNary(OpOpN.RBIND, rowOne, newCol, rowTwo);
 		DataOp lrwWrite = HopRewriteUtils.createTransientWrite(LR_VAR, lrwHop);
 
-		if (DMLScript.STATISTICS) {
-			LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
-			LineageCacheStatistics.incrementPRewrites();
-		}
-
 		// generate runtime instructions
 		LOG.debug("LINEAGE REWRITE rewriteTsmm2Cbind APPLIED");
 		ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
@@ -311,11 +305,6 @@ public class LineageRewriteReuse
 		BinaryOp lrwHop= HopRewriteUtils.createBinary(lastRes, rowTwo, OpOp2.RBIND);
 		DataOp lrwWrite = HopRewriteUtils.createTransientWrite(LR_VAR, lrwHop);
 
-		if (DMLScript.STATISTICS) {
-			LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
-			LineageCacheStatistics.incrementPRewrites();
-		}
-		
 		// generate runtime instructions
 		LOG.debug("LINEAGE REWRITE rewriteMetMulRbindLeft APPLIED");
 		ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
@@ -360,11 +349,6 @@ public class LineageRewriteReuse
 		BinaryOp lrwHop= HopRewriteUtils.createBinary(lastRes, rowTwo, OpOp2.CBIND);
 		DataOp lrwWrite = HopRewriteUtils.createTransientWrite(LR_VAR, lrwHop);
 
-		if (DMLScript.STATISTICS) {
-			LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
-			LineageCacheStatistics.incrementPRewrites();
-		}
-		
 		// generate runtime instructions
 		LOG.debug("LINEAGE REWRITE rewriteMatMulCbindRight APPLIED");
 		ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
@@ -420,11 +404,6 @@ public class LineageRewriteReuse
 		BinaryOp lrwHop= HopRewriteUtils.createBinary(lastRes, rowTwo, OpOp2.RBIND);
 		DataOp lrwWrite = HopRewriteUtils.createTransientWrite(LR_VAR, lrwHop);
 
-		if (DMLScript.STATISTICS) {
-			LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
-			LineageCacheStatistics.incrementPRewrites();
-		}
-		
 		// generate runtime instructions
 		LOG.debug("LINEAGE REWRITE rewriteElementMulRbind APPLIED");
 		ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
@@ -480,11 +459,6 @@ public class LineageRewriteReuse
 		BinaryOp lrwHop= HopRewriteUtils.createBinary(lastRes, rowTwo, OpOp2.CBIND);
 		DataOp lrwWrite = HopRewriteUtils.createTransientWrite(LR_VAR, lrwHop);
 
-		if (DMLScript.STATISTICS) {
-			LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
-			LineageCacheStatistics.incrementPRewrites();
-		}
-		
 		// generate runtime instructions
 		LOG.debug("LINEAGE REWRITE rewriteElementMulCbind APPLIED");
 		ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
@@ -540,11 +514,6 @@ public class LineageRewriteReuse
 		BinaryOp lrwHop= HopRewriteUtils.createBinary(lastRes, rowTwo, OpOp2.CBIND);
 		DataOp lrwWrite = HopRewriteUtils.createTransientWrite(LR_VAR, lrwHop);
 
-		if (DMLScript.STATISTICS) {
-			LineageCacheStatistics.incrementPRewriteTime(System.nanoTime() - t0);
-			LineageCacheStatistics.incrementPRewrites();
-		}
-
 		// generate runtime instructions
 		LOG.debug("LINEAGE REWRITE rewriteElementMulCbind APPLIED");
 		ArrayList<Instruction> inst = genInst(lrwWrite, lrwec);
@@ -574,10 +543,10 @@ public class LineageRewriteReuse
 				LineageItem input1 = source.getInputs()[0];
 				LineageItem tmp = new LineageItem("toProbe", curr.getOpcode(), new LineageItem[] {input1});
 				if (LineageCache.probe(tmp)) 
-					inCache.put("lastMatrix", LineageCache.get(tmp));
+					inCache.put("lastMatrix", LineageCache.get(tmp).getMBValue());
 				// look for the appended column in cache
 				if (LineageCache.probe(source.getInputs()[1])) 
-					inCache.put("deltaX", LineageCache.get(source.getInputs()[1]));
+					inCache.put("deltaX", LineageCache.get(source.getInputs()[1]).getMBValue());
 			}
 		// return true only if the last tsmm is found
 		return inCache.containsKey("lastMatrix") ? true : false;
@@ -597,10 +566,10 @@ public class LineageRewriteReuse
 				LineageItem input1 = source.getInputs()[0];
 				LineageItem tmp = new LineageItem("toProbe", curr.getOpcode(), new LineageItem[] {input1});
 				if (LineageCache.probe(tmp)) 
-					inCache.put("lastMatrix", LineageCache.get(tmp));
+					inCache.put("lastMatrix", LineageCache.get(tmp).getMBValue());
 				// look for the appended column in cache
 				if (LineageCache.probe(source.getInputs()[1])) 
-					inCache.put("deltaX", LineageCache.get(source.getInputs()[1]));
+					inCache.put("deltaX", LineageCache.get(source.getInputs()[1]).getMBValue());
 			}
 		// return true only if the last tsmm is found
 		return inCache.containsKey("lastMatrix") ? true : false;
@@ -624,10 +593,10 @@ public class LineageRewriteReuse
 					LineageItem tmp = new LineageItem("comb", "cbind", new LineageItem[] {L2appin1, source.getInputs()[1]});
 					LineageItem toProbe = new LineageItem("toProbe", curr.getOpcode(), new LineageItem[] {tmp});
 					if (LineageCache.probe(toProbe)) 
-						inCache.put("lastMatrix", LineageCache.get(toProbe));
+						inCache.put("lastMatrix", LineageCache.get(toProbe).getMBValue());
 					// look for the appended column in cache
 					if (LineageCache.probe(input.getInputs()[1])) 
-						inCache.put("deltaX", LineageCache.get(input.getInputs()[1]));
+						inCache.put("deltaX", LineageCache.get(input.getInputs()[1]).getMBValue());
 				}
 			}
 		// return true only if the last tsmm is found
@@ -649,10 +618,10 @@ public class LineageRewriteReuse
 				// create ba+* lineage on top of the input of last append
 				LineageItem tmp = new LineageItem("toProbe", curr.getOpcode(), new LineageItem[] {leftSource, right});
 				if (LineageCache.probe(tmp))
-					inCache.put("lastMatrix", LineageCache.get(tmp));
+					inCache.put("lastMatrix", LineageCache.get(tmp).getMBValue());
 				// look for the appended column in cache
 				if (LineageCache.probe(left.getInputs()[1])) 
-					inCache.put("deltaX", LineageCache.get(left.getInputs()[1]));
+					inCache.put("deltaX", LineageCache.get(left.getInputs()[1]).getMBValue());
 			}
 		}
 		// return true only if the last tsmm is found
@@ -674,10 +643,10 @@ public class LineageRewriteReuse
 				// create ba+* lineage on top of the input of last append
 				LineageItem tmp = new LineageItem("toProbe", curr.getOpcode(), new LineageItem[] {left, rightSource});
 				if (LineageCache.probe(tmp))
-					inCache.put("lastMatrix", LineageCache.get(tmp));
+					inCache.put("lastMatrix", LineageCache.get(tmp).getMBValue());
 				// look for the appended column in cache
 				if (LineageCache.probe(right.getInputs()[1])) 
-					inCache.put("deltaY", LineageCache.get(right.getInputs()[1]));
+					inCache.put("deltaY", LineageCache.get(right.getInputs()[1]).getMBValue());
 			}
 		}
 		return inCache.containsKey("lastMatrix") ? true : false;
@@ -699,12 +668,12 @@ public class LineageRewriteReuse
 				// create * lineage on top of the input of last append
 				LineageItem tmp = new LineageItem("toProbe", curr.getOpcode(), new LineageItem[] {leftSource, rightSource});
 				if (LineageCache.probe(tmp))
-					inCache.put("lastMatrix", LineageCache.get(tmp));
+					inCache.put("lastMatrix", LineageCache.get(tmp).getMBValue());
 				// look for the appended rows in cache
 				if (LineageCache.probe(left.getInputs()[1]))
-					inCache.put("deltaX", LineageCache.get(left.getInputs()[1]));
+					inCache.put("deltaX", LineageCache.get(left.getInputs()[1]).getMBValue());
 				if (LineageCache.probe(right.getInputs()[1]))
-					inCache.put("deltaY", LineageCache.get(right.getInputs()[1]));
+					inCache.put("deltaY", LineageCache.get(right.getInputs()[1]).getMBValue());
 			}
 		}
 		return inCache.containsKey("lastMatrix") ? true : false;
@@ -726,12 +695,12 @@ public class LineageRewriteReuse
 				// create * lineage on top of the input of last append
 				LineageItem tmp = new LineageItem("toProbe", curr.getOpcode(), new LineageItem[] {leftSource, rightSource});
 				if (LineageCache.probe(tmp))
-					inCache.put("lastMatrix", LineageCache.get(tmp));
+					inCache.put("lastMatrix", LineageCache.get(tmp).getMBValue());
 				// look for the appended columns in cache
 				if (LineageCache.probe(left.getInputs()[1]))
-					inCache.put("deltaX", LineageCache.get(left.getInputs()[1]));
+					inCache.put("deltaX", LineageCache.get(left.getInputs()[1]).getMBValue());
 				if (LineageCache.probe(right.getInputs()[1]))
-					inCache.put("deltaY", LineageCache.get(right.getInputs()[1]));
+					inCache.put("deltaY", LineageCache.get(right.getInputs()[1]).getMBValue());
 			}
 		}
 		return inCache.containsKey("lastMatrix") ? true : false;
@@ -757,10 +726,10 @@ public class LineageRewriteReuse
 				LineageItem tmp = new LineageItem("toProbe", curr.getOpcode(), 
 						new LineageItem[] {input1, groups, weights, fn, ngroups});
 				if (LineageCache.probe(tmp)) 
-					inCache.put("lastMatrix", LineageCache.get(tmp));
+					inCache.put("lastMatrix", LineageCache.get(tmp).getMBValue());
 				// look for the appended column in cache
 				if (LineageCache.probe(target.getInputs()[1])) 
-					inCache.put("deltaX", LineageCache.get(target.getInputs()[1]));
+					inCache.put("deltaX", LineageCache.get(target.getInputs()[1]).getMBValue());
 			}
 		}
 		// return true only if the last tsmm is found
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageTokenizer.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageTokenizer.java
index b32acd5..1056df0 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageTokenizer.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageTokenizer.java
@@ -32,7 +32,7 @@ import java.util.regex.Pattern;
 
 public class LineageTokenizer {
 	
-	private List<TokenInfo> _tokenInfos;
+	private final List<TokenInfo> _tokenInfos;
 	
 	public LineageTokenizer() {
 		_tokenInfos = new ArrayList<>();
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java
index 934d0d8..3d052e1 100644
--- a/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java
@@ -39,6 +39,7 @@ public class FullReuseTest extends AutomatedTestBase {
 	protected static final String TEST_NAME1 = "FullReuse1";
 	protected static final String TEST_NAME2 = "FullReuse2";
 	protected static final String TEST_NAME3 = "FullReuse3";
+	protected static final String TEST_NAME4 = "FullReuse4";
 	protected String TEST_CLASS_DIR = TEST_DIR + FullReuseTest.class.getSimpleName() + "/";
 	
 	@Override
@@ -47,6 +48,7 @@ public class FullReuseTest extends AutomatedTestBase {
 		addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1));
 		addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2));
 		addTestConfiguration(TEST_NAME3, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME3));
+		addTestConfiguration(TEST_NAME4, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME4));
 	}
 	
 	@Test
@@ -63,6 +65,11 @@ public class FullReuseTest extends AutomatedTestBase {
 	public void testLineageTrace3() {
 		testLineageTrace(TEST_NAME3);
 	}
+
+	@Test
+	public void testLineageTrace4() {    //caching scalar
+		testLineageTrace(TEST_NAME4);
+	}
 	
 	public void testLineageTrace(String testname) {
 		boolean old_simplification = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/FunctionFullReuseTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/FunctionFullReuseTest.java
index 272e3e2..9819fe0 100644
--- a/src/test/java/org/apache/sysds/test/functions/lineage/FunctionFullReuseTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/FunctionFullReuseTest.java
@@ -111,7 +111,7 @@ public class FunctionFullReuseTest extends AutomatedTestBase {
 			proArgs.clear();
 			proArgs.add("-stats");
 			proArgs.add("-lineage");
-			proArgs.add(ReuseCacheType.REUSE_FULL.name().toLowerCase());
+			proArgs.add(ReuseCacheType.REUSE_MULTILEVEL.name().toLowerCase());
 			proArgs.add("-args");
 			proArgs.add(output("X"));
 			programArgs = proArgs.toArray(new String[proArgs.size()]);
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/SBFullReuseTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/SBFullReuseTest.java
index b3a33de..cabc349 100644
--- a/src/test/java/org/apache/sysds/test/functions/lineage/SBFullReuseTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/SBFullReuseTest.java
@@ -97,7 +97,7 @@ public class SBFullReuseTest extends AutomatedTestBase {
 			proArgs.add("-stats");
 			proArgs.add("-explain");
 			proArgs.add("-lineage");
-			proArgs.add(ReuseCacheType.REUSE_FULL.name().toLowerCase());
+			proArgs.add(ReuseCacheType.REUSE_MULTILEVEL.name().toLowerCase());
 			proArgs.add("-args");
 			proArgs.add(output("X"));
 			programArgs = proArgs.toArray(new String[proArgs.size()]);
diff --git a/src/test/scripts/functions/lineage/FullReuse4.dml b/src/test/scripts/functions/lineage/FullReuse4.dml
new file mode 100644
index 0000000..fc3a2eb
--- /dev/null
+++ b/src/test/scripts/functions/lineage/FullReuse4.dml
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Increase k for better performance gains
+
+X = rand(rows=1024, cols=1024, seed=42);
+k = 10
+
+for(i in 1:k){
+  t = sum(X) + nrow(X);  #cache scalar result
+  while(FALSE){}
+  tmp = X + t;
+}
+
+write(tmp, $1, format="text");
+