You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by ar...@apache.org on 2021/02/16 23:45:42 UTC

[systemds] branch master updated: [SYSTEMDS 2582, 2583] Reuse of compressed lineage DAGs

This is an automated email from the ASF dual-hosted git repository.

arnabp20 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new f24a021  [SYSTEMDS 2582,2583] Reuse of compressed lineage DAGs
f24a021 is described below

commit f24a02139abaa8dc730480cf33c608876577b6b9
Author: arnabp <ar...@tugraz.at>
AuthorDate: Wed Feb 17 00:44:14 2021 +0100

    [SYSTEMDS 2582,2583] Reuse of compressed lineage DAGs
    
    This patch extends hash and equal of lineage items to support
    dedup nodes -- which also enables reuse with deduplication.
    This patch brings a substantial amount of changes in deduplication
    and lineage DAG comparison logic to support all the possible
    cases. Even though all the basic tests are passing on this patch, we
    need extensive experiments to confirm the robustness of this change.
---
 .../org/apache/sysds/parser/ForStatementBlock.java |   6 +-
 .../org/apache/sysds/runtime/lineage/Lineage.java  |   6 +-
 .../sysds/runtime/lineage/LineageDedupUtils.java   |  41 +++--
 .../apache/sysds/runtime/lineage/LineageItem.java  | 173 ++++++++++++++++++++-
 .../apache/sysds/runtime/lineage/LineageMap.java   |  11 +-
 ...eageTraceDedupTest.java => DedupReuseTest.java} | 113 ++++++--------
 .../test/functions/lineage/FullReuseTest.java      |   1 +
 .../functions/lineage/LineageTraceDedupTest.java   |   1 +
 src/test/scripts/functions/lineage/DedupReuse1.dml |  38 +++++
 src/test/scripts/functions/lineage/DedupReuse2.dml |  38 +++++
 src/test/scripts/functions/lineage/DedupReuse3.dml |  39 +++++
 src/test/scripts/functions/lineage/DedupReuse4.dml |  30 ++++
 src/test/scripts/functions/lineage/DedupReuse5.dml |  45 ++++++
 13 files changed, 455 insertions(+), 87 deletions(-)

diff --git a/src/main/java/org/apache/sysds/parser/ForStatementBlock.java b/src/main/java/org/apache/sysds/parser/ForStatementBlock.java
index c2686da..ff18f78 100644
--- a/src/main/java/org/apache/sysds/parser/ForStatementBlock.java
+++ b/src/main/java/org/apache/sysds/parser/ForStatementBlock.java
@@ -21,6 +21,7 @@ package org.apache.sysds.parser;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 
 import org.apache.sysds.conf.ConfigurationManager;
 import org.apache.sysds.hops.Hop;
@@ -282,11 +283,12 @@ public class ForStatementBlock extends StatementBlock
 		// By calling getInputstoSB on all the child statement blocks,
 		// we remove the variables only read in the for predicate but
 		// never used in the body from the input list.
-		ArrayList<String> inputs = new ArrayList<>();
+		HashSet<String> inputs = new HashSet<>();
 		ForStatement fstmt = (ForStatement)_statements.get(0);
 		for (StatementBlock sb : fstmt.getBody())
 			inputs.addAll(sb.getInputstoSB());
-		return inputs;
+		// Hashset ensures no duplicates in the variable list
+		return new ArrayList<>(inputs);
 	}
 
 	@Override
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java b/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
index b6519bc..86bf0c5 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
@@ -53,7 +53,7 @@ public class Lineage {
 	}
 	
 	public void trace(Instruction inst, ExecutionContext ec) {
-		if (_activeDedupBlock == null || !_activeDedupBlock.isAllPathsTaken())
+		if (_activeDedupBlock == null || !_activeDedupBlock.isAllPathsTaken() || !LineageCacheConfig.ReuseCacheType.isNone())
 			_map.trace(inst, ec);
 	}
 	
@@ -62,10 +62,10 @@ public class Lineage {
 			ArrayList<String> inputnames = pb.getStatementBlock().getInputstoSB();
 			LineageItem[] liinputs = LineageItemUtils.getLineageItemInputstoSB(inputnames, ec);
 			long lpath = _activeDedupBlock.getPath();
-			LineageDedupUtils.setDedupMap(_activeDedupBlock, lpath);
+			Map<String, Integer> dedupPatchHashList = LineageDedupUtils.setDedupMap(_activeDedupBlock, lpath);
 			LineageMap lm = _activeDedupBlock.getMap(lpath);
 			if (lm != null)
-				_map.processDedupItem(lm, lpath, liinputs, pb.getStatementBlock().getName());
+				_map.processDedupItem(lm, lpath, liinputs, pb.getStatementBlock().getName(), dedupPatchHashList);
 		}
 	}
 	
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageDedupUtils.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageDedupUtils.java
index 496de6e..b781bbd 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageDedupUtils.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageDedupUtils.java
@@ -20,7 +20,10 @@
 package org.apache.sysds.runtime.lineage;
 
 import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 import java.util.Stack;
 
 import org.apache.sysds.runtime.controlprogram.BasicProgramBlock;
@@ -92,11 +95,11 @@ public class LineageDedupUtils {
 	}
 	
 	public static void setNewDedupPatch(LineageDedupBlock ldb, ProgramBlock fpb, ExecutionContext ec) {
-		// no need to trace anymore if all the paths are taken, 
+		// No need to trace anymore if all the paths are taken, 
 		// instead reuse the stored maps for this and future interations
-		// NOTE: this optimization saves redundant tracing, but that
-		//       kills reuse opportunities
-		if (ldb.isAllPathsTaken())
+		// But, tracing is required if reuse is enabled, even though the traced 
+		// lineage map is discarded after each iteration (if all paths are taken)
+		if (ldb.isAllPathsTaken() && LineageCacheConfig.ReuseCacheType.isNone())
 			return;
 
 		// copy the input LineageItems of the loop-body
@@ -121,14 +124,25 @@ public class LineageDedupUtils {
 		ec.setLineage(_mainLineage);
 	}
 	
-	public static void setDedupMap(LineageDedupBlock ldb, long takenPath) {
+	public static Map<String, Integer> setDedupMap(LineageDedupBlock ldb, long takenPath) {
 		// if this iteration took a new path, store the corresponding map
 		if (ldb.getMap(takenPath) == null) {
 			LineageMap patchMap = _tmpLineage.getLineageMap();
-			// Cut the DAGs at placeholders
-			cutAtPlaceholder(patchMap);
+			// Clean unused variables, and cut the DAGs at placeholders
+			cleanDedupMap(patchMap);
 			ldb.setMap(takenPath, patchMap);
 		}
+		// Copy and return the hash values of all the roots
+		if (!LineageCacheConfig.ReuseCacheType.isNone()) {
+			Map<String, Integer> dedupPatchHashList = new HashMap<>();
+			LineageMap patchMap = _tmpLineage.getLineageMap();
+			for (Map.Entry<String, LineageItem> litem : patchMap.getTraces().entrySet()) {
+				if (!litem.getValue().isPlaceholder())
+					dedupPatchHashList.put(litem.getKey(), litem.getValue().hashCode());
+			}
+			return dedupPatchHashList;
+		}
+		return null;
 	}
 	
 	private static void initLocalLineage(ExecutionContext ec) {
@@ -168,16 +182,23 @@ public class LineageDedupUtils {
 		return sb.toString();
 	}
 	
-	public static void cutAtPlaceholder(LineageMap lmap) {
+	private static void cleanDedupMap(LineageMap lmap) {
 		// Gather all the DAG roots and cut each at placeholder
+		Set<String> emptyRoots = new HashSet<>();
 		for (Map.Entry<String, LineageItem> litem : lmap.getTraces().entrySet()) {
 			LineageItem root = litem.getValue();
+			// Clean empty DAG roots such as iterator i
+			if (root.isPlaceholder()) {
+				emptyRoots.add(litem.getKey());
+				continue;
+			}
 			root.resetVisitStatusNR();
 			cutAtPlaceholder(root);
 		}
+		lmap.getTraces().keySet().removeAll(emptyRoots);
 	}
 	
-	public static void cutAtPlaceholder(LineageItem root) {
+	private static void cutAtPlaceholder(LineageItem root) {
 		Stack<LineageItem> q = new Stack<>();
 		q.push(root);
 		while (!q.empty()) {
@@ -185,7 +206,7 @@ public class LineageDedupUtils {
 			if (tmp.isVisited())
 				continue;
 
-			if (tmp.getOpcode().startsWith(LineageItemUtils.LPLACEHOLDER)) {
+			if (tmp.isPlaceholder()) {
 				// set inputs to null
 				tmp.resetInputs();
 				tmp.setVisited();
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java
index 3903bf0..2210fff 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageItem.java
@@ -19,6 +19,8 @@
 
 package org.apache.sysds.runtime.lineage;
 
+import java.util.HashMap;
+import java.util.Map;
 import java.util.Stack;
 
 import org.apache.sysds.runtime.DMLRuntimeException;
@@ -33,6 +35,7 @@ public class LineageItem {
 	private final String _data;
 	private LineageItem[] _inputs;
 	private int _hash = 0;
+	private LineageItem _dedupPatch;
 	private long _distLeaf2Node;
 	// init visited to true to ensure visited items are
 	// not hidden when used as inputs to new items
@@ -64,6 +67,20 @@ public class LineageItem {
 	public LineageItem(String data, String opcode, LineageItem[] inputs) {
 		this(_idSeq.getNextID(), data, opcode, inputs);
 	}
+
+	public LineageItem(String opcode, LineageItem dedupPatch, LineageItem[] inputs) { 
+		this(_idSeq.getNextID(), "", opcode, inputs);
+		// maintain a pointer to the dedup patch
+		_dedupPatch = dedupPatch;
+		_hash = _dedupPatch._hash;
+	}
+
+	public LineageItem(String opcode, LineageItem dedupPatch, int dpatchHash, LineageItem[] inputs) { 
+		this(_idSeq.getNextID(), "", opcode, inputs);
+		// maintain a pointer to the dedup patch
+		_dedupPatch = dedupPatch;
+		_hash = dpatchHash;
+	}
 	
 	public LineageItem(LineageItem li) {
 		this(_idSeq.getNextID(), li);
@@ -95,7 +112,8 @@ public class LineageItem {
 	
 	public void resetInputs() {
 		_inputs = null;
-		_hash = 0;
+		//_hash = 0;
+		// Keep the hash for equality check
 	}
 	
 	public void setInput(int i, LineageItem item) {
@@ -151,6 +169,10 @@ public class LineageItem {
 		return _opcode;
 	}
 	
+	public boolean isPlaceholder() {
+		return _opcode.startsWith(LineageItemUtils.LPLACEHOLDER);
+	}
+	
 	public void setDistLeaf2Node(long d) {
 		_distLeaf2Node = d;
 	}
@@ -159,6 +181,10 @@ public class LineageItem {
 		return _distLeaf2Node;
 	}
 	
+	public LineageItem getDedupPatch() {
+		return _dedupPatch;
+	}
+	
 	public LineageItemType getType() {
 		if (_opcode.startsWith(dedupItemOpcode))
 			return LineageItemType.Dedup;
@@ -183,7 +209,8 @@ public class LineageItem {
 			return false;
 		
 		resetVisitStatusNR();
-		boolean ret = equalsLINR((LineageItem) o);
+		//boolean ret = equalsLINR((LineageItem) o);
+		boolean ret = equalsLINR_dedup((LineageItem) o);
 		resetVisitStatusNR();
 		return ret;
 	}
@@ -230,10 +257,152 @@ public class LineageItem {
 		
 		return ret;
 	}
+
+	// Deduplication aware equality check
+	private boolean equalsLINR_dedup(LineageItem that) {
+		Stack<LineageItem> s1 = new Stack<>();
+		Stack<LineageItem> s2 = new Stack<>();
+		s1.push(this);
+		s2.push(that);
+		//boolean ret = false;
+		boolean ret = true;
+		while (!s1.empty() && !s2.empty()) {
+			LineageItem li1 = s1.pop();
+			LineageItem li2 = s2.pop();
+
+			if (li1.isVisited() || li1 == li2)
+				// skip this sub-DAG.
+				continue;
+
+			if (!li1.isDedup() && !li2.isDedup()) {
+				// Opcodes don't match if either entry is dedup
+				ret = li1._opcode.equals(li2._opcode);
+				ret &= li1._data.equals(li2._data);
+			}
+			ret &= (li1.hashCode() == li2.hashCode());
+			if (!ret) break;
+
+			if (ret && li1._inputs != null && li1._inputs.length == li2._inputs.length || li1.isDedup() || li2.isDedup())
+				for (int i=0; i<li1._inputs.length; i++) {
+					// Find the i'th inputs. If the input is a non-leaf placeholder, read the inputs to it,
+					// else read the normal input or the leaf placeholder.
+					LineageItem in1 = li1._inputs[i].isPlaceholder() ? 
+							li1._inputs[i]._inputs != null ? li1._inputs[i]._inputs[0]:li1._inputs[i] : li1._inputs[i];
+					LineageItem in2 = li2._inputs[i].isPlaceholder() ? 
+							li2._inputs[i]._inputs != null ? li2._inputs[i]._inputs[0]:li2._inputs[i] : li2._inputs[i];
+
+					// If either input is a dedup node, match the corresponding dedup patch DAG to the 
+					// sub-dag of the non-dedup DAG. If matched, push the inputs into the stacks in a
+					// order-preserving way.
+					if (in1.isDedup() && !in2.isDedup()) {
+						Map<Integer, LineageItem> phMap = new HashMap<>();
+						in1._dedupPatch.resetVisitStatusNR();
+						ret = equalsDedupPatch(in1._dedupPatch, in2, phMap);
+						in1.setVisited();
+						if (!ret) {
+							li1.setVisited();
+							return false;
+						}
+						for (Map.Entry<Integer, LineageItem> ph : phMap.entrySet()) {
+							s1.push(in1._inputs[ph.getKey()]);
+							s2.push(ph.getValue());
+						}
+					}
+					else if (in2.isDedup() && !in1.isDedup()) {
+						Map<Integer, LineageItem> phMap = new HashMap<>();
+						ret = equalsDedupPatch(in1, in2._dedupPatch, phMap);
+						if (!ret) {
+							li1.setVisited();
+							return false;
+						}
+						for (Map.Entry<Integer, LineageItem> ph : phMap.entrySet()) {
+							s1.push(ph.getValue());
+							s1.push(in2._inputs[ph.getKey()]);
+						}
+					}
+					// If both inputs are dedup nodes, compare the corresponding patches
+					// and push all the inputs into the stacks.
+					else if (in1.isDedup() && in2.isDedup()) {
+						in1._dedupPatch.resetVisitStatusNR();
+						in2._dedupPatch.resetVisitStatusNR();
+						ret = in1._dedupPatch.equalsLINR(in2._dedupPatch);
+						in1.setVisited();
+						if (!ret) {
+							li1.setVisited();
+							return false;
+						}
+						if (in1._inputs.length == in2._inputs.length)
+							// FIXME: Two dedup nodes can have matching patches but different #inputs
+							for (int j=0; j<in1._inputs.length; j++) {
+								s1.push(in1.getInputs()[j]);
+								s2.push(in2.getInputs()[j]);
+							}
+						else {
+							li1.setVisited();
+							return false;
+						}
+					}
+					else {
+						s1.push(in1);
+						s2.push(in2);
+					}
+				}
+			li1.setVisited();
+		}
+		return ret;
+	}
+	
+	// Compare a dedup patch with a sub-DAG, and map the inputs of the sub-dag
+	// to the placeholder inputs of the dedup patch
+	private boolean equalsDedupPatch(LineageItem dli1, LineageItem dli2, Map<Integer, LineageItem> phMap) {
+		Stack<LineageItem> s1 = new Stack<>();
+		Stack<LineageItem> s2 = new Stack<>();
+		s1.push(dli1);
+		s2.push(dli2);
+		boolean ret = true;
+		while (!s1.empty() && !s2.empty()) {
+			LineageItem li1 = s1.pop();
+			LineageItem li2 = s2.pop();
+			if (li1.isVisited() || li1 == li2)
+				continue; //FIXME: fill phMap
+			ret = li1._opcode.equals(li2._opcode);
+			ret &= li1._data.equals(li2._data);
+			//ret &= (li1.hashCode() == li2.hashCode());
+			// Do not match the hash codes, as the hash of a dedup patch node doesn't represent the whole dag.
+			if (!ret) break;
+			if (ret && li1._inputs != null && li1._inputs.length == li2._inputs.length)
+				for (int i=0; i<li1._inputs.length; i++) {
+					LineageItem in1 = li1.getInputs()[i];
+					LineageItem in2 = li2.getInputs()[i];
+					in1 = in1.isPlaceholder() ? in1._inputs != null ? in1.getInputs()[0] : in1 : in1;
+					in2 = in2.isPlaceholder() ? in2._inputs != null ? in2.getInputs()[0] : in2 : in2;
+					if (in1.isPlaceholder() && in1._inputs == null && !in2.isPlaceholder()) {
+						int phId = Integer.parseInt(in1.getOpcode().substring(3));
+						phMap.put(phId, in2);
+						continue;
+					}
+					if (in2.isPlaceholder() && in2._inputs == null && !in1.isPlaceholder()) {
+						int phId = Integer.parseInt(in2.getOpcode().substring(3));
+						phMap.put(phId, in1);
+						continue;
+					}
+					if (in1.isPlaceholder() && in2.isPlaceholder())
+						continue;
+
+					s1.push(in1);
+					s2.push(in2);
+				}
+			li1.setVisited();
+		}
+		return ret;
+	}
 	
 	@Override
 	public int hashCode() {
 		if (_hash == 0) {
+			if (isPlaceholder() && _inputs != null)
+				return _inputs[0].hashCode();
+
 			//compute hash over opcode and all inputs
 			int h = UtilFunctions.intHashCode(
 				_opcode.hashCode(), _data.hashCode());
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
index 47af4f5..e9d9368 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageMap.java
@@ -72,14 +72,19 @@ public class LineageMap {
 		}
 	}
 	
-	public void processDedupItem(LineageMap lm, Long path, LineageItem[] liinputs, String name) {
+	public void processDedupItem(LineageMap lm, Long path, LineageItem[] liinputs, String name, Map<String, Integer> dpatchHashList) {
 		String delim = LineageDedupUtils.DEDUP_DELIM;
 		for (Map.Entry<String, LineageItem> entry : lm._traces.entrySet()) {
 			// Encode everything needed by the recomputation logic in the
 			// opcode to map this lineage item to the right patch.
 			String opcode = LineageItem.dedupItemOpcode + delim + entry.getKey()
 				+ delim + name + delim + path.toString();
-			LineageItem li = new LineageItem(opcode, liinputs);
+			// If reuse is enabled, set the hash of the dedup node as of the corresponding root.
+			// This way dedup nodes have the same hash as the matching non-dedup DAGs, which is 
+			// required for reuse.
+			// Note: 2 node may point to the same patch, but have different hashes
+			LineageItem li = dpatchHashList == null ? new LineageItem(opcode, entry.getValue(), liinputs)
+					: new LineageItem(opcode, entry.getValue(), dpatchHashList.get(entry.getKey()), liinputs);
 			addLineageItem(Pair.of(entry.getKey(), li));
 		}
 	}
@@ -224,7 +229,7 @@ public class LineageMap {
 			_traces.put(keyTo, input);
 	}
 	
-	private LineageItem removeLineageItem(String key) {
+	public LineageItem removeLineageItem(String key) {
 		//remove item if present
 		return _traces.remove(key);
 	}
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/LineageTraceDedupTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/DedupReuseTest.java
similarity index 57%
copy from src/test/java/org/apache/sysds/test/functions/lineage/LineageTraceDedupTest.java
copy to src/test/java/org/apache/sysds/test/functions/lineage/DedupReuseTest.java
index 7862295..a19eaa3 100644
--- a/src/test/java/org/apache/sysds/test/functions/lineage/LineageTraceDedupTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/DedupReuseTest.java
@@ -19,15 +19,13 @@
 
 package org.apache.sysds.test.functions.lineage;
 
+import org.junit.Assert;
 import org.junit.Test;
 import org.apache.sysds.common.Types;
 import org.apache.sysds.hops.OptimizerUtils;
 import org.apache.sysds.hops.recompile.Recompiler;
-import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
-import org.apache.sysds.runtime.instructions.cp.Data;
 import org.apache.sysds.runtime.lineage.Lineage;
-import org.apache.sysds.runtime.lineage.LineageRecomputeUtils;
-import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.lineage.LineageCacheStatistics;
 import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
 import org.apache.sysds.test.AutomatedTestBase;
 import org.apache.sysds.test.TestConfiguration;
@@ -37,97 +35,66 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 
-public class LineageTraceDedupTest extends LineageBase
+public class DedupReuseTest extends AutomatedTestBase
 {
 	protected static final String TEST_DIR = "functions/lineage/";
-	protected static final String TEST_NAME = "LineageTraceDedup";
-	protected static final String TEST_NAME1 = "LineageTraceDedup1";
-	protected static final String TEST_NAME10 = "LineageTraceDedup10";
-	protected static final String TEST_NAME2 = "LineageTraceDedup2";
-	protected static final String TEST_NAME3 = "LineageTraceDedup3";
-	protected static final String TEST_NAME4 = "LineageTraceDedup4";
-	protected static final String TEST_NAME5 = "LineageTraceDedup5";
-	protected static final String TEST_NAME6 = "LineageTraceDedup6";
-	protected static final String TEST_NAME7 = "LineageTraceDedup7"; //nested if-else branches
-	protected static final String TEST_NAME8 = "LineageTraceDedup8"; //while loop
-	protected static final String TEST_NAME9 = "LineageTraceDedup9"; //while loop w/ if
-	protected static final String TEST_NAME11 = "LineageTraceDedup11"; //mini-batch
-	
-	protected String TEST_CLASS_DIR = TEST_DIR + LineageTraceDedupTest.class.getSimpleName() + "/";
+	protected static final String TEST_NAME = "DedupReuse"; 
+	protected static final String TEST_NAME1 = "DedupReuse1"; 
+	protected static final String TEST_NAME2 = "DedupReuse2"; 
+	protected static final String TEST_NAME3 = "DedupReuse3"; 
+	protected static final String TEST_NAME4 = "DedupReuse4"; 
+	protected static final String TEST_NAME5 = "DedupReuse5"; 
+
+	protected String TEST_CLASS_DIR = TEST_DIR + DedupReuseTest.class.getSimpleName() + "/";
 	
-	protected static final int numRecords = 10;
-	protected static final int numFeatures = 5;
+	protected static final int numRecords = 100;
+	protected static final int numFeatures = 50;
 	
 	
 	@Override
 	public void setUp() {
 		TestUtils.clearAssertionInformation();
-		for(int i=1; i<=11; i++)
+		for(int i=1; i<=5; i++)
 			addTestConfiguration(TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i));
 	}
 	
 	@Test
 	public void testLineageTrace1() {
+		// Reuse operations from outside to a dedup-loop
 		testLineageTrace(TEST_NAME1);
 	}
 
 	@Test
-	public void testLineageTrace10() {
-		testLineageTrace(TEST_NAME10);
+	public void testLineageTrace5() {
+		// Reuse operations from outside to a dedup-loop
+		testLineageTrace(TEST_NAME5);
 	}
-	
+
 	@Test
 	public void testLineageTrace2() {
+		// Reuse all operations from a dedup loop to another dedup loop
 		testLineageTrace(TEST_NAME2);
 	}
 	
 	@Test
 	public void testLineageTrace3() {
+		// Reuse all operations from a non-dedup-loop to a dedup loop
 		testLineageTrace(TEST_NAME3);
 	}
-	
+
 	@Test
 	public void testLineageTrace4() {
+		// Reuse an operation for each iteration of a dedup loop
 		testLineageTrace(TEST_NAME4);
 	}
 	
-	@Test
-	public void testLineageTrace5() {
-		testLineageTrace(TEST_NAME5);
-	}
-	
-	@Test
-	public void testLineageTrace6() {
-		testLineageTrace(TEST_NAME6);
-	}
-
-	@Test
-	public void testLineageTrace7() {
-		testLineageTrace(TEST_NAME7);
-	}
-	
-	@Test
-	public void testLineageTrace8() {
-		testLineageTrace(TEST_NAME8);
-	}
-	
-	@Test
-	public void testLineageTrace9() {
-		testLineageTrace(TEST_NAME9);
-	}
-
-	@Test
-	public void testLineageTrace11() {
-		testLineageTrace(TEST_NAME11);
-	}
-	
 	public void testLineageTrace(String testname) {
 		boolean old_simplification = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
 		boolean old_sum_product = OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES;
 		Types.ExecMode old_rtplatform = AutomatedTestBase.rtplatform;
 		
 		try {
-			LOG.debug("------------ BEGIN " + testname + "------------");
+			System.out.println("------------ BEGIN " + testname + "------------");
 			
 			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = false;
 			OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES = false;
@@ -137,10 +104,25 @@ public class LineageTraceDedupTest extends LineageBase
 			fullDMLScriptName = getScript();
 			List<String> proArgs;
 
-			// w/o lineage deduplication
+			//w/o lineage deduplication
+			proArgs = new ArrayList<>();
+			proArgs.add("-stats");
+			proArgs.add("-lineage");
+			proArgs.add("reuse_full");
+			proArgs.add("-args");
+			proArgs.add(output("R"));
+			programArgs = proArgs.toArray(new String[proArgs.size()]);
+
+			Lineage.resetInternalState();
+			runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+			HashMap<CellIndex, Double> orig = readDMLMatrixFromOutputDir("R");
+			long hitCount_nd = LineageCacheStatistics.getInstHits();
+
+			//w/ lineage deduplication
 			proArgs = new ArrayList<>();
 			proArgs.add("-stats");
 			proArgs.add("-lineage");
+			proArgs.add("reuse_full");
 			proArgs.add("dedup");
 			proArgs.add("-args");
 			proArgs.add(output("R"));
@@ -148,16 +130,13 @@ public class LineageTraceDedupTest extends LineageBase
 
 			Lineage.resetInternalState();
 			runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+			HashMap<CellIndex, Double> dedup = readDMLMatrixFromOutputDir("R");
+			long hitCount_d = LineageCacheStatistics.getInstHits();
 
-			//deserialize, generate program and execute
-			String Rtrace = readDMLLineageFromHDFS("R");
-			String RDedupPatches = readDMLLineageDedupFromHDFS("R");
-			Data ret = LineageRecomputeUtils.parseNComputeLineageTrace(Rtrace, RDedupPatches);
-			
-			//match the original and recomputed results
-			HashMap<CellIndex, Double> orig = readDMLMatrixFromOutputDir("R");
-			MatrixBlock recomputed = ((MatrixObject)ret).acquireReadAndRelease();
-			TestUtils.compareMatrices(orig, recomputed, 1e-6);
+			//match the results
+			TestUtils.compareMatrices(orig, dedup, 1e-6, "Original", "Dedup");
+			//compare cache hits
+			Assert.assertTrue(hitCount_nd >= hitCount_d); //FIXME
 		}
 		finally {
 			OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = old_simplification;
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java
index 3ac4aaf..c9a6333 100644
--- a/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/FullReuseTest.java
@@ -100,6 +100,7 @@ public class FullReuseTest extends LineageBase {
 			proArgs.add("-stats");
 			proArgs.add("-lineage");
 			proArgs.add(ReuseCacheType.REUSE_FULL.name().toLowerCase());
+			//proArgs.add("dedup");
 			proArgs.add("-args");
 			proArgs.add(output("X"));
 			programArgs = proArgs.toArray(new String[proArgs.size()]);
diff --git a/src/test/java/org/apache/sysds/test/functions/lineage/LineageTraceDedupTest.java b/src/test/java/org/apache/sysds/test/functions/lineage/LineageTraceDedupTest.java
index 7862295..f372cd3 100644
--- a/src/test/java/org/apache/sysds/test/functions/lineage/LineageTraceDedupTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/lineage/LineageTraceDedupTest.java
@@ -141,6 +141,7 @@ public class LineageTraceDedupTest extends LineageBase
 			proArgs = new ArrayList<>();
 			proArgs.add("-stats");
 			proArgs.add("-lineage");
+			proArgs.add("reuse_full"); //test reuse + deduplication
 			proArgs.add("dedup");
 			proArgs.add("-args");
 			proArgs.add(output("R"));
diff --git a/src/test/scripts/functions/lineage/DedupReuse1.dml b/src/test/scripts/functions/lineage/DedupReuse1.dml
new file mode 100644
index 0000000..34352e7
--- /dev/null
+++ b/src/test/scripts/functions/lineage/DedupReuse1.dml
@@ -0,0 +1,38 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = rand(rows=1024, cols=1024, seed=42);
+k = 3
+
+X1 = X + 1;
+while(FALSE) {}
+X1 = X1 + 1;
+while(FALSE) {}
+X1 = X1 + 1;
+tmp = t(X1) %*% X1; #reusable (inside loop)
+
+for(i in 1:3){
+  X = X + 1;
+  R = t(X) %*% X;
+}
+R = R + tmp;
+
+write(R, $1, format="text");
diff --git a/src/test/scripts/functions/lineage/DedupReuse2.dml b/src/test/scripts/functions/lineage/DedupReuse2.dml
new file mode 100644
index 0000000..b81a089
--- /dev/null
+++ b/src/test/scripts/functions/lineage/DedupReuse2.dml
@@ -0,0 +1,38 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = rand(rows=1024, cols=1024, seed=42);
+
+# Reuse from one deduplicated loop to another deduplicated loop
+X1 = X;
+for(i in 1:3){
+  X1 = X1 + 1;
+  R1 = t(X1) %*% X1;
+}
+
+X2 = X;
+for(i in 1:3){
+  X2 = X2 + 1;
+  R2 = t(X2) %*% X2;
+}
+R = R1 + R2;
+
+write(R, $1, format="text");
diff --git a/src/test/scripts/functions/lineage/DedupReuse3.dml b/src/test/scripts/functions/lineage/DedupReuse3.dml
new file mode 100644
index 0000000..3e3f5ac
--- /dev/null
+++ b/src/test/scripts/functions/lineage/DedupReuse3.dml
@@ -0,0 +1,39 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = rand(rows=1024, cols=1024, seed=42);
+
+# Reuse from non-deduplicated loop to deduplicated loop
+X1 = X;
+for(i in 1:3){
+  X1 = X1 + 1;
+  while(FALSE){}    #stops deduplication for the 'for' loop
+  R1 = t(X1) %*% X1;
+}
+
+X2 = X;
+for(i in 1:3){
+  X2 = X2 + 1;
+  R2 = t(X2) %*% X2;
+}
+R = R1 + R2;
+
+write(R, $1, format="text");
diff --git a/src/test/scripts/functions/lineage/DedupReuse4.dml b/src/test/scripts/functions/lineage/DedupReuse4.dml
new file mode 100644
index 0000000..00645d2
--- /dev/null
+++ b/src/test/scripts/functions/lineage/DedupReuse4.dml
@@ -0,0 +1,30 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = rand(rows=1024, cols=1024, seed=42);
+
+X1 = X;
+for(i in 1:3){
+  X = X + 1;
+  R = t(X1) %*% X1;  #reuse
+}
+
+write(R, $1, format="text");
diff --git a/src/test/scripts/functions/lineage/DedupReuse5.dml b/src/test/scripts/functions/lineage/DedupReuse5.dml
new file mode 100644
index 0000000..b8229c7
--- /dev/null
+++ b/src/test/scripts/functions/lineage/DedupReuse5.dml
@@ -0,0 +1,45 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+r = 100000
+c = 100
+
+X = rand(rows=r, cols=c, seed=42);
+y = rand(rows=r, cols=1, seed=43);
+
+j = 5 
+R = matrix(0, c, j+1);
+
+A = t(X) %*% X + diag(matrix(1, rows=ncol(X), cols=1));
+b = t(X) %*% y;
+beta = solve(A, b);
+R[,j+1] = beta;
+  
+# reuse from the outside of the loop
+for(i in 1:j) {
+  lambda = i;
+  A = t(X) %*% X + diag(matrix(lambda, rows=ncol(X), cols=1));
+  b = t(X) %*% y;
+  beta = solve(A, b);
+  R[,i] = beta;
+}
+
+write(R, $1, format="text");