You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2018/10/09 23:41:31 UTC
systemml git commit: [SYSTEMML-445] Avoid unnecessary transfer to the GPU for size estimation

Repository: systemml
Updated Branches:
  refs/heads/master 3702df7c1 -> 97fd7d1aa


[SYSTEMML-445] Avoid unnecessary transfer to the GPU for size estimation

- Compute memory estimates (exact and worst-case) using metadata rather
than requiring pointer transfer.

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/97fd7d1a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/97fd7d1a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/97fd7d1a

Branch: refs/heads/master
Commit: 97fd7d1aa3ce7a152066d4d4b713fb0a9aee4092
Parents: 3702df7
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Tue Oct 9 16:41:18 2018 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Tue Oct 9 16:41:18 2018 -0700

----------------------------------------------------------------------
 .../gpu/context/GPUMatrixMemoryManager.java     | 26 ---------------
 .../gpu/context/GPUMemoryManager.java           | 16 +++++-----
 .../instructions/gpu/context/GPUObject.java     | 33 +++++++++++++++-----
 3 files changed, 34 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
index 457968b..47a8391 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
@@ -44,32 +44,6 @@ public class GPUMatrixMemoryManager {
 		gpuObjects.add(gpuObj);
 	}
 	
-	/**
-	 * Returns worst-case contiguous memory size
-	 * @param gpuObj gpu object
-	 * @return memory size in bytes
-	 */
-	long getWorstCaseContiguousMemorySize(GPUObject gpuObj) {
-		long ret = 0;
-		if(!gpuObj.isDensePointerNull()) {
-			if(!gpuObj.shadowBuffer.isBuffered())
-				ret = gpuManager.allPointers.get(gpuObj.getDensePointer()).getSizeInBytes();
-			else
-				ret = 0; // evicted hence no contiguous memory on GPU
-		}
-		else if(gpuObj.getJcudaSparseMatrixPtr() != null) {
-			CSRPointer sparsePtr = gpuObj.getJcudaSparseMatrixPtr();
-			if(sparsePtr.nnz > 0) {
-				if(sparsePtr.rowPtr != null)
-					ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.rowPtr).getSizeInBytes());
-				if(sparsePtr.colInd != null)
-					ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.colInd).getSizeInBytes());
-				if(sparsePtr.val != null)
-					ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.val).getSizeInBytes());
-			}
-		}
-		return ret;
-	}
 	
 	/**
 	 * Get list of all Pointers in a GPUObject 

http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
index 57b76f6..6772b4a 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
@@ -292,7 +292,7 @@ public class GPUMemoryManager {
 		if(A == null) {
 			long t0 =  ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
 			Optional<GPUObject> sizeBasedUnlockedGPUObjects = matrixMemoryManager.gpuObjects.stream()
-						.filter(gpuObj -> !gpuObj.isLocked() && matrixMemoryManager.getWorstCaseContiguousMemorySize(gpuObj) >= size)
+						.filter(gpuObj -> !gpuObj.isLocked() && gpuObj.getWorstCaseContiguousMemorySize() >= size)
 						.min((o1, o2) -> worstCaseContiguousMemorySizeCompare(o1, o2));
 			if(sizeBasedUnlockedGPUObjects.isPresent()) {
 				evictOrClear(sizeBasedUnlockedGPUObjects.get(), opcode);
@@ -363,7 +363,7 @@ public class GPUMemoryManager {
 	}
 	
 	private int worstCaseContiguousMemorySizeCompare(GPUObject o1, GPUObject o2) {
-		long ret = matrixMemoryManager.getWorstCaseContiguousMemorySize(o1) - matrixMemoryManager.getWorstCaseContiguousMemorySize(o2);
+		long ret = o1.getWorstCaseContiguousMemorySize() - o2.getWorstCaseContiguousMemorySize();
 		return ret < 0 ? -1 : (ret == 0 ? 0 : 1);
 	}
 	
@@ -423,7 +423,7 @@ public class GPUMemoryManager {
 				jcuda.runtime.JCuda.cudaDeviceSynchronize(); // Force a device synchronize after free-ing the pointer for debugging
 		}
 		else {
-			throw new RuntimeException("Attempting to free an unaccounted pointer:" + toFree);
+			throw new RuntimeException("ERROR : Internal state corrupted, attempting to free an unaccounted pointer:" + toFree);
 		}
 
 	}
@@ -439,6 +439,12 @@ public class GPUMemoryManager {
 	public void free(String opcode, Pointer toFree, boolean eager) throws DMLRuntimeException {
 		if(LOG.isTraceEnabled())
 			LOG.trace("Free-ing the pointer with eager=" + eager);
+		if(toFree == null)
+			throw new DMLRuntimeException("Attempting to free a null pointer");
+		else if (!allPointers.containsKey(toFree)) {
+			LOG.info("GPU memory info before failure:" + toString());
+			throw new RuntimeException("ERROR : Internal state corrupted, attempting to free an unaccounted pointer:" + toFree);
+		}
 		long size = allPointers.get(toFree).getSizeInBytes();
 		if(ConfigurationManager.isStatistics()) {
 			currentSize -= size;
@@ -449,10 +455,6 @@ public class GPUMemoryManager {
 			addMiscTime(opcode, GPUStatistics.cudaDeAllocTime, GPUStatistics.cudaDeAllocCount, GPUInstruction.MISC_TIMER_CUDA_FREE, t0);
 		}
 		else {
-			if (!allPointers.containsKey(toFree)) {
-				LOG.info("GPU memory info before failure:" + toString());
-				throw new RuntimeException("ERROR : Internal state corrupted, cache block size map is not aware of a block it trying to free up");
-			}
 			lazyCudaFreeMemoryManager.add(size, toFree);
 		}
 	}

http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
index 552ee3b..6d7d73b 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
@@ -774,17 +774,34 @@ public class GPUObject {
 	}
 
 	protected long getSizeOnDevice() {
-		long GPUSize = 0;
 		long rlen = mat.getNumRows();
 		long clen = mat.getNumColumns();
 		long nnz = mat.getNnz();
-
-		if (LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) {
-			GPUSize = CSRPointer.estimateSize(nnz, rlen);
-		} else {
-			GPUSize = getDatatypeSizeOf(rlen * clen);
-		}
-		return GPUSize;
+		
+		if(jcudaDenseMatrixPtr != null)
+			return getDatatypeSizeOf(rlen * clen); // allocated in dense format
+		else if(jcudaSparseMatrixPtr != null || LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat))
+			return CSRPointer.estimateSize(nnz, rlen); // either allocated in sparse format or matrix object is in sparse format
+		else 
+			return getDatatypeSizeOf(rlen * clen); // not allocated and matrix object is in dense format
+	}
+	
+	/**
+	 * Returns worst-case contiguous memory size
+	 * 
+	 * @return memory size in bytes
+	 */
+	long getWorstCaseContiguousMemorySize() {
+		long rlen = mat.getNumRows();
+		long clen = mat.getNumColumns();
+		long nnz = mat.getNnz();
+		
+		if(jcudaDenseMatrixPtr != null)
+			return getDatatypeSizeOf(rlen * clen); // allocated in dense format
+		else if(jcudaSparseMatrixPtr != null || LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat))
+			return Math.max(getDatatypeSizeOf(nnz), getIntSizeOf(Math.max(Math.max(rlen+1, clen), 4))); // either allocated in sparse format or matrix object is in sparse format
+		else 
+			return getDatatypeSizeOf(rlen * clen); // not allocated and matrix object is in dense format
 	}
 
 	void copyFromHostToDevice(String opcode) {