You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2018/10/09 23:41:31 UTC
systemml git commit: [SYSTEMML-445] Avoid unnecessary transfer to the GPU for size estimation
Repository: systemml
Updated Branches:
refs/heads/master 3702df7c1 -> 97fd7d1aa
[SYSTEMML-445] Avoid unnecessary transfer to the GPU for size estimation
- Compute memory estimates (exact and worst-case) using metadata rather
than requiring pointer transfer.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/97fd7d1a
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/97fd7d1a
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/97fd7d1a
Branch: refs/heads/master
Commit: 97fd7d1aa3ce7a152066d4d4b713fb0a9aee4092
Parents: 3702df7
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Tue Oct 9 16:41:18 2018 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Tue Oct 9 16:41:18 2018 -0700
----------------------------------------------------------------------
.../gpu/context/GPUMatrixMemoryManager.java | 26 ---------------
.../gpu/context/GPUMemoryManager.java | 16 +++++-----
.../instructions/gpu/context/GPUObject.java | 33 +++++++++++++++-----
3 files changed, 34 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
index 457968b..47a8391 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMatrixMemoryManager.java
@@ -44,32 +44,6 @@ public class GPUMatrixMemoryManager {
gpuObjects.add(gpuObj);
}
- /**
- * Returns worst-case contiguous memory size
- * @param gpuObj gpu object
- * @return memory size in bytes
- */
- long getWorstCaseContiguousMemorySize(GPUObject gpuObj) {
- long ret = 0;
- if(!gpuObj.isDensePointerNull()) {
- if(!gpuObj.shadowBuffer.isBuffered())
- ret = gpuManager.allPointers.get(gpuObj.getDensePointer()).getSizeInBytes();
- else
- ret = 0; // evicted hence no contiguous memory on GPU
- }
- else if(gpuObj.getJcudaSparseMatrixPtr() != null) {
- CSRPointer sparsePtr = gpuObj.getJcudaSparseMatrixPtr();
- if(sparsePtr.nnz > 0) {
- if(sparsePtr.rowPtr != null)
- ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.rowPtr).getSizeInBytes());
- if(sparsePtr.colInd != null)
- ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.colInd).getSizeInBytes());
- if(sparsePtr.val != null)
- ret = Math.max(ret, gpuManager.allPointers.get(sparsePtr.val).getSizeInBytes());
- }
- }
- return ret;
- }
/**
* Get list of all Pointers in a GPUObject
http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
index 57b76f6..6772b4a 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUMemoryManager.java
@@ -292,7 +292,7 @@ public class GPUMemoryManager {
if(A == null) {
long t0 = ConfigurationManager.isStatistics() ? System.nanoTime() : 0;
Optional<GPUObject> sizeBasedUnlockedGPUObjects = matrixMemoryManager.gpuObjects.stream()
- .filter(gpuObj -> !gpuObj.isLocked() && matrixMemoryManager.getWorstCaseContiguousMemorySize(gpuObj) >= size)
+ .filter(gpuObj -> !gpuObj.isLocked() && gpuObj.getWorstCaseContiguousMemorySize() >= size)
.min((o1, o2) -> worstCaseContiguousMemorySizeCompare(o1, o2));
if(sizeBasedUnlockedGPUObjects.isPresent()) {
evictOrClear(sizeBasedUnlockedGPUObjects.get(), opcode);
@@ -363,7 +363,7 @@ public class GPUMemoryManager {
}
private int worstCaseContiguousMemorySizeCompare(GPUObject o1, GPUObject o2) {
- long ret = matrixMemoryManager.getWorstCaseContiguousMemorySize(o1) - matrixMemoryManager.getWorstCaseContiguousMemorySize(o2);
+ long ret = o1.getWorstCaseContiguousMemorySize() - o2.getWorstCaseContiguousMemorySize();
return ret < 0 ? -1 : (ret == 0 ? 0 : 1);
}
@@ -423,7 +423,7 @@ public class GPUMemoryManager {
jcuda.runtime.JCuda.cudaDeviceSynchronize(); // Force a device synchronize after free-ing the pointer for debugging
}
else {
- throw new RuntimeException("Attempting to free an unaccounted pointer:" + toFree);
+ throw new RuntimeException("ERROR : Internal state corrupted, attempting to free an unaccounted pointer:" + toFree);
}
}
@@ -439,6 +439,12 @@ public class GPUMemoryManager {
public void free(String opcode, Pointer toFree, boolean eager) throws DMLRuntimeException {
if(LOG.isTraceEnabled())
LOG.trace("Free-ing the pointer with eager=" + eager);
+ if(toFree == null)
+ throw new DMLRuntimeException("Attempting to free a null pointer");
+ else if (!allPointers.containsKey(toFree)) {
+ LOG.info("GPU memory info before failure:" + toString());
+ throw new RuntimeException("ERROR : Internal state corrupted, attempting to free an unaccounted pointer:" + toFree);
+ }
long size = allPointers.get(toFree).getSizeInBytes();
if(ConfigurationManager.isStatistics()) {
currentSize -= size;
@@ -449,10 +455,6 @@ public class GPUMemoryManager {
addMiscTime(opcode, GPUStatistics.cudaDeAllocTime, GPUStatistics.cudaDeAllocCount, GPUInstruction.MISC_TIMER_CUDA_FREE, t0);
}
else {
- if (!allPointers.containsKey(toFree)) {
- LOG.info("GPU memory info before failure:" + toString());
- throw new RuntimeException("ERROR : Internal state corrupted, cache block size map is not aware of a block it trying to free up");
- }
lazyCudaFreeMemoryManager.add(size, toFree);
}
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/97fd7d1a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
index 552ee3b..6d7d73b 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
@@ -774,17 +774,34 @@ public class GPUObject {
}
protected long getSizeOnDevice() {
- long GPUSize = 0;
long rlen = mat.getNumRows();
long clen = mat.getNumColumns();
long nnz = mat.getNnz();
-
- if (LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat)) {
- GPUSize = CSRPointer.estimateSize(nnz, rlen);
- } else {
- GPUSize = getDatatypeSizeOf(rlen * clen);
- }
- return GPUSize;
+
+ if(jcudaDenseMatrixPtr != null)
+ return getDatatypeSizeOf(rlen * clen); // allocated in dense format
+ else if(jcudaSparseMatrixPtr != null || LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat))
+ return CSRPointer.estimateSize(nnz, rlen); // either allocated in sparse format or matrix object is in sparse format
+ else
+ return getDatatypeSizeOf(rlen * clen); // not allocated and matrix object is in dense format
+ }
+
+ /**
+ * Returns worst-case contiguous memory size
+ *
+ * @return memory size in bytes
+ */
+ long getWorstCaseContiguousMemorySize() {
+ long rlen = mat.getNumRows();
+ long clen = mat.getNumColumns();
+ long nnz = mat.getNnz();
+
+ if(jcudaDenseMatrixPtr != null)
+ return getDatatypeSizeOf(rlen * clen); // allocated in dense format
+ else if(jcudaSparseMatrixPtr != null || LibMatrixCUDA.isInSparseFormat(getGPUContext(), mat))
+ return Math.max(getDatatypeSizeOf(nnz), getIntSizeOf(Math.max(Math.max(rlen+1, clen), 4))); // either allocated in sparse format or matrix object is in sparse format
+ else
+ return getDatatypeSizeOf(rlen * clen); // not allocated and matrix object is in dense format
}
void copyFromHostToDevice(String opcode) {