You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/09/14 20:24:10 UTC
systemml git commit: [MINOR] Refactored the locks to separate out
read and write lock
Repository: systemml
Updated Branches:
refs/heads/master 0a984a43b -> c6d499d3e
[MINOR] Refactored the locks to separate out read and write lock
- Refactoring the locks will avoid future bugs where the developer tries
to obtain two write locks or a read lock on a write-locked object, etc.
- I have also added a debugging utility to track potential memory leaks.
Closes #664.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/c6d499d3
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/c6d499d3
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/c6d499d3
Branch: refs/heads/master
Commit: c6d499d3e27a1842ccf5987ab84c92eee72aa5c2
Parents: 0a984a4
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Thu Sep 14 13:20:46 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Thu Sep 14 13:23:14 2017 -0700
----------------------------------------------------------------------
.../context/ExecutionContext.java | 2 +-
.../instructions/gpu/GPUInstruction.java | 12 +++
.../instructions/gpu/context/GPUContext.java | 52 ++++++++++--
.../instructions/gpu/context/GPUObject.java | 84 ++++++++++++++------
4 files changed, 117 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/c6d499d3/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
index b74c0dd..3b2436e 100644
--- a/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
+++ b/src/main/java/org/apache/sysml/runtime/controlprogram/context/ExecutionContext.java
@@ -374,7 +374,7 @@ public class ExecutionContext {
}
// The lock is added here for an output block
// so that any block currently in use is not deallocated by eviction on the GPU
- mo.getGPUObject(getGPUContext(0)).addLock();
+ mo.getGPUObject(getGPUContext(0)).addWriteLock();
return mo;
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/c6d499d3/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
index 9a6a3bb..2aa73b4 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/GPUInstruction.java
@@ -19,6 +19,8 @@
package org.apache.sysml.runtime.instructions.gpu;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.sysml.api.DMLScript;
import org.apache.sysml.lops.runtime.RunMRJobs;
import org.apache.sysml.runtime.DMLRuntimeException;
@@ -26,6 +28,7 @@ import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysml.runtime.instructions.GPUInstructionParser;
import org.apache.sysml.runtime.instructions.Instruction;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysml.runtime.matrix.data.Pair;
import org.apache.sysml.runtime.matrix.operators.Operator;
import org.apache.sysml.utils.GPUStatistics;
@@ -46,6 +49,8 @@ public abstract class GPUInstruction extends Instruction {
Builtin,
MatrixIndexing
};
+
+ private static final Log LOG = LogFactory.getLog(GPUInstruction.class.getName());
// Memory/conversions
public final static String MISC_TIMER_HOST_TO_DEVICE = "H2D"; // time spent in bringing data to gpu (from host)
@@ -191,6 +196,13 @@ public abstract class GPUInstruction extends Instruction {
if(DMLScript.SYNCHRONIZE_GPU) {
jcuda.runtime.JCuda.cudaDeviceSynchronize();
}
+ if(LOG.isDebugEnabled()) {
+ for(GPUContext gpuCtx : ec.getGPUContexts()) {
+ if(gpuCtx != null)
+ gpuCtx.printMemoryInfo(getOpcode());
+ }
+ }
+
}
/**
http://git-wip-us.apache.org/repos/asf/systemml/blob/c6d499d3/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
index a31deab..271109d 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUContext.java
@@ -43,6 +43,7 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
+import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -130,6 +131,40 @@ public class GPUContext {
* to launch custom CUDA kernel, specific to the active GPU for this GPUContext
*/
private final ThreadLocal<JCudaKernels> kernels = new ThreadLocal<>();
+
+ /**
+ * Print information of memory usage.
+ *
+ * @param opcode opcode of caller
+ * @throws DMLRuntimeException if error
+ */
+ public void printMemoryInfo(String opcode) throws DMLRuntimeException {
+ if(LOG.isDebugEnabled()) {
+ long totalFreeCUDASpace = 0;
+ for(Entry<Long, LinkedList<Pointer>> kv : freeCUDASpaceMap.entrySet()) {
+ totalFreeCUDASpace += kv.getKey()*kv.getValue().size();
+ }
+ long readLockedAllocatedMemory = 0;
+ long writeLockedAllocatedMemory = 0;
+ long unlockedAllocatedMemory = 0;
+ for(GPUObject gpuObj : allocatedGPUObjects) {
+ if(gpuObj.readLocks.longValue() > 0)
+ readLockedAllocatedMemory += gpuObj.getSizeOnDevice();
+ else if(gpuObj.writeLock)
+ writeLockedAllocatedMemory += gpuObj.getSizeOnDevice();
+ else
+ unlockedAllocatedMemory += gpuObj.getSizeOnDevice();
+ }
+ long free[] = { 0 };
+ long total[] = { 0 };
+ cudaMemGetInfo(free, total);
+ long gpuFreeMemory = (long) (free[0] * GPU_MEMORY_UTILIZATION_FACTOR);
+ LOG.debug(opcode + ": Total memory: " + total[0] + ", Free memory: " + free[0] + " (with util factor: " + gpuFreeMemory + "), "
+ + "Lazy unfreed memory: " + totalFreeCUDASpace + ", Locked allocated memory (read/write): "
+ + readLockedAllocatedMemory + "/" + writeLockedAllocatedMemory + ", "
+ + " Unlocked allocated memory: " + unlockedAllocatedMemory);
+ }
+ }
protected GPUContext(int deviceNum) throws DMLRuntimeException {
this.deviceNum = deviceNum;
@@ -472,18 +507,19 @@ public class GPUContext {
Collections.sort(allocatedGPUObjects, new Comparator<GPUObject>() {
@Override
public int compare(GPUObject p1, GPUObject p2) {
- long p1Val = p1.locks.get();
- long p2Val = p2.locks.get();
-
- if (p1Val > 0 && p2Val > 0) {
+ if (p1.isLocked() && p2.isLocked()) {
// Both are locked, so don't sort
return 0;
- } else if (p1Val > 0 || p2Val > 0) {
+ } else if (p1.isLocked()) {
// Put the unlocked one to RHS
- return Long.compare(p2Val, p1Val);
+ // a value less than 0 if x < y; and a value greater than 0 if x > y
+ return -1;
+ } else if (p2.isLocked()) {
+ // Put the unlocked one to RHS
+ // a value less than 0 if x < y; and a value greater than 0 if x > y
+ return 1;
} else {
// Both are unlocked
-
if (evictionPolicy == EvictionPolicy.MIN_EVICT) {
long p1Size = 0;
long p2Size = 0;
@@ -510,7 +546,7 @@ public class GPUContext {
while (neededSize > getAvailableMemory() && allocatedGPUObjects.size() > 0) {
GPUObject toBeRemoved = allocatedGPUObjects.get(allocatedGPUObjects.size() - 1);
- if (toBeRemoved.locks.get() > 0) {
+ if (toBeRemoved.isLocked()) {
throw new DMLRuntimeException(
"There is not enough memory on device for this matrix, request (" + neededSize + "). Allocated GPU objects:" + allocatedGPUObjects.toString());
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/c6d499d3/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
index 2642011..4bc983e 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/GPUObject.java
@@ -34,6 +34,7 @@ import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.LongAdder;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -99,9 +100,14 @@ public class GPUObject {
protected boolean dirty = false;
/**
- * number of read/write locks on this object (this GPUObject is being used in a current instruction)
+ * number of read locks on this object (this GPUObject is being used in a current instruction)
*/
- protected AtomicLong locks = new AtomicLong();
+ protected LongAdder readLocks = new LongAdder();
+
+ /**
+ * whether write lock on this object (this GPUObject is being used in a current instruction)
+ */
+ protected boolean writeLock = false;
/**
* Timestamp, needed by {@link GPUContext#evict(long)}
@@ -132,7 +138,11 @@ public class GPUObject {
that.allocateTensorDescriptor(me.tensorShape[0], me.tensorShape[1], me.tensorShape[2], me.tensorShape[3]);
}
that.dirty = me.dirty;
- that.locks = new AtomicLong(me.locks.get());
+ // TODO Nakul: Should the locks be cloned here ?
+ // The only place clone is getting called: LibMatrixCUDA's solve
+ that.readLocks.reset();
+ that.writeLock = false;
+
that.timestamp = new AtomicLong(me.timestamp.get());
that.isSparse = me.isSparse;
@@ -618,7 +628,7 @@ public class GPUObject {
copyFromHostToDevice(opcode);
transferred = true;
}
- addLock();
+ addReadLock();
if (!isAllocated())
throw new DMLRuntimeException("Expected device data to be allocated");
return transferred;
@@ -664,10 +674,6 @@ public class GPUObject {
return allocated;
}
- public void addLock() {
- locks.addAndGet(1);
- }
-
/**
* if the data is allocated on the GPU and is dirty, it is copied back to the host memory
*
@@ -693,22 +699,51 @@ public class GPUObject {
}
return copied;
}
+
+ public boolean isLocked() {
+ return writeLock || readLocks.longValue() > 0;
+ }
+
+ public void addReadLock() throws DMLRuntimeException {
+ if(writeLock)
+ throw new DMLRuntimeException("Attempting to add a read lock when writeLock="+ writeLock);
+ else
+ readLocks.increment();
+ }
+
+ public void addWriteLock() throws DMLRuntimeException {
+ if(readLocks.longValue() > 0)
+ throw new DMLRuntimeException("Attempting to add a write lock when readLocks="+ readLocks.longValue());
+ else if(writeLock)
+ throw new DMLRuntimeException("Attempting to add a write lock when writeLock="+ writeLock);
+ else
+ writeLock = true;
+ }
+
+ public void releaseReadLock() throws DMLRuntimeException {
+ readLocks.decrement();
+ if(readLocks.longValue() < 0)
+ throw new DMLRuntimeException("Attempting to release a read lock when readLocks="+ readLocks.longValue());
+ }
+
+ public void releaseWriteLock() throws DMLRuntimeException {
+ if(writeLock)
+ writeLock = false;
+ else
+ throw new DMLRuntimeException("Internal state error : Attempting to release write lock on a GPUObject, which was already released");
+ }
+
+ public void resetReadWriteLock() {
+ readLocks.reset();
+ writeLock = false;
+ }
/**
* Updates the locks depending on the eviction policy selected
*
* @throws DMLRuntimeException if there is no locked GPU Object or if could not obtain a {@link GPUContext}
*/
- private void updateReleaseLocks(int l) throws DMLRuntimeException {
- int newLocks = (int) locks.addAndGet(l);
- if (newLocks < 0) {
- throw new CacheException("Internal state error : Invalid number of locks on a GPUObject");
- }
-
- if(LOG.isTraceEnabled()) {
- LOG.trace("GPU : updateReleaseLocks, new number of locks is " + newLocks + ", on " + this + ", GPUContext="
- + getGPUContext());
- }
+ private void updateReleaseLocks() throws DMLRuntimeException {
GPUContext.EvictionPolicy evictionPolicy = getGPUContext().evictionPolicy;
switch (evictionPolicy) {
case LRU:
@@ -730,8 +765,8 @@ public class GPUObject {
* @throws DMLRuntimeException if data is not allocated or if there is no locked GPU Object or if could not obtain a {@link GPUContext}
*/
public void releaseInput() throws DMLRuntimeException {
- // A read lock is a positive quantity, therefor when the lock is freed, a negative 1 is added
- updateReleaseLocks(-1);
+ releaseReadLock();
+ updateReleaseLocks();
if (!isAllocated())
throw new CacheException("Attempting to release an input before allocating it");
}
@@ -742,8 +777,8 @@ public class GPUObject {
* @throws DMLRuntimeException if data is not allocated or if there is no locked GPU Object or if could not obtain a {@link GPUContext}
*/
public void releaseOutput() throws DMLRuntimeException {
- // A write lock is a negative quantity, therefore when the lock is freed, a positive number is added
- updateReleaseLocks(1);
+ releaseWriteLock();
+ updateReleaseLocks();
dirty = true;
if (!isAllocated())
throw new CacheException("Attempting to release an output before allocating it");
@@ -798,7 +833,7 @@ public class GPUObject {
cudnnDestroyTensorDescriptor(tensorDescriptor);
tensorDescriptor = null;
}
- locks.set(0);
+ resetReadWriteLock();
getGPUContext().removeRecordedUsage(this);
}
@@ -1061,7 +1096,8 @@ public class GPUObject {
final StringBuilder sb = new StringBuilder("GPUObject{");
sb.append(", tensorShape=").append(Arrays.toString(tensorShape));
sb.append(", dirty=").append(dirty);
- sb.append(", locks=").append(locks);
+ sb.append(", readLocks=").append(readLocks.longValue());
+ sb.append(", writeLock=").append(writeLock);
sb.append(", sparse? ").append(isSparse);
sb.append(", dims=[").append(mat.getNumRows()).append(",").append(mat.getNumColumns()).append("]");
sb.append('}');