You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/10/26 03:00:59 UTC
systemml git commit: [SYSTEMML-446] Bugfix for GPU sparse right indexing with empty output
Repository: systemml
Updated Branches:
refs/heads/master abbffc55e -> d3917effd
[SYSTEMML-446] Bugfix for GPU sparse right indexing with empty output
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/d3917eff
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/d3917eff
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/d3917eff
Branch: refs/heads/master
Commit: d3917effd988de0e0977a310c73c4f232214632e
Parents: abbffc5
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Wed Oct 25 19:57:28 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Wed Oct 25 19:57:28 2017 -0700
----------------------------------------------------------------------
.../gpu/context/ExecutionConfig.java | 29 ++------------------
.../runtime/matrix/data/LibMatrixCUDA.java | 8 ++++--
2 files changed, 7 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/d3917eff/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
index 7f8eb9e..cae0660 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/context/ExecutionConfig.java
@@ -69,6 +69,8 @@ public class ExecutionConfig {
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
public static ExecutionConfig getConfigForSimpleVectorOperations(int numCells) throws DMLRuntimeException {
+ if(numCells == 0)
+ throw new DMLRuntimeException("Attempting to invoke a kernel with 0 threads");
int deviceNumber = 0;
int blockDimX = getMaxBlockDim(deviceNumber);
int gridDimX = (int) Math.ceil((double) numCells / blockDimX);
@@ -76,32 +78,6 @@ public class ExecutionConfig {
}
/**
- * Use this for simple matrix operations and use following in the kernel
- * <code>
- * int ix = blockIdx.x * blockDim.x + threadIdx.x;
- * int iy = blockIdx.y * blockDim.y + threadIdx.y;
- * </code>
- * <p>
- * This tries to schedule as minimum grids as possible.
- *
- * @param rlen number of rows
- * @param clen number of columns
- * @return execution configuration
- * @throws DMLRuntimeException if DMLRuntimeException occurs
- */
- public static ExecutionConfig getConfigForMatrixOperations(int rlen, int clen) throws DMLRuntimeException {
- int deviceNumber = 0;
- int maxBlockDim = getMaxBlockDim(deviceNumber);
- int blockDimX = (int) Math.min(maxBlockDim, rlen);
- int gridDimX = (int) Math.ceil((double) rlen / blockDimX);
- int blockDimY = (int) Math.min(Math.floor(((double) maxBlockDim) / blockDimX), clen);
- int gridDimY = (int) Math.ceil((double) clen / blockDimY);
- if (gridDimY > 65535)
- throw new DMLRuntimeException("Internal Error: gridDimY must be less than 65535 for all supported CUDA compute capabilites!");
- return new ExecutionConfig(gridDimX, gridDimY, blockDimX, blockDimY);
- }
-
- /**
* Use this for simple vector operations and use following in the kernel
* <code>
* int index = blockIdx.x * blockDim.x + threadIdx.x
@@ -116,7 +92,6 @@ public class ExecutionConfig {
return getConfigForSimpleVectorOperations(rlen * clen);
}
-
public ExecutionConfig(int gridDimX, int blockDimX) {
this.gridDimX = gridDimX;
this.blockDimX = blockDimX;
http://git-wip-us.apache.org/repos/asf/systemml/blob/d3917eff/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index eb17e69..2cccde0 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -1821,17 +1821,19 @@ public class LibMatrixCUDA {
*/
protected static void sliceSparseDense(GPUContext gCtx, String instName, CSRPointer inPointer, Pointer outPointer,
int rl, int ru, int cl, int cu, int inClen) throws DMLRuntimeException {
+ int size = getNnz(inPointer, rl, ru);
+ // Return since nnz of the output is 0 as outPointer is expected to be zeroed out.
+ if(size == 0) return;
+
int retRlen = ru - rl + 1;
long t0 = GPUStatistics.DISPLAY_STATISTICS ? System.nanoTime() : 0;
int retClen = cu - cl + 1;
- int size = -1; String kernel = null; String timer = null;
-
+ String kernel = null; String timer = null;
// Note: row-wise parallelization scheme iterates over input rows in single thread
// whereas nnz parallelization scheme iterates over number of output rows in single thread.
if(inClen > 10 && retClen > 2*retRlen) {
// Perform nnz parallelization for wide and short matrices
- size = getNnz(inPointer, rl, ru);
timer = GPUInstruction.MISC_TIMER_RIX_SPARSE_DENSE_OP_NNZ;
kernel = "slice_sparse_dense_nnz";
}