You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/08/24 21:41:35 UTC

[5/5] systemml git commit: [SYSTEMML-1793] Support matrix range indexing on GPU

[SYSTEMML-1793] Support matrix range indexing on GPU

- This commit supports matrix range indexing (i.e. right indexing) without requiring sparse to dense conversion of inputs. Note: this PR only supports dense output.
- Also, added RightIndexingTests in gpu package.

Closes #637.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/628ffad1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/628ffad1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/628ffad1

Branch: refs/heads/master
Commit: 628ffad1b26a056edd3782787f89b8bf7711f0e5
Parents: 8fb74b1
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Thu Aug 24 14:38:52 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Thu Aug 24 14:40:34 2017 -0700

----------------------------------------------------------------------
 src/main/cpp/kernels/SystemML.cu                |   30 +
 src/main/cpp/kernels/SystemML.ptx               | 2817 +++++++++---------
 .../java/org/apache/sysml/hops/IndexingOp.java  |   11 +-
 .../instructions/GPUInstructionParser.java      |    8 +
 .../instructions/gpu/GPUInstruction.java        |    6 +-
 .../gpu/MatrixIndexingGPUInstruction.java       |  148 +
 .../instructions/gpu/context/CSRPointer.java    |   20 +-
 .../instructions/gpu/context/GPUContext.java    |   66 +-
 .../instructions/gpu/context/GPUObject.java     |   48 +-
 .../runtime/matrix/data/LibMatrixCUDA.java      |  930 +++---
 .../org/apache/sysml/utils/GPUStatistics.java   |  366 ++-
 .../sysml/test/gpu/RightIndexingTests.java      |   74 +
 12 files changed, 2472 insertions(+), 2052 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/628ffad1/src/main/cpp/kernels/SystemML.cu
----------------------------------------------------------------------
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index dcd64b2..d64d8aa 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -28,6 +28,36 @@ nvcc -ptx -arch=sm_30 SystemML.cu
 
 
 /**
+ * Performs a slice operation where the input matrix is sparse and the output matrix is dense.
+ * This function avoids unnecessary sparse to dense conversion of the input matrix.
+ * 
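+ * @param inVal pointer to the non-zero values of the sparse (CSR) input matrix
+ * @param inRowPtr pointer to the CSR row offsets of the input matrix
+ * @param colInd pointer to the CSR column indices of the input matrix
+ * @param ret pointer to the dense output matrix (row-major)
+ * @param rl row lower bound (inclusive)
+ * @param ru row upper bound (inclusive)
+ * @param cl column lower bound (inclusive)
+ * @param cu column upper bound (inclusive)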
+ */
+extern "C"
+__global__ void slice_sparse_dense(double* inVal, int* inRowPtr, int* colInd, double* ret, int rl, int ru, int cl, int cu) {
+	int index = blockIdx.x * blockDim.x + threadIdx.x; // flat thread id (x dimension only); doubles as the output row number
+	int rowIndex = index + rl; // matching row of the input matrix
+    if (rowIndex <= ru){ // ru is inclusive; threads mapped past the last requested row do nothing
+    	int retClen = cu - cl + 1; // number of columns in the output slice (cl..cu, both inclusive)
+    	// Iterate over elements of the row 'rowIndex'.
+    	for(int i = inRowPtr[rowIndex]; i < inRowPtr[rowIndex+1]; i++) { // inRowPtr[r] .. inRowPtr[r+1] delimit row r's entries in inVal/colInd
+    		// Only slice if the index falls into the given range
+    		if(cl <= colInd[i] && colInd[i] <= cu) {
+    			ret[ index*retClen + (colInd[i] - cl) ] = inVal[i]; // row-major write, column shifted by cl; cells with no stored entry are not written here - presumably the caller zero-fills ret first (TODO confirm)
+    		}
+    	}
+    }
+}
+
+
+/**
  * Does a copy of upper to lower triangle of the given matrix
  * @param ret the input and output array allocated on the GPU
  * @param dim the number of rows of the square matrix ret