You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by ni...@apache.org on 2017/08/24 21:41:35 UTC
[5/5] systemml git commit: [SYSTEMML-1793] Support matrix range
indexing on GPU
[SYSTEMML-1793] Support matrix range indexing on GPU
- This commit supports matrix range indexing (i.e. right indexing) without requiring sparse to dense conversion of inputs. Note: this PR only supports dense output.
- Also, added RightIndexingTests in gpu package.
Closes #637.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/628ffad1
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/628ffad1
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/628ffad1
Branch: refs/heads/master
Commit: 628ffad1b26a056edd3782787f89b8bf7711f0e5
Parents: 8fb74b1
Author: Niketan Pansare <np...@us.ibm.com>
Authored: Thu Aug 24 14:38:52 2017 -0700
Committer: Niketan Pansare <np...@us.ibm.com>
Committed: Thu Aug 24 14:40:34 2017 -0700
----------------------------------------------------------------------
src/main/cpp/kernels/SystemML.cu | 30 +
src/main/cpp/kernels/SystemML.ptx | 2817 +++++++++---------
.../java/org/apache/sysml/hops/IndexingOp.java | 11 +-
.../instructions/GPUInstructionParser.java | 8 +
.../instructions/gpu/GPUInstruction.java | 6 +-
.../gpu/MatrixIndexingGPUInstruction.java | 148 +
.../instructions/gpu/context/CSRPointer.java | 20 +-
.../instructions/gpu/context/GPUContext.java | 66 +-
.../instructions/gpu/context/GPUObject.java | 48 +-
.../runtime/matrix/data/LibMatrixCUDA.java | 930 +++---
.../org/apache/sysml/utils/GPUStatistics.java | 366 ++-
.../sysml/test/gpu/RightIndexingTests.java | 74 +
12 files changed, 2472 insertions(+), 2052 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/628ffad1/src/main/cpp/kernels/SystemML.cu
----------------------------------------------------------------------
diff --git a/src/main/cpp/kernels/SystemML.cu b/src/main/cpp/kernels/SystemML.cu
index dcd64b2..d64d8aa 100644
--- a/src/main/cpp/kernels/SystemML.cu
+++ b/src/main/cpp/kernels/SystemML.cu
@@ -28,6 +28,36 @@ nvcc -ptx -arch=sm_30 SystemML.cu
/**
+ * Performs a slice operation where the input matrix is sparse and the output matrix is dense.
+ * This function avoids unnecessary sparse to dense conversion of the input matrix.
+ *
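+ * @param inVal CSR values array of the sparse input matrix
+ * @param inRowPtr CSR row pointer array of the sparse input matrix
+ * @param colInd CSR column index array of the sparse input matrix
+ * @param ret dense output pointer, written in row-major order
+ * @param rl row lower bound (inclusive)
+ * @param ru row upper bound (inclusive)
+ * @param cl column lower bound (inclusive)
+ * @param cu column upper bound (inclusive)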
+ */
+extern "C"
+__global__ void slice_sparse_dense(double* inVal, int* inRowPtr, int* colInd, double* ret, int rl, int ru, int cl, int cu) {
+  int index = blockIdx.x * blockDim.x + threadIdx.x; // one thread per output row
+  int rowIndex = index + rl; // input row handled by this thread
+  if (rowIndex <= ru) { // ru is inclusive; threads past the last requested row do nothing
+    // Row offset in 64 bits: the int product index*(cu-cl+1) overflows once the output has >= 2^31 cells.
+    size_t retRowStart = (size_t)index * (size_t)(cu - cl + 1);
+    for (int i = inRowPtr[rowIndex]; i < inRowPtr[rowIndex+1]; i++) { // stored values of row 'rowIndex'
+      int col = colInd[i]; // read the column index once instead of up to three times
+      if (cl <= col && col <= cu) { // copy only columns in [cl, cu]; all other cells of ret are left untouched
+        ret[ retRowStart + (col - cl) ] = inVal[i];
+      }
+    }
+  }
+}
+
+
+/**
* Does a copy of upper to lower triangle of the given matrix
* @param ret the input and output array allocated on the GPU
* @param dim the number of rows of the square matrix ret