You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by re...@apache.org on 2017/11/03 18:02:29 UTC
[27/50] [abbrv] systemml git commit: [SYSTEMML-1969] Support
single-precision operations on GPU backend
http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
index 21a2a35..d962027 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
@@ -23,13 +23,6 @@ import static jcuda.jcusparse.cusparseOperation.CUSPARSE_OPERATION_TRANSPOSE;
import static jcuda.runtime.JCuda.cudaMemcpy;
import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
import jcuda.Pointer;
-import jcuda.Sizeof;
-import jcuda.jcublas.JCublas2;
-import jcuda.jcublas.cublasHandle;
-import jcuda.jcublas.cublasOperation;
-import jcuda.jcusparse.JCusparse;
-import jcuda.jcusparse.cusparseHandle;
-import jcuda.runtime.JCuda;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -43,6 +36,11 @@ import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysml.utils.GPUStatistics;
import org.apache.sysml.utils.Statistics;
+import jcuda.jcusparse.cusparseHandle;
+import jcuda.jcublas.cublasHandle;
+import jcuda.jcublas.cublasOperation;
+import jcuda.runtime.JCuda;
+
public class LibMatrixCuMatMult extends LibMatrixCUDA {
private static final Log LOG = LogFactory.getLog(LibMatrixCuMatMult.class.getName());
@@ -175,7 +173,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
// Step 3: Invoke the kernel
long t1 = GPUStatistics.DISPLAY_STATISTICS ? System.nanoTime() : 0;
- JCusparse.cusparseDcsrgemm(getCusparseHandle(gCtx), transa, transb, params.m, params.n, params.k, A.descr,
+ cudaSupportFunctions.cusparsecsrgemm(getCusparseHandle(gCtx), transa, transb, params.m, params.n, params.k, A.descr,
(int) A.nnz, A.val, A.rowPtr, A.colInd, B.descr, (int) B.nnz, B.val, B.rowPtr, B.colInd, C.descr,
C.val, C.rowPtr, C.colInd);
if (GPUStatistics.DISPLAY_STATISTICS)
@@ -239,7 +237,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
* allocated in dense row-major format and A is sparse.
*
* Other than input and output, this method requires additional memory =
- * outRLen * outCLen * Sizeof.DOUBLE
+ * outRLen * outCLen * sizeOfDataType
*
* @param gCtx
* a valid {@link GPUContext}
@@ -276,7 +274,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
// t(C) = t(B) %*% t(A)
Pointer output = null;
if (outRLen != 1 && outCLen != 1) {
- output = gCtx.allocate(outRLen * outCLen * Sizeof.DOUBLE);
+ output = gCtx.allocate(outRLen * outCLen * sizeOfDataType);
} else {
// no transpose required for vector output
output = C;
@@ -287,7 +285,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
if (outRLen != 1 && outCLen != 1) {
// Transpose: C = t(output)
long t0 = GPUStatistics.DISPLAY_STATISTICS ? System.nanoTime() : 0;
- JCublas2.cublasDgeam(gCtx.getCublasHandle(), cublasOperation.CUBLAS_OP_T, cublasOperation.CUBLAS_OP_T,
+ cudaSupportFunctions.cublasgeam(gCtx.getCublasHandle(), cublasOperation.CUBLAS_OP_T, cublasOperation.CUBLAS_OP_T,
toInt(outCLen), toInt(outRLen), one(), output, toInt(outRLen), zero(), new Pointer(),
toInt(outRLen), C, toInt(outCLen));
if (!DMLScript.EAGER_CUDA_FREE)
@@ -331,7 +329,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
int m = toInt(param.rightNumRows);
int n = toInt(param.rightNumCols);
int transa = reverseCusparseOp(cusparseOp(param.isLeftTransposed));
- JCusparse.cusparseDcsrmv(handle, transa, m, n, toInt(B.nnz), one(), B.descr, B.val, B.rowPtr, B.colInd, A,
+ cudaSupportFunctions.cusparsecsrmv(handle, transa, m, n, toInt(B.nnz), one(), B.descr, B.val, B.rowPtr, B.colInd, A,
zero(), C);
kernel = GPUInstruction.MISC_TIMER_SPARSE_MATRIX_DENSE_VECTOR_LIB;
} else {
@@ -342,7 +340,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
int transa = reverseCusparseOp(cusparseOp(param.isLeftTransposed));
int transb = cusparseOp(param.isRightTransposed);
LOG.debug(" GPU Sparse-Dense Matrix Multiply (rhs transpose) ");
- JCusparse.cusparseDcsrmm2(handle, transa, transb, m, param.n, k, toInt(B.nnz), one(), B.descr, B.val,
+ cudaSupportFunctions.cusparsecsrmm2(handle, transa, transb, m, param.n, k, toInt(B.nnz), one(), B.descr, B.val,
B.rowPtr, B.colInd, A, param.ldb, zero(), C, param.ldc);
}
if (GPUStatistics.DISPLAY_STATISTICS)
@@ -383,7 +381,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
// Vector product
LOG.debug(" GPU Dense-dense Vector Product");
double[] result = { 0 };
- JCublas2.cublasDdot(handle, param.k, A, 1, B, 1, Pointer.to(result));
+ cudaSupportFunctions.cublasdot(handle, param.k, A, 1, B, 1, Pointer.to(result));
// By default in CuBlas V2, cublas pointer mode is set to
// CUBLAS_POINTER_MODE_HOST.
// This means that scalar values passed are on host (as opposed to
@@ -391,7 +389,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
// The result is copied from the host back to the device so that the
// rest of
// infrastructure can treat it uniformly.
- cudaMemcpy(C, Pointer.to(result), 1 * Sizeof.DOUBLE, cudaMemcpyHostToDevice);
+ cudaMemcpy(C, Pointer.to(result), 1 * sizeOfDataType, cudaMemcpyHostToDevice);
kernel = GPUInstruction.MISC_TIMER_DENSE_DOT_LIB;
} else if (param.m == 1) {
// Vector-matrix multiply
@@ -399,18 +397,18 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
transb = reverseCublasOp(transb);
int rightNumRows = (transb == CUSPARSE_OPERATION_TRANSPOSE) ? param.k : param.n;
int rightNumCols = (transb == CUSPARSE_OPERATION_TRANSPOSE) ? param.n : param.k;
- JCublas2.cublasDgemv(handle, transb, rightNumRows, rightNumCols, one(), B, param.ldb, A, 1, zero(), C, 1);
+ cudaSupportFunctions.cublasgemv(handle, transb, rightNumRows, rightNumCols, one(), B, param.ldb, A, 1, zero(), C, 1);
kernel = GPUInstruction.MISC_TIMER_DENSE_VECTOR_DENSE_MATRIX_LIB;
} else if (param.n == 1) {
// Matrix-vector multiply
LOG.debug(" GPU Dense Matrix-Vector Multiply");
int leftNumRows = (transa == CUSPARSE_OPERATION_NON_TRANSPOSE) ? param.m : param.k;
int leftNumCols = (transa == CUSPARSE_OPERATION_NON_TRANSPOSE) ? param.k : param.m;
- JCublas2.cublasDgemv(handle, transa, leftNumRows, leftNumCols, one(), A, param.lda, B, 1, zero(), C, 1);
+ cudaSupportFunctions.cublasgemv(handle, transa, leftNumRows, leftNumCols, one(), A, param.lda, B, 1, zero(), C, 1);
kernel = GPUInstruction.MISC_TIMER_DENSE_MATRIX_DENSE_VECTOR_LIB;
} else {
LOG.debug(" GPU Dense-Dense Matrix Multiply ");
- JCublas2.cublasDgemm(handle, transa, transb, param.m, param.n, param.k, one(), A, param.lda, B, param.ldb,
+ cudaSupportFunctions.cublasgemm(handle, transa, transb, param.m, param.n, param.k, one(), A, param.lda, B, param.ldb,
zero(), C, param.ldc);
kernel = GPUInstruction.MISC_TIMER_DENSE_MATRIX_DENSE_MATRIX_LIB;
}
http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 8ee6f8d..c023890 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -3852,8 +3852,9 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab
* @param ru row upper
* @param cl column lower
* @param cu column upper
- * @param ret ?
- * @return matrix block
+ * @param deep should perform deep copy
+ * @param ret output matrix block
+ * @return matrix block output matrix block
* @throws DMLRuntimeException if DMLRuntimeException occurs
*/
public MatrixBlock sliceOperations(int rl, int ru, int cl, int cu, boolean deep, CacheBlock ret)
http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
new file mode 100644
index 0000000..128bb39
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SinglePrecisionCudaSupportFunctions.java
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysml.runtime.matrix.data;
+
+import static jcuda.runtime.JCuda.cudaMemcpy;
+import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
+import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
+import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
+import org.apache.sysml.utils.GPUStatistics;
+
+import jcuda.Pointer;
+import jcuda.Sizeof;
+import jcuda.jcublas.JCublas2;
+import jcuda.jcublas.cublasHandle;
+import jcuda.jcusolver.JCusolverDn;
+import jcuda.jcusolver.cusolverDnHandle;
+import jcuda.jcusparse.JCusparse;
+import jcuda.jcusparse.cusparseHandle;
+import jcuda.jcusparse.cusparseMatDescr;
+
+public class SinglePrecisionCudaSupportFunctions implements CudaSupportFunctions {
+
+ private static final Log LOG = LogFactory.getLog(SinglePrecisionCudaSupportFunctions.class.getName());
+
+ @Override
+ public int cusparsecsrgemm(cusparseHandle handle, int transA, int transB, int m, int n, int k,
+ cusparseMatDescr descrA, int nnzA, Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA,
+ cusparseMatDescr descrB, int nnzB, Pointer csrValB, Pointer csrRowPtrB, Pointer csrColIndB,
+ cusparseMatDescr descrC, Pointer csrValC, Pointer csrRowPtrC, Pointer csrColIndC) {
+ return JCusparse.cusparseScsrgemm(handle, transA, transB, m, n, k,
+ descrA, nnzA, csrValA, csrRowPtrA, csrColIndA,
+ descrB, nnzB, csrValB, csrRowPtrB, csrColIndB,
+ descrC, csrValC, csrRowPtrC, csrColIndC);
+ }
+
+ @Override
+ public int cublasgeam(cublasHandle handle, int transa, int transb, int m, int n, Pointer alpha, Pointer A, int lda,
+ Pointer beta, Pointer B, int ldb, Pointer C, int ldc) {
+ return JCublas2.cublasSgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc);
+ }
+
+ @Override
+ public int cusparsecsrmv(cusparseHandle handle, int transA, int m, int n, int nnz, Pointer alpha,
+ cusparseMatDescr descrA, Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA, Pointer x, Pointer beta,
+ Pointer y) {
+ return JCusparse.cusparseScsrmv(handle, transA, m, n, nnz, alpha,
+ descrA, csrValA, csrRowPtrA, csrColIndA, x, beta, y);
+ }
+
+ @Override
+ public int cusparsecsrmm2(cusparseHandle handle, int transa, int transb, int m, int n, int k, int nnz, jcuda.Pointer alpha, cusparseMatDescr descrA,
+ jcuda.Pointer csrValA, jcuda.Pointer csrRowPtrA, jcuda.Pointer csrColIndA,
+ jcuda.Pointer B, int ldb, jcuda.Pointer beta, jcuda.Pointer C, int ldc) {
+ return JCusparse.cusparseScsrmm2(handle, transa, transb, m, n, k, nnz, alpha, descrA, csrValA,
+ csrRowPtrA, csrColIndA, B, ldb, beta, C, ldc);
+ }
+
+ @Override
+ public int cublasdot(cublasHandle handle, int n, Pointer x, int incx, Pointer y, int incy, Pointer result) {
+ return JCublas2.cublasSdot(handle, n, x, incx, y, incy, result);
+ }
+
+ @Override
+ public int cublasgemv(cublasHandle handle, int trans, int m, int n, Pointer alpha, Pointer A, int lda, Pointer x,
+ int incx, Pointer beta, Pointer y, int incy) {
+ return JCublas2.cublasSgemv(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy);
+ }
+
+ @Override
+ public int cublasgemm(cublasHandle handle, int transa, int transb, int m, int n, int k, Pointer alpha, Pointer A,
+ int lda, Pointer B, int ldb, Pointer beta, Pointer C, int ldc) {
+ return JCublas2.cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+ }
+
+ @Override
+ public int cusparsecsr2csc(cusparseHandle handle, int m, int n, int nnz, Pointer csrVal, Pointer csrRowPtr,
+ Pointer csrColInd, Pointer cscVal, Pointer cscRowInd, Pointer cscColPtr, int copyValues, int idxBase) {
+ return JCusparse.cusparseScsr2csc(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, cscRowInd, cscColPtr, copyValues, idxBase);
+ }
+
+ @Override
+ public int cublassyrk(cublasHandle handle, int uplo, int trans, int n, int k, Pointer alpha, Pointer A, int lda,
+ Pointer beta, Pointer C, int ldc) {
+ return JCublas2.cublasSsyrk(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
+ }
+
+ @Override
+ public int cublasaxpy(cublasHandle handle, int n, Pointer alpha, Pointer x, int incx, Pointer y, int incy) {
+ return JCublas2.cublasSaxpy(handle, n, alpha, x, incx, y, incy);
+ }
+
+ @Override
+ public int cublastrsm(cublasHandle handle, int side, int uplo, int trans, int diag, int m, int n, Pointer alpha,
+ Pointer A, int lda, Pointer B, int ldb) {
+ return JCublas2.cublasStrsm(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+ }
+
+ @Override
+ public int cusolverDngeqrf_bufferSize(cusolverDnHandle handle, int m, int n, Pointer A, int lda, int[] Lwork) {
+ return JCusolverDn.cusolverDnSgeqrf_bufferSize(handle, m, n, A, lda, Lwork);
+ }
+
+ @Override
+ public int cusolverDngeqrf(cusolverDnHandle handle, int m, int n, Pointer A, int lda, Pointer TAU,
+ Pointer Workspace, int Lwork, Pointer devInfo) {
+ return JCusolverDn.cusolverDnSgeqrf(handle, m, n, A, lda, TAU, Workspace, Lwork, devInfo);
+ }
+
+ @Override
+ public int cusolverDnormqr(cusolverDnHandle handle, int side, int trans, int m, int n, int k, Pointer A, int lda,
+ Pointer tau, Pointer C, int ldc, Pointer work, int lwork, Pointer devInfo) {
+ return JCusolverDn.cusolverDnSormqr(handle, side, trans, m, n, k, A, lda, tau, C, ldc, work, lwork, devInfo);
+ }
+
+ @Override
+ public int cusparsecsrgeam(cusparseHandle handle, int m, int n, Pointer alpha, cusparseMatDescr descrA, int nnzA,
+ Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA, Pointer beta, cusparseMatDescr descrB, int nnzB,
+ Pointer csrValB, Pointer csrRowPtrB, Pointer csrColIndB, cusparseMatDescr descrC, Pointer csrValC,
+ Pointer csrRowPtrC, Pointer csrColIndC) {
+ return JCusparse.cusparseScsrgeam(handle, m, n, alpha, descrA, nnzA,
+ csrValA, csrRowPtrA, csrColIndA, beta, descrB, nnzB,
+ csrValB, csrRowPtrB, csrColIndB, descrC, csrValC, csrRowPtrC, csrColIndC);
+ }
+
+ @Override
+ public int cusparsecsr2dense(cusparseHandle handle, int m, int n, cusparseMatDescr descrA, Pointer csrValA,
+ Pointer csrRowPtrA, Pointer csrColIndA, Pointer A, int lda) {
+ return JCusparse.cusparseScsr2dense(handle, m, n, descrA, csrValA, csrRowPtrA, csrColIndA, A, lda);
+ }
+
+ @Override
+ public int cusparsedense2csr(cusparseHandle handle, int m, int n, cusparseMatDescr descrA, Pointer A, int lda,
+ Pointer nnzPerRow, Pointer csrValA, Pointer csrRowPtrA, Pointer csrColIndA) {
+ return JCusparse.cusparseSdense2csr(handle, m, n, descrA, A, lda, nnzPerRow, csrValA, csrRowPtrA, csrColIndA);
+ }
+
+ @Override
+ public int cusparsennz(cusparseHandle handle, int dirA, int m, int n, cusparseMatDescr descrA, Pointer A, int lda,
+ Pointer nnzPerRowCol, Pointer nnzTotalDevHostPtr) {
+ return JCusparse.cusparseSnnz(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, nnzTotalDevHostPtr);
+ }
+
+ @Override
+ public void deviceToHost(GPUContext gCtx, Pointer src, double[] dest, String instName) throws DMLRuntimeException {
+ long t1 = GPUStatistics.DISPLAY_STATISTICS && instName != null? System.nanoTime() : 0;
+ LOG.debug("Potential OOM: Allocated additional space in deviceToHost");
+ if(PERFORM_CONVERSION_ON_DEVICE) {
+ Pointer deviceDoubleData = gCtx.allocate(((long)dest.length)*Sizeof.DOUBLE);
+ LibMatrixCUDA.float2double(gCtx, src, deviceDoubleData, dest.length);
+ cudaMemcpy(Pointer.to(dest), deviceDoubleData, ((long)dest.length)*Sizeof.DOUBLE, cudaMemcpyDeviceToHost);
+ gCtx.cudaFreeHelper(deviceDoubleData);
+ }
+ else {
+ // TODO: Perform conversion on GPU using double2float and float2double kernels
+ float [] floatData = new float[dest.length];
+ cudaMemcpy(Pointer.to(floatData), src, ((long)dest.length)*Sizeof.FLOAT, cudaMemcpyDeviceToHost);
+ for(int i = 0; i < dest.length; i++) {
+ dest[i] = floatData[i];
+ }
+ }
+ if(GPUStatistics.DISPLAY_STATISTICS && instName != null)
+ GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_DEVICE_TO_HOST, System.nanoTime() - t1);
+ }
+
+ @Override
+ public void hostToDevice(GPUContext gCtx, double[] src, Pointer dest, String instName) throws DMLRuntimeException {
+ LOG.debug("Potential OOM: Allocated additional space in hostToDevice");
+ // TODO: Perform conversion on GPU using double2float and float2double kernels
+ long t1 = GPUStatistics.DISPLAY_STATISTICS && instName != null? System.nanoTime() : 0;
+ if(PERFORM_CONVERSION_ON_DEVICE) {
+ Pointer deviceDoubleData = gCtx.allocate(((long)src.length)*Sizeof.DOUBLE);
+ cudaMemcpy(deviceDoubleData, Pointer.to(src), ((long)src.length)*Sizeof.DOUBLE, cudaMemcpyHostToDevice);
+ LibMatrixCUDA.double2float(gCtx, deviceDoubleData, dest, src.length);
+ gCtx.cudaFreeHelper(deviceDoubleData);
+ }
+ else {
+ float [] floatData = new float[src.length];
+ for(int i = 0; i < src.length; i++) {
+ floatData[i] = (float) src[i];
+ }
+ cudaMemcpy(dest, Pointer.to(floatData), ((long)src.length)*Sizeof.FLOAT, cudaMemcpyHostToDevice);
+ }
+
+ if(GPUStatistics.DISPLAY_STATISTICS && instName != null)
+ GPUStatistics.maintainCPMiscTimes(instName, GPUInstruction.MISC_TIMER_HOST_TO_DEVICE, System.nanoTime() - t1);
+ }
+}
http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/test/java/org/apache/sysml/test/gpu/GPUTests.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/gpu/GPUTests.java b/src/test/java/org/apache/sysml/test/gpu/GPUTests.java
index b4e4b62..d7d1ad5 100644
--- a/src/test/java/org/apache/sysml/test/gpu/GPUTests.java
+++ b/src/test/java/org/apache/sysml/test/gpu/GPUTests.java
@@ -51,9 +51,14 @@ public abstract class GPUTests extends AutomatedTestBase {
protected final static String TEST_DIR = "org/apache/sysml/api/mlcontext";
protected static SparkSession spark;
- protected final double THRESHOLD = 1e-9; // for relative error
+ protected final double DOUBLE_PRECISION_THRESHOLD = 1e-9; // for relative error
private static final boolean PRINT_MAT_ERROR = false;
+ // We will use this flag until lower precision is supported on CP.
+ private final static String DATA_TYPE = "double";
+ protected final double SINGLE_PRECISION_THRESHOLD = 1e-3; // for relative error
+
+
@BeforeClass
public static void beforeClass() {
spark = createSystemMLSparkSession("GPUTests", "local");
@@ -70,7 +75,9 @@ public abstract class GPUTests extends AutomatedTestBase {
* @return a valid threshold
*/
protected double getTHRESHOLD() {
- return THRESHOLD;
+ if(DATA_TYPE.equals("double")) return DOUBLE_PRECISION_THRESHOLD;
+ else if(DATA_TYPE.equals("float")) return SINGLE_PRECISION_THRESHOLD;
+ else throw new RuntimeException("Unsupported datatype:" + DATA_TYPE);
}
@After
@@ -228,7 +235,7 @@ public abstract class GPUTests extends AutomatedTestBase {
}
/**
- * Asserts that the values in two matrices are in {@link UnaryOpTests#THRESHOLD} of each other
+ * Asserts that the values in two matrices are in {@link UnaryOpTests#DOUBLE_PRECISION_THRESHOLD} of each other
*
* @param expected expected matrix
* @param actual actual matrix
@@ -251,11 +258,15 @@ public abstract class GPUTests extends AutomatedTestBase {
double actualDouble = actualMB.quickGetValue(i, j);
if (expectedDouble != 0.0 && !Double.isNaN(expectedDouble) && Double.isFinite(expectedDouble)) {
double relativeError = Math.abs((expectedDouble - actualDouble) / expectedDouble);
+ double absoluteError = Math.abs(expectedDouble - actualDouble);
Formatter format = new Formatter();
format.format(
"Relative error(%f) is more than threshold (%f). Expected = %f, Actual = %f, differed at [%d, %d]",
relativeError, getTHRESHOLD(), expectedDouble, actualDouble, i, j);
- Assert.assertTrue(format.toString(), relativeError < getTHRESHOLD());
+ if(DATA_TYPE.equals("double"))
+ Assert.assertTrue(format.toString(), relativeError < getTHRESHOLD());
+ else
+ Assert.assertTrue(format.toString(), relativeError < getTHRESHOLD() || absoluteError < getTHRESHOLD());
format.close();
} else {
Assert.assertEquals(expectedDouble, actualDouble, getTHRESHOLD());
@@ -313,6 +324,7 @@ public abstract class GPUTests extends AutomatedTestBase {
protected List<Object> runOnGPU(SparkSession spark, String scriptStr, Map<String, Object> inputs,
List<String> outStrs) {
MLContext gpuMLC = new MLContext(spark);
+ gpuMLC.setConfigProperty("sysml.gpu.dataType", DATA_TYPE);
gpuMLC.setGPU(true);
gpuMLC.setForceGPU(true);
gpuMLC.setStatistics(true);
http://git-wip-us.apache.org/repos/asf/systemml/blob/abbffc55/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java b/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java
index d983716..cbc3563 100644
--- a/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java
+++ b/src/test/java/org/apache/sysml/test/gpu/MatrixMultiplicationOpTest.java
@@ -50,9 +50,9 @@ public class MatrixMultiplicationOpTest extends GPUTests {
public void matrixMatrixTest1() {
String scriptStr = "O = X %*% Y";
- int[] X1 = { 1, 128, 1024 };
- int[] X2 = { 1, 128, 1024 };
- int[] Y2 = { 1, 128, 1024 };
+ int[] X1 = { 1, 121 };
+ int[] X2 = { 1, 123 };
+ int[] Y2 = { 1, 122 };
double[] SX = { 0.0, 0.03, 0.3, 0.9 };
double[] SY = { 0.0, 0.03, 0.3, 0.9 };
@@ -74,8 +74,8 @@ public class MatrixMultiplicationOpTest extends GPUTests {
public void commonCaseMLMatrixMatrixTest1() {
String scriptStr = "O = X %*% Y";
- int[] X1 = { 1000000 };
- int[] X2 = { 1000 };
+ int[] X1 = { 5000 };
+ int[] X2 = { 50 };
int[] Y2 = { 1, 20 };
double[] SX = { 0.0, 0.03, 0.3 };
double[] SY = { 0.0, 0.03, 0.3, 0.9 };
@@ -98,9 +98,9 @@ public class MatrixMultiplicationOpTest extends GPUTests {
public void commonCaseDLMatrixMatrixTest1() {
String scriptStr = "O = X %*% Y";
- int[] X1 = { 100 };
- int[] X2 = { 600, 900 };
- int[] Y2 = { 205800 };
+ int[] X1 = { 32 };
+ int[] X2 = { 60, 90 };
+ int[] Y2 = { 2058 };
double[] SX = { 0.0, 0.03, 0.3 };
double[] SY = { 0.0, 0.03, 0.3, 0.9 };
@@ -122,9 +122,9 @@ public class MatrixMultiplicationOpTest extends GPUTests {
public void commonCaseDLMatrixMatrixTest2() {
String scriptStr = "O = X %*% Y";
- int[] X1 = { 64 };
- int[] X2 = { 196608 };
- int[] Y2 = { 512 };
+ int[] X1 = { 32 };
+ int[] X2 = { 1966 };
+ int[] Y2 = { 256 };
double[] SX = { 0.0, 0.03, 0.3, 0.9 };
double[] SY = { 0.0, 0.03, 0.3, 0.9 };