You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/06/02 20:20:13 UTC
[systemds] branch master updated: [MINOR] Fix robustness and cleanup lmPredict, more gridSearch tests
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 28742bc [MINOR] Fix robustness and cleanup lmPredict, more gridSearch tests
28742bc is described below
commit 28742bc397887c5ab9c9a8f3193c311ebd4b9e39
Author: Matthias Boehm <mb...@gmail.com>
AuthorDate: Wed Jun 2 22:19:48 2021 +0200
[MINOR] Fix robustness and cleanup lmPredict, more gridSearch tests
---
scripts/builtin/lmPredict.dml | 6 +--
.../runtime/compress/cocode/CoCodeCostTSMM.java | 2 +-
.../runtime/compress/lib/BitmapLossyEncoder.java | 3 ++
.../runtime/compress/lib/CLALibRightMultBy.java | 2 +-
.../functions/builtin/BuiltinGridSearchTest.java | 35 ++++++++++++++---
.../scripts/functions/builtin/GridSearchLM2.dml | 44 ++++++++++++----------
6 files changed, 62 insertions(+), 30 deletions(-)
diff --git a/scripts/builtin/lmPredict.dml b/scripts/builtin/lmPredict.dml
index 3a7ead7..f53e326 100644
--- a/scripts/builtin/lmPredict.dml
+++ b/scripts/builtin/lmPredict.dml
@@ -23,11 +23,11 @@ m_lmPredict = function(Matrix[Double] X, Matrix[Double] B,
Matrix[Double] ytest, Integer icpt = 0, Boolean verbose = FALSE)
return (Matrix[Double] yhat)
{
- intercept = ifelse(icpt==0, matrix(0,1,ncol(B)), B[nrow(B),]);
- yhat = X %*% B[1:ncol(X)] + matrix(1,nrow(X),1) %*% intercept;
+ intercept = ifelse(icpt>0 | ncol(X)+1==nrow(B), as.scalar(B[nrow(B),]), 0);
+ yhat = X %*% B[1:ncol(X),] + intercept;
if( verbose ) {
- y_residual = ytest - yhat;
+ y_residual = ytest - yhat;
avg_res = sum(y_residual) / nrow(ytest);
ss_res = sum(y_residual^2);
ss_avg_res = ss_res - nrow(ytest) * avg_res^2;
diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostTSMM.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostTSMM.java
index f31c53f..7635061 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostTSMM.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostTSMM.java
@@ -149,7 +149,7 @@ public class CoCodeCostTSMM extends AColumnCoCoder {
return cost;
}
- private double getCostOfSelfTSMM(CompressedSizeInfoColGroup g) {
+ private static double getCostOfSelfTSMM(CompressedSizeInfoColGroup g) {
double cost = 0;
final int nCol = g.getColumns().length;
cost += g.getNumVals() * (nCol * (nCol + 1)) / 2;
diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/BitmapLossyEncoder.java b/src/main/java/org/apache/sysds/runtime/compress/lib/BitmapLossyEncoder.java
index 88711fe..55fde03 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/BitmapLossyEncoder.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/BitmapLossyEncoder.java
@@ -89,6 +89,7 @@ public class BitmapLossyEncoder {
* @param numRows The number of Rows.
* @return a lossy bitmap.
*/
+ @SuppressWarnings("unused")
private static BitmapLossy make8BitLossy(Bitmap ubm, Stats stats, int numRows) {
final double[] fp = ubm.getValues();
int numCols = ubm.getNumColumns();
@@ -284,6 +285,7 @@ public class BitmapLossyEncoder {
}
}
+ @SuppressWarnings("unused")
private static double[] getMemLocalDoubleArray(int length, boolean clean) {
double[] ar = memPoolDoubleArray.get();
if(ar != null && ar.length >= length) {
@@ -312,6 +314,7 @@ public class BitmapLossyEncoder {
protected double maxDelta;
protected boolean sameDelta;
+ @SuppressWarnings("unused")
public Stats(double[] fp) {
max = Double.NEGATIVE_INFINITY;
min = Double.POSITIVE_INFINITY;
diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java
index 61e275c..d486fa8 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java
@@ -149,7 +149,7 @@ public class CLALibRightMultBy {
}
private static ColGroupEmpty findEmptyColumnsAndMakeEmptyColGroup(List<AColGroup> colGroups, int nCols, int nRows) {
- Set<Integer> emptyColumns = new HashSet<Integer>(nCols);
+ Set<Integer> emptyColumns = new HashSet<>(nCols);
for(int i = 0; i < nCols; i++)
emptyColumns.add(i);
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGridSearchTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGridSearchTest.java
index 7d4449b..34504c9 100644
--- a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGridSearchTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGridSearchTest.java
@@ -23,16 +23,16 @@ import org.junit.Assert;
import org.junit.Test;
import org.apache.sysds.common.Types.ExecMode;
-import org.apache.sysds.common.Types.ExecType;
import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
-
+import org.apache.sysds.utils.Statistics;
public class BuiltinGridSearchTest extends AutomatedTestBase
{
private final static String TEST_NAME1 = "GridSearchLM";
private final static String TEST_NAME2 = "GridSearchMLogreg";
+ private final static String TEST_NAME3 = "GridSearchLM2";
private final static String TEST_DIR = "functions/builtin/";
private final static String TEST_CLASS_DIR = TEST_DIR + BuiltinGridSearchTest.class.getSimpleName() + "/";
@@ -43,24 +43,45 @@ public class BuiltinGridSearchTest extends AutomatedTestBase
public void setUp() {
addTestConfiguration(TEST_NAME1,new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1,new String[]{"R"}));
addTestConfiguration(TEST_NAME2,new TestConfiguration(TEST_CLASS_DIR, TEST_NAME2,new String[]{"R"}));
+ addTestConfiguration(TEST_NAME3,new TestConfiguration(TEST_CLASS_DIR, TEST_NAME3,new String[]{"R"}));
}
@Test
public void testGridSearchLmCP() {
- runGridSearch(TEST_NAME1, ExecType.CP);
+ runGridSearch(TEST_NAME1, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testGridSearchLmHybrid() {
+ runGridSearch(TEST_NAME1, ExecMode.HYBRID);
}
@Test
public void testGridSearchLmSpark() {
- runGridSearch(TEST_NAME1, ExecType.SPARK);
+ runGridSearch(TEST_NAME1, ExecMode.SPARK);
}
@Test
public void testGridSearchMLogregCP() {
- runGridSearch(TEST_NAME2, ExecType.CP);
+ runGridSearch(TEST_NAME2, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testGridSearchMLogregHybrid() {
+ runGridSearch(TEST_NAME2, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testGridSearchLm2CP() {
+ runGridSearch(TEST_NAME3, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testGridSearchLm2Hybrid() {
+ runGridSearch(TEST_NAME3, ExecMode.HYBRID);
}
- private void runGridSearch(String testname, ExecType et)
+ private void runGridSearch(String testname, ExecMode et)
{
ExecMode modeOld = setExecMode(et);
try {
@@ -78,6 +99,8 @@ public class BuiltinGridSearchTest extends AutomatedTestBase
//expected loss smaller than default invocation
Assert.assertTrue(TestUtils.readDMLBoolean(output("R")));
+ if( et != ExecMode.SPARK )
+ Assert.assertEquals(0, Statistics.getNoOfExecutedSPInst());
}
finally {
resetExecMode(modeOld);
diff --git a/scripts/builtin/lmPredict.dml b/src/test/scripts/functions/builtin/GridSearchLM2.dml
similarity index 54%
copy from scripts/builtin/lmPredict.dml
copy to src/test/scripts/functions/builtin/GridSearchLM2.dml
index 3a7ead7..278d94c 100644
--- a/scripts/builtin/lmPredict.dml
+++ b/src/test/scripts/functions/builtin/GridSearchLM2.dml
@@ -19,24 +19,30 @@
#
#-------------------------------------------------------------
-m_lmPredict = function(Matrix[Double] X, Matrix[Double] B,
- Matrix[Double] ytest, Integer icpt = 0, Boolean verbose = FALSE)
- return (Matrix[Double] yhat)
+l2norm = function(Matrix[Double] X, Matrix[Double] y, Matrix[Double] B)
+ return (Matrix[Double] loss)
{
- intercept = ifelse(icpt==0, matrix(0,1,ncol(B)), B[nrow(B),]);
- yhat = X %*% B[1:ncol(X)] + matrix(1,nrow(X),1) %*% intercept;
-
- if( verbose ) {
- y_residual = ytest - yhat;
- avg_res = sum(y_residual) / nrow(ytest);
- ss_res = sum(y_residual^2);
- ss_avg_res = ss_res - nrow(ytest) * avg_res^2;
- R2 = 1 - ss_res / (sum(ytest^2) - nrow(ytest) * (sum(ytest)/nrow(ytest))^2);
- print("\nAccuracy:" +
- "\n--sum(ytest) = " + sum(ytest) +
- "\n--sum(yhat) = " + sum(yhat) +
- "\n--AVG_RES_Y: " + avg_res +
- "\n--SS_AVG_RES_Y: " + ss_avg_res +
- "\n--R2: " + R2 );
- }
+ yhat = lmPredict(X=X, B=B, ytest=y)
+ loss = as.matrix(sum((y - yhat)^2));
}
+
+X = read($1);
+y = read($2);
+
+N = 200;
+Xtrain = X[1:N,];
+ytrain = y[1:N,];
+Xtest = X[(N+1):nrow(X),];
+ytest = y[(N+1):nrow(X),];
+
+params = list("icpt","reg", "tol", "maxi");
+paramRanges = list(seq(0,1,2),10^seq(0,-4), 10^seq(-6,-12), 10^seq(1,3));
+[B1, opt] = gridSearch(X=Xtrain, y=ytrain, train="lm", predict="l2norm",
+ numB=ncol(X)+1, params=params, paramValues=paramRanges);
+B2 = lm(X=Xtrain, y=ytrain, verbose=FALSE);
+
+l1 = l2norm(Xtest, ytest, B1);
+l2 = l2norm(Xtest, ytest, B2);
+R = as.scalar(l1 < l2);
+
+write(R, $3)