You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/06/11 03:01:06 UTC
systemml git commit: [MINOR] Removed unused datagen/test scripts and internal udf functions
Repository: systemml
Updated Branches:
refs/heads/master bc16b9e3d -> 608ac39c4
[MINOR] Removed unused datagen/test scripts and internal udf functions
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/608ac39c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/608ac39c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/608ac39c
Branch: refs/heads/master
Commit: 608ac39c40e4d2517c9900d901a403c0c579db7f
Parents: bc16b9e
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Jun 10 20:01:00 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Jun 10 20:01:00 2017 -0700
----------------------------------------------------------------------
pom.xml | 4 +-
scripts/datagen/genCorrelatedData.dml | 46 ---
scripts/datagen/genLinearRegressionData.dml | 71 ----
scripts/datagen/obsolete/genCorrelatedData.dml | 46 +++
.../obsolete/genLinearRegressionData.dml | 71 ++++
.../sysml/hops/ipa/InterProceduralAnalysis.java | 19 +-
.../org/apache/sysml/udf/lib/DeNaNWrapper.java | 79 -----
.../sysml/udf/lib/DeNegInfinityWrapper.java | 79 -----
.../sysml/udf/lib/PermutationMatrixWrapper.java | 146 --------
.../applications/ctableStats/stratstats.dml | 350 -------------------
10 files changed, 119 insertions(+), 792 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0fee7f9..5b914d5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -95,11 +95,9 @@
<directory>scripts</directory>
<excludes>
<exclude>algorithms/obsolete/*</exclude>
- <exclude>algorithms/obsolete</exclude>
+ <exclude>datagen/obsolete/*</exclude>
<exclude>perftest/*</exclude>
- <exclude>perftest</exclude>
<exclude>staging/**/*</exclude>
- <exclude>staging</exclude>
<exclude>nn/test/compare_backends/*</exclude>
<exclude>nn/test/compare_backends/*</exclude>
<exclude>nn/examples/caffe2dml/**/*</exclude>
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/genCorrelatedData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genCorrelatedData.dml b/scripts/datagen/genCorrelatedData.dml
deleted file mode 100644
index d3289ce..0000000
--- a/scripts/datagen/genCorrelatedData.dml
+++ /dev/null
@@ -1,46 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random correlated data
-# can generate any number of variables/columns
-# used to test univariate stats computation
-# by systemml
-
-# $1 is number of variables/columns
-# $2 is number of samples to create
-# $3 is the location to write out the covariance mat
-# $4 is the location to write out the generated data
-dims = $1
-numSamples = $2
-
-U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0)
-denoms = sqrt(colSums(U*U))
-parfor(i in 1:dims){
- U[i,] = U[i,] / denoms
-}
-
-C = t(U)%*%U
-write(C, $3, format="binary")
-
-R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0)
-Rc = R%*%U
-write(Rc, $4, format="binary")
-
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/genLinearRegressionData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genLinearRegressionData.dml b/scripts/datagen/genLinearRegressionData.dml
deleted file mode 100644
index 10b094c..0000000
--- a/scripts/datagen/genLinearRegressionData.dml
+++ /dev/null
@@ -1,71 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# This script generates random data for linear regression. A matrix is generated
-# consisting of a data matrix with a label column appended to it.
-#
-# INPUT PARAMETERS:
-# --------------------------------------------------------------------------------------------
-# NAME TYPE DEFAULT MEANING
-# --------------------------------------------------------------------------------------------
-# numSamples Int --- Number of samples
-# numFeatures Int --- Number of features (independent variables)
-# maxFeatureValue Int --- Maximum feature value (absolute value)
-# maxWeight Int --- Maximum weight (absolute value)
-# addNoise Boolean --- Determines whether noise should be added to Y
-# b Double --- Intercept
-# sparsity Double --- Controls the sparsity in the generated data (a value between 0 and 1)
-# output String --- Location to write the generated data/label matrix
-# format String --- Matrix output format
-# perc Double 0.8 Percentage of training sample
-# percFile String --- File to store the percentages
-# --------------------------------------------------------------------------------------------
-# OUTPUT: Matrix of random data with appended label column
-# ---------------------------------------------------------------------------------------------
-#
-# Example
-# ./runStandaloneSystemML.sh algorithms/datagen/genLinearRegressionData.dml -nvargs numSamples=1000 numFeatures=50 maxFeatureValue=5 maxWeight=5 addNoise=FALSE b=0 sparsity=0.7 output=linRegData.csv format=csv
-#
-
-perc = ifdef($perc, 0.8)
-percFile = ifdef($percFile, "perc.csv")
-p = matrix(0, rows=2, cols=1)
-p[1,1] = perc
-p[2,1] = (1-perc)
-write(p, percFile, format="csv")
-
-X = Rand(cols=$numFeatures, max=1, min=-1, pdf="uniform", rows=$numSamples, seed=0, sparsity=$sparsity)
-X = X * $maxFeatureValue
-
-w = Rand(cols=1, max=1, min=-1, pdf="uniform", rows=$numFeatures, seed=0)
-w = w * $maxWeight
-
-Y = X %*% w
-Y = Y + $b
-
-if ($addNoise == TRUE) {
- noise = Rand(cols=1, pdf="normal", rows=$numSamples, seed=0)
- Y = Y + noise
-}
-
-Z = cbind(X,Y)
-write(Z, $output, format=$format)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/obsolete/genCorrelatedData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/obsolete/genCorrelatedData.dml b/scripts/datagen/obsolete/genCorrelatedData.dml
new file mode 100644
index 0000000..d3289ce
--- /dev/null
+++ b/scripts/datagen/obsolete/genCorrelatedData.dml
@@ -0,0 +1,46 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random correlated data
+# can generate any number of variables/columns
+# used to test univariate stats computation
+# by systemml
+
+# $1 is number of variables/columns
+# $2 is number of samples to create
+# $3 is the location to write out the covariance mat
+# $4 is the location to write out the generated data
+dims = $1
+numSamples = $2
+
+U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0)
+denoms = sqrt(colSums(U*U))
+parfor(i in 1:dims){
+ U[i,] = U[i,] / denoms
+}
+
+C = t(U)%*%U
+write(C, $3, format="binary")
+
+R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0)
+Rc = R%*%U
+write(Rc, $4, format="binary")
+
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/obsolete/genLinearRegressionData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/obsolete/genLinearRegressionData.dml b/scripts/datagen/obsolete/genLinearRegressionData.dml
new file mode 100644
index 0000000..10b094c
--- /dev/null
+++ b/scripts/datagen/obsolete/genLinearRegressionData.dml
@@ -0,0 +1,71 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# This script generates random data for linear regression. A matrix is generated
+# consisting of a data matrix with a label column appended to it.
+#
+# INPUT PARAMETERS:
+# --------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# --------------------------------------------------------------------------------------------
+# numSamples Int --- Number of samples
+# numFeatures Int --- Number of features (independent variables)
+# maxFeatureValue Int --- Maximum feature value (absolute value)
+# maxWeight Int --- Maximum weight (absolute value)
+# addNoise Boolean --- Determines whether noise should be added to Y
+# b Double --- Intercept
+# sparsity Double --- Controls the sparsity in the generated data (a value between 0 and 1)
+# output String --- Location to write the generated data/label matrix
+# format String --- Matrix output format
+# perc Double 0.8 Percentage of training sample
+# percFile String --- File to store the percentages
+# --------------------------------------------------------------------------------------------
+# OUTPUT: Matrix of random data with appended label column
+# ---------------------------------------------------------------------------------------------
+#
+# Example
+# ./runStandaloneSystemML.sh algorithms/datagen/genLinearRegressionData.dml -nvargs numSamples=1000 numFeatures=50 maxFeatureValue=5 maxWeight=5 addNoise=FALSE b=0 sparsity=0.7 output=linRegData.csv format=csv
+#
+
+perc = ifdef($perc, 0.8)
+percFile = ifdef($percFile, "perc.csv")
+p = matrix(0, rows=2, cols=1)
+p[1,1] = perc
+p[2,1] = (1-perc)
+write(p, percFile, format="csv")
+
+X = Rand(cols=$numFeatures, max=1, min=-1, pdf="uniform", rows=$numSamples, seed=0, sparsity=$sparsity)
+X = X * $maxFeatureValue
+
+w = Rand(cols=1, max=1, min=-1, pdf="uniform", rows=$numFeatures, seed=0)
+w = w * $maxWeight
+
+Y = X %*% w
+Y = Y + $b
+
+if ($addNoise == TRUE) {
+ noise = Rand(cols=1, pdf="normal", rows=$numSamples, seed=0)
+ Y = Y + noise
+}
+
+Z = cbind(X,Y)
+write(Z, $output, format=$format)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
index 19396a9..3562c9f 100644
--- a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
+++ b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
@@ -75,8 +75,6 @@ import org.apache.sysml.runtime.instructions.cp.ScalarObject;
import org.apache.sysml.runtime.instructions.cp.ScalarObjectFactory;
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
-import org.apache.sysml.udf.lib.DeNaNWrapper;
-import org.apache.sysml.udf.lib.DeNegInfinityWrapper;
import org.apache.sysml.udf.lib.DynamicReadMatrixCP;
import org.apache.sysml.udf.lib.DynamicReadMatrixRcCP;
import org.apache.sysml.udf.lib.OrderWrapper;
@@ -908,28 +906,13 @@ public class InterProceduralAnalysis
{
String className = fstmt.getOtherParams().get(ExternalFunctionStatement.CLASS_NAME);
- if( className.equals(OrderWrapper.class.getName())
- || className.equals(DeNaNWrapper.class.getCanonicalName())
- || className.equals(DeNegInfinityWrapper.class.getCanonicalName()) )
+ if( className.equals(OrderWrapper.class.getName()) )
{
Hop input = fop.getInput().get(0);
long lnnz = className.equals(OrderWrapper.class.getName()) ? input.getNnz() : -1;
MatrixObject moOut = createOutputMatrix(input.getDim1(), input.getDim2(),lnnz);
callVars.put(fop.getOutputVariableNames()[0], moOut);
}
- else if( className.equals("org.apache.sysml.udf.lib.EigenWrapper") )
- //else if( className.equals(EigenWrapper.class.getName()) ) //string ref for build flexibility
- {
- Hop input = fop.getInput().get(0);
- callVars.put(fop.getOutputVariableNames()[0], createOutputMatrix(input.getDim1(), 1, -1));
- callVars.put(fop.getOutputVariableNames()[1], createOutputMatrix(input.getDim1(), input.getDim1(),-1));
- }
- else if( className.equals("org.apache.sysml.udf.lib.LinearSolverWrapperCP") )
- //else if( className.equals(LinearSolverWrapperCP.class.getName()) ) //string ref for build flexibility
- {
- Hop input = fop.getInput().get(1);
- callVars.put(fop.getOutputVariableNames()[0], createOutputMatrix(input.getDim1(), 1, -1));
- }
else if( className.equals(DynamicReadMatrixCP.class.getName())
|| className.equals(DynamicReadMatrixRcCP.class.getName()) )
{
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java b/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java
deleted file mode 100644
index 5443893..0000000
--- a/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.udf.lib;
-
-
-import org.apache.sysml.udf.FunctionParameter;
-import org.apache.sysml.udf.Matrix;
-import org.apache.sysml.udf.PackageFunction;
-import org.apache.sysml.udf.Matrix.ValueType;
-
-/**
- * Wrapper class to deNaN matrices by replacing all NaNs with zeros,
- * made by modifying <code>OrderWrapper.java</code>
- */
-@Deprecated
-public class DeNaNWrapper extends PackageFunction
-{
-
- private static final long serialVersionUID = 1L;
- private static final String OUTPUT_FILE = "TMP";
-
- //return matrix
- private Matrix ret;
-
- @Override
- public int getNumFunctionOutputs()
- {
- return 1;
- }
-
- @Override
- public FunctionParameter getFunctionOutput(int pos)
- {
- if(pos == 0)
- return ret;
-
- throw new RuntimeException("Invalid function output being requested");
- }
-
- @Override
- public void execute()
- {
- try
- {
- Matrix inM = (Matrix) getFunctionInput(0);
- double [][] inData = inM.getMatrixAsDoubleArray();
- for (int i = 0; i < inData.length; i++) {
- for (int j = 0; j < inData[i].length; j++) {
- if (Double.isNaN (inData [i][j])) {
- inData [i][j] = 0.0;
- } } }
- //create and copy output matrix
- String dir = createOutputFilePathAndName( OUTPUT_FILE );
- ret = new Matrix( dir, inM.getNumRows(), inM.getNumCols(), ValueType.Double );
- ret.setMatrixDoubleArray(inData);
- }
- catch (Exception e)
- {
- throw new RuntimeException("Error executing external removeNaN function", e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java b/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java
deleted file mode 100644
index d964c0a..0000000
--- a/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.udf.lib;
-
-
-import org.apache.sysml.udf.FunctionParameter;
-import org.apache.sysml.udf.Matrix;
-import org.apache.sysml.udf.PackageFunction;
-import org.apache.sysml.udf.Matrix.ValueType;
-
-/**
- * Wrapper class to deNegInfinity matrices by replacing all Negative Infinities
- * with zeros, made by modifying <code>DeNaNWrapper.java</code>
- */
-@Deprecated
-public class DeNegInfinityWrapper extends PackageFunction
-{
-
- private static final long serialVersionUID = 1L;
- private static final String OUTPUT_FILE = "TMP";
-
- //return matrix
- private Matrix ret;
-
- @Override
- public int getNumFunctionOutputs()
- {
- return 1;
- }
-
- @Override
- public FunctionParameter getFunctionOutput(int pos)
- {
- if(pos == 0)
- return ret;
-
- throw new RuntimeException("Invalid function output being requested");
- }
-
- @Override
- public void execute()
- {
- try
- {
- Matrix inM = (Matrix) getFunctionInput(0);
- double [][] inData = inM.getMatrixAsDoubleArray();
- for (int i = 0; i < inData.length; i++) {
- for (int j = 0; j < inData[i].length; j++) {
- if (Double.NEGATIVE_INFINITY == inData [i][j]) {
- inData [i][j] = 0.0;
- } } }
- //create and copy output matrix
- String dir = createOutputFilePathAndName( OUTPUT_FILE );
- ret = new Matrix( dir, inM.getNumRows(), inM.getNumCols(), ValueType.Double );
- ret.setMatrixDoubleArray(inData);
- }
- catch (Exception e)
- {
- throw new RuntimeException("Error executing external order function", e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java b/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java
deleted file mode 100644
index 845d92e..0000000
--- a/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.udf.lib;
-
-import java.util.Arrays;
-import java.util.Comparator;
-
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.udf.FunctionParameter;
-import org.apache.sysml.udf.Matrix;
-import org.apache.sysml.udf.PackageFunction;
-import org.apache.sysml.udf.Scalar;
-import org.apache.sysml.udf.Matrix.ValueType;
-
-/**
- * Wrapper class for Sorting and Creating of a Permutation Matrix
- *
- * Sort single-column matrix and produce a permutation matrix. Pre-multiplying
- * the input matrix with the permutation matrix produces a sorted matrix. A
- * permutation matrix is a matrix where each row and each column as exactly one
- * 1: To From 1
- *
- * Input: (n x 1)-matrix, and true/false for sorting in descending order Output:
- * (n x n)- matrix
- *
- * permutation_matrix= externalFunction(Matrix[Double] A, Boolean desc) return
- * (Matrix[Double] P) implemented in
- * (classname="org.apache.sysml.udf.lib.PermutationMatrixWrapper"
- * ,exectype="mem"); A = read( "Data/A.mtx"); P = permutation_matrix( A[,2],
- * false); B = P %*% A
- *
- */
-@Deprecated
-public class PermutationMatrixWrapper extends PackageFunction
-{
-
- private static final long serialVersionUID = 1L;
- private static final String OUTPUT_FILE = "TMP";
-
- // return matrix
- private Matrix _ret;
-
- @Override
- public int getNumFunctionOutputs() {
- return 1;
- }
-
- @Override
- public FunctionParameter getFunctionOutput(int pos) {
- if (pos == 0)
- return _ret;
-
- throw new RuntimeException(
- "Invalid function output being requested");
- }
-
- @Override
- public void execute() {
- try {
- Matrix inM = (Matrix) getFunctionInput(0);
- double[][] inData = inM.getMatrixAsDoubleArray();
- boolean desc = Boolean.parseBoolean(((Scalar) getFunctionInput(1))
- .getValue());
-
- // add index column as first column
- double[][] idxData = new double[(int) inM.getNumRows()][2];
- for (int i = 0; i < idxData.length; i++) {
- idxData[i][0] = i;
- idxData[i][1] = inData[i][0];
- }
-
- // sort input matrix (in-place)
- if (!desc) // asc
- Arrays.sort(idxData, new AscRowComparator(1));
- else
- // desc
- Arrays.sort(idxData, new DescRowComparator(1));
-
- // create and populate sparse matrixblock for result
- MatrixBlock mb = new MatrixBlock(idxData.length, idxData.length,
- true, idxData.length);
- for (int i = 0; i < idxData.length; i++) {
- mb.quickSetValue(i, (int) idxData[i][0], 1.0);
- }
- mb.examSparsity();
-
- // set result
- String dir = createOutputFilePathAndName(OUTPUT_FILE);
- _ret = new Matrix(dir, mb.getNumRows(), mb.getNumColumns(),
- ValueType.Double);
- _ret.setMatrixDoubleArray(mb, OutputInfo.BinaryBlockOutputInfo,
- InputInfo.BinaryBlockInputInfo);
- }
- catch (Exception e) {
- throw new RuntimeException(
- "Error executing external permutation_matrix function", e);
- }
- }
-
- private static class AscRowComparator implements Comparator<double[]> {
- private int _col = -1;
-
- public AscRowComparator(int col) {
- _col = col;
- }
-
- @Override
- public int compare(double[] arg0, double[] arg1) {
- return (arg0[_col] < arg1[_col] ? -1
- : (arg0[_col] == arg1[_col] ? 0 : 1));
- }
- }
-
- private static class DescRowComparator implements Comparator<double[]> {
- private int _col = -1;
-
- public DescRowComparator(int col) {
- _col = col;
- }
-
- @Override
- public int compare(double[] arg0, double[] arg1) {
- return (arg0[_col] > arg1[_col] ? -1
- : (arg0[_col] == arg1[_col] ? 0 : 1));
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/test/scripts/applications/ctableStats/stratstats.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/stratstats.dml b/src/test/scripts/applications/ctableStats/stratstats.dml
deleted file mode 100644
index 5d190e7..0000000
--- a/src/test/scripts/applications/ctableStats/stratstats.dml
+++ /dev/null
@@ -1,350 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# STRATIFIED BIVARIATE STATISTICS, VERSION 2
-#
-# INPUT 1: Dataset with records as rows (matrix filename)
-# INPUT 2: The stratum ID column number (integer)
-# Stratum ID must be a small positive integer; fractional values are rounded; if 0 or less, shifted to positive.
-# INPUT 3: 1st variate column numbers (matrix filename)
-# INPUT 4: 2nd variate column numbers (matrix filename)
-# INPUT 5: Output (matrix filename)
-#
-# OUTPUT 1: Output Matrix with 40 columns, containing the following information:
-# Rows: One row per each distinct pair (1st variate, 2nd variate)
-# Col 01: 1st variate column number
-# Col 02: 1st variate global presence count
-# Col 03: 1st variate global mean
-# Col 04: 1st variate global standard deviation
-# Col 05: 1st variate stratified standard deviation
-# Col 06: R-squared, 1st variate vs. strata
-# Col 07: P-value, 1st variate vs. strata
-# Col 08-10: Reserved
-# Col 11: 2nd variate column number
-# Col 12: 2nd variate global presence count
-# Col 13: 2nd variate global mean
-# Col 14: 2nd variate global standard deviation
-# Col 15: 2nd variate stratified standard deviation
-# Col 16: R-squared, 2nd variate vs. strata
-# Col 17: P-value, 2nd variate vs. strata
-# Col 18-20: Reserved
-# Col 21: Global 1st & 2nd variate presence count
-# Col 22: Global regression slope (2nd variate vs. 1st variate)
-# Col 23: Global regression slope standard deviation
-# Col 24: Global correlation = +/- sqrt(R-squared)
-# Col 25: Global residual standard deviation
-# Col 26: Global R-squared
-# Col 27: Global P-value for hypothesis "slope = 0"
-# Col 28-30: Reserved
-# Col 31: Stratified 1st & 2nd variate presence count
-# Col 32: Stratified regression slope (2nd variate vs. 1st variate)
-# Col 33: Stratified regression slope standard deviation
-# Col 34: Stratified correlation = +/- sqrt(R-squared)
-# Col 35: Stratified residual standard deviation
-# Col 36: Stratified R-squared
-# Col 37: Stratified P-value for hypothesis "slope = 0"
-# Col 38: Number of strata with at least two counted points
-# Col 39-40: Reserved
-# TO DO: GOODNESS OF FIT MEASURE
-#
-# EXAMPLE:
-# hadoop jar SystemML.jar -f PATH/stratstats.dml -exec singlenode -args PATH/stratstats_test_data.mtx 1 PATH/stratstats_test_X.mtx PATH/stratstats_test_Y.mtx PATH/stratstats_test_output.mtx
-
-NaN = 0/0;
-
-print ("BEGIN STRATIFIED STATISTICS SCRIPT");
-
-print ("Reading the input matrices...");
-
-DataWithNaNs = read ($1, format = "text");
-Xcols = read ($3, format = "text");
-Ycols = read ($4, format = "text");
-stratum_column_id = $2;
-num_records = nrow(DataWithNaNs);
-num_attrs = ncol(DataWithNaNs);
-num_attrs_X = ncol(Xcols);
-num_attrs_Y = ncol(Ycols);
-num_attrs_XY = num_attrs_X * num_attrs_Y;
-
-
-print ("Preparing the variates...");
-
-Data = deNaN (DataWithNaNs);
-DataNaNmask = ppred (DataWithNaNs, NaN, "==");
-
-tXcols = t(Xcols);
-ones = matrix (1.0, rows = num_attrs_X, cols = 1);
-one_to_num_attrs_X = sumup (ones);
-ProjX = matrix (0.0, rows = num_attrs, cols = num_attrs_X);
-ProjX_ctable = table (tXcols, one_to_num_attrs_X);
-ProjX [1:nrow(ProjX_ctable), ] = ProjX_ctable;
-X = Data %*% ProjX;
-X_mask = 1 - (DataNaNmask %*% ProjX);
-
-tYcols = t(Ycols);
-ones = matrix (1.0, rows = num_attrs_Y, cols = 1);
-one_to_num_attrs_Y = sumup (ones);
-ProjY = matrix (0.0, rows = num_attrs, cols = num_attrs_Y);
-ProjY_ctable = table (tYcols, one_to_num_attrs_Y);
-ProjY [1:nrow(ProjY_ctable), ] = ProjY_ctable;
-Y = Data %*% ProjY;
-Y_mask = 1 - (DataNaNmask %*% ProjY);
-
-
-print ("Preparing the strata...");
-
-Proj_to_deNaN_strata = diag (1 - DataNaNmask [, stratum_column_id]);
-Proj_to_deNaN_strata = removeEmpty (target = Proj_to_deNaN_strata, margin = "rows");
-vector_of_strata_with_empty_but_no_NaNs = round (Proj_to_deNaN_strata %*% (Data [, stratum_column_id]));
-vector_of_strata_with_empty_but_no_NaNs = vector_of_strata_with_empty_but_no_NaNs + (1 - min (vector_of_strata_with_empty_but_no_NaNs));
-num_strata_with_empty_but_no_NaNs = max (vector_of_strata_with_empty_but_no_NaNs);
-num_records_with_nonNaN_strata = nrow (Proj_to_deNaN_strata);
-ones = matrix (1.0, rows = num_records_with_nonNaN_strata, cols = 1);
-one_to_num_records_with_nonNaN_strata = sumup (ones);
-StrataSummator_with_empty_from_nonNaNs = table (vector_of_strata_with_empty_but_no_NaNs, one_to_num_records_with_nonNaN_strata);
-StrataSummator_from_nonNaNs = removeEmpty (target = StrataSummator_with_empty_from_nonNaNs, margin = "rows");
-StrataSummator = StrataSummator_from_nonNaNs %*% Proj_to_deNaN_strata;
-num_strata = nrow (StrataSummator);
-num_empty_strata = num_strata_with_empty_but_no_NaNs - num_strata;
-print ("There are " + num_strata + " nonempty strata and " + num_empty_strata + " empty but non-NaN strata.");
-
-print ("Computing the global single-variate statistics...");
-
-cnt_X_global = colSums (X_mask);
-cnt_Y_global = colSums (Y_mask);
-avg_X_global = colSums (X) / cnt_X_global;
-avg_Y_global = colSums (Y) / cnt_Y_global;
-var_sumX_global = colSums (X * X) - cnt_X_global * (avg_X_global * avg_X_global);
-var_sumY_global = colSums (Y * Y) - cnt_Y_global * (avg_Y_global * avg_Y_global);
- sqrt_failsafe_input_1 = var_sumX_global / (cnt_X_global - 1);
-stdev_X_global = sqrt_failsafe (sqrt_failsafe_input_1);
- sqrt_failsafe_input_2 = var_sumY_global / (cnt_Y_global - 1)
-stdev_Y_global = sqrt_failsafe (sqrt_failsafe_input_2);
-
-print ("Computing the stratified single-variate statistics...");
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
-
-Cnt_X_per_stratum = StrataSummator %*% X_mask;
-Cnt_Y_per_stratum = StrataSummator %*% Y_mask;
-Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "==");
-Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "==");
-One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum + Is_none_X_per_stratum);
-One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum + Is_none_Y_per_stratum);
-num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum);
-num_Y_nonempty_strata = num_strata - colSums (Is_none_Y_per_stratum);
-
-Sum_X_per_stratum = StrataSummator %*% X;
-Sum_Y_per_stratum = StrataSummator %*% Y;
-
-# Recompute some global statistics to exclude NaN-stratum records
-
-cnt_X_excluding_NaNstrata = colSums (Cnt_X_per_stratum);
-cnt_Y_excluding_NaNstrata = colSums (Cnt_Y_per_stratum);
-sum_X_excluding_NaNstrata = colSums (Sum_X_per_stratum);
-sum_Y_excluding_NaNstrata = colSums (Sum_Y_per_stratum);
-var_sumX_excluding_NaNstrata = colSums (StrataSummator %*% (X * X)) - (sum_X_excluding_NaNstrata * sum_X_excluding_NaNstrata) / cnt_X_excluding_NaNstrata;
-var_sumY_excluding_NaNstrata = colSums (StrataSummator %*% (Y * Y)) - (sum_Y_excluding_NaNstrata * sum_Y_excluding_NaNstrata) / cnt_Y_excluding_NaNstrata;
-
-# Compute the stratified statistics
-
-var_sumX_stratified = colSums (StrataSummator %*% (X * X)) - colSums (One_over_cnt_X_per_stratum * Sum_X_per_stratum * Sum_X_per_stratum);
-var_sumY_stratified = colSums (StrataSummator %*% (Y * Y)) - colSums (One_over_cnt_Y_per_stratum * Sum_Y_per_stratum * Sum_Y_per_stratum);
- sqrt_failsafe_input_3 = var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
-stdev_X_stratified = sqrt_failsafe (sqrt_failsafe_input_3);
- sqrt_failsafe_input_4 = var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
-stdev_Y_stratified = sqrt_failsafe (sqrt_failsafe_input_4);
-r_sqr_X_vs_strata = 1 - var_sumX_stratified / var_sumX_excluding_NaNstrata;
-r_sqr_Y_vs_strata = 1 - var_sumY_stratified / var_sumY_excluding_NaNstrata;
-fStat_X_vs_strata = ((var_sumX_excluding_NaNstrata - var_sumX_stratified) / (num_X_nonempty_strata - 1)) / (var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata));
-fStat_Y_vs_strata = ((var_sumY_excluding_NaNstrata - var_sumY_stratified) / (num_Y_nonempty_strata - 1)) / (var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata));
-p_val_X_vs_strata = fStat_tailprob (fStat_X_vs_strata, num_X_nonempty_strata - 1, cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
-p_val_Y_vs_strata = fStat_tailprob (fStat_Y_vs_strata, num_Y_nonempty_strata - 1, cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
-
-print ("Computing the global bivariate statistics...");
-
-# Compute the aggregate X vs. Y statistics and map them into proper positions
-
-cnt_XY_rectangle = t(X_mask) %*% Y_mask;
-sum_X_forXY_rectangle = t(X) %*% Y_mask;
-sum_XX_forXY_rectangle = t(X * X) %*% Y_mask;
-sum_Y_forXY_rectangle = t(X_mask) %*% Y;
-sum_YY_forXY_rectangle = t(X_mask) %*% (Y * Y);
-sum_XY_rectangle = t(X) %*% Y;
-cnt_XY_global = matrix (cnt_XY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_X_forXY_global = matrix (sum_X_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_XX_forXY_global = matrix (sum_XX_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_Y_forXY_global = matrix (sum_Y_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_YY_forXY_global = matrix (sum_YY_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-sum_XY_global = matrix (sum_XY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE);
-ones_XY = matrix (1.0, rows = 1, cols = num_attrs_XY);
-
-# Compute the global bivariate statistics for output
-
-cov_sumX_sumY_global = sum_XY_global - sum_X_forXY_global * sum_Y_forXY_global / cnt_XY_global;
-var_sumX_forXY_global = sum_XX_forXY_global - sum_X_forXY_global * sum_X_forXY_global / cnt_XY_global;
-var_sumY_forXY_global = sum_YY_forXY_global - sum_Y_forXY_global * sum_Y_forXY_global / cnt_XY_global;
-slope_XY_global = cov_sumX_sumY_global / var_sumX_forXY_global;
- sqrt_failsafe_input_5 = var_sumX_forXY_global * var_sumY_forXY_global;
- sqrt_failsafe_output_5 = sqrt_failsafe (sqrt_failsafe_input_5);
-corr_XY_global = cov_sumX_sumY_global / sqrt_failsafe_output_5;
-r_sqr_X_vs_Y_global = cov_sumX_sumY_global * cov_sumX_sumY_global / (var_sumX_forXY_global * var_sumY_forXY_global);
- sqrt_failsafe_input_6 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / var_sumX_forXY_global / (cnt_XY_global - 2)
-stdev_slope_XY_global = sqrt_failsafe (sqrt_failsafe_input_6);
- sqrt_failsafe_input_7 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / (cnt_XY_global - 2)
-stdev_errY_vs_X_global = sqrt_failsafe (sqrt_failsafe_input_7);
-fStat_Y_vs_X_global = (cnt_XY_global - 2) * r_sqr_X_vs_Y_global / (1 - r_sqr_X_vs_Y_global);
-p_val_Y_vs_X_global = fStat_tailprob (fStat_Y_vs_X_global, ones_XY, cnt_XY_global - 2);
-
-print ("Computing the stratified bivariate statistics...");
-
-# Create projections to "intermingle" X and Y into attribute pairs
-
-Proj_X_to_XY = matrix (0.0, rows = num_attrs_X, cols = num_attrs_XY);
-Proj_Y_to_XY = matrix (0.0, rows = num_attrs_Y, cols = num_attrs_XY);
-ones_Y_row = matrix (1.0, rows = 1, cols = num_attrs_Y);
-for (i in 1:num_attrs_X) {
- start_cid = (i - 1) * num_attrs_Y + 1;
- end_cid = i * num_attrs_Y;
- Proj_X_to_XY [i, start_cid:end_cid] = ones_Y_row;
- Proj_Y_to_XY [ , start_cid:end_cid] = diag (ones_Y_row);
-}
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
-
-Cnt_XY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_X_forXY_per_stratum = StrataSummator %*% (( X %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_XX_forXY_per_stratum = StrataSummator %*% (((X * X) %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY));
-Sum_Y_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y %*% Proj_Y_to_XY));
-Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ((Y * Y) %*% Proj_Y_to_XY));
-Sum_XY_per_stratum = StrataSummator %*% (( X %*% Proj_X_to_XY) * ( Y %*% Proj_Y_to_XY));
-
-Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "==");
-One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / (Cnt_XY_per_stratum + Is_none_XY_per_stratum);
-num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum);
-
-# Recompute some global aggregate X vs. Y statistics to exclude NaN-stratum records
-
-cnt_XY_excluding_NaNstrata = colSums (Cnt_XY_per_stratum);
-sum_XX_forXY_excluding_NaNstrata = colSums (Sum_XX_forXY_per_stratum);
-sum_YY_forXY_excluding_NaNstrata = colSums (Sum_YY_forXY_per_stratum);
-sum_XY_excluding_NaNstrata = colSums (Sum_XY_per_stratum);
-
-# Compute the stratified bivariate statistics
-
-var_sumX_forXY_stratified = sum_XX_forXY_excluding_NaNstrata - colSums (Sum_X_forXY_per_stratum * Sum_X_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-var_sumY_forXY_stratified = sum_YY_forXY_excluding_NaNstrata - colSums (Sum_Y_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-cov_sumX_sumY_stratified = sum_XY_excluding_NaNstrata - colSums (Sum_X_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum);
-
-slope_XY_stratified = cov_sumX_sumY_stratified / var_sumX_forXY_stratified;
- sqrt_failsafe_input_8 = var_sumX_forXY_stratified * var_sumY_forXY_stratified;
- sqrt_failsafe_output_8 = sqrt_failsafe (sqrt_failsafe_input_8);
-corr_XY_stratified = cov_sumX_sumY_stratified / sqrt_failsafe_output_8;
-r_sqr_X_vs_Y_stratified = cov_sumX_sumY_stratified * cov_sumX_sumY_stratified / (var_sumX_forXY_stratified * var_sumY_forXY_stratified);
-r_sqr_X_vs_Y_stratified = corr_XY_stratified * corr_XY_stratified;
- sqrt_failsafe_input_9 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / var_sumX_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-stdev_slope_XY_stratified = sqrt_failsafe (sqrt_failsafe_input_9);
- sqrt_failsafe_input_10 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-stdev_errY_vs_X_stratified = sqrt_failsafe (sqrt_failsafe_input_10);
-fStat_Y_vs_X_stratified = (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1) * r_sqr_X_vs_Y_stratified / (1 - r_sqr_X_vs_Y_stratified);
-p_val_Y_vs_X_stratified = fStat_tailprob (fStat_Y_vs_X_stratified, ones_XY, cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-
-print ("Preparing the output matrix...");
-OutMtx = matrix (0.0, rows = 40, cols = num_attrs_XY);
-
-OutMtx [ 1, ] = Xcols %*% Proj_X_to_XY; # 1st variate column number
-OutMtx [ 2, ] = cnt_X_global %*% Proj_X_to_XY; # 1st variate global presence count
-OutMtx [ 3, ] = avg_X_global %*% Proj_X_to_XY; # 1st variate global mean
-OutMtx [ 4, ] = stdev_X_global %*% Proj_X_to_XY; # 1st variate global standard deviation
-OutMtx [ 5, ] = stdev_X_stratified %*% Proj_X_to_XY; # 1st variate stratified standard deviation
-OutMtx [ 6, ] = r_sqr_X_vs_strata %*% Proj_X_to_XY; # R-squared, 1st variate vs. strata
-OutMtx [ 7, ] = p_val_X_vs_strata %*% Proj_X_to_XY; # P-value, 1st variate vs. strata
-OutMtx [11, ] = Ycols %*% Proj_Y_to_XY; # 2nd variate column number
-OutMtx [12, ] = cnt_Y_global %*% Proj_Y_to_XY; # 2nd variate global presence count
-OutMtx [13, ] = avg_Y_global %*% Proj_Y_to_XY; # 2nd variate global mean
-OutMtx [14, ] = stdev_Y_global %*% Proj_Y_to_XY; # 2nd variate global standard deviation
-OutMtx [15, ] = stdev_Y_stratified %*% Proj_Y_to_XY; # 2nd variate stratified standard deviation
-OutMtx [16, ] = r_sqr_Y_vs_strata %*% Proj_Y_to_XY; # R-squared, 2nd variate vs. strata
-OutMtx [17, ] = p_val_Y_vs_strata %*% Proj_Y_to_XY; # P-value, 2nd variate vs. strata
-
-
-OutMtx [21, ] = cnt_XY_global; # Global 1st & 2nd variate presence count
-OutMtx [22, ] = slope_XY_global; # Global regression slope (2nd variate vs. 1st variate)
-OutMtx [23, ] = stdev_slope_XY_global; # Global regression slope standard deviation
-OutMtx [24, ] = corr_XY_global; # Global correlation = +/- sqrt(R-squared)
-OutMtx [25, ] = stdev_errY_vs_X_global; # Global residual standard deviation
-OutMtx [26, ] = r_sqr_X_vs_Y_global; # Global R-squared
-OutMtx [27, ] = p_val_Y_vs_X_global; # Global P-value for hypothesis "slope = 0"
-OutMtx [31, ] = cnt_XY_excluding_NaNstrata; # Stratified 1st & 2nd variate presence count
-OutMtx [32, ] = slope_XY_stratified; # Stratified regression slope (2nd variate vs. 1st variate)
-OutMtx [33, ] = stdev_slope_XY_stratified; # Stratified regression slope standard deviation
-OutMtx [34, ] = corr_XY_stratified; # Stratified correlation = +/- sqrt(R-squared)
-OutMtx [35, ] = stdev_errY_vs_X_stratified; # Stratified residual standard deviation
-OutMtx [36, ] = r_sqr_X_vs_Y_stratified; # Stratified R-squared
-OutMtx [37, ] = p_val_Y_vs_X_stratified; # Stratified P-value for hypothesis "slope = 0"
-OutMtx [38, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">=")); # Number of strata with at least two counted points
-
-OutMtx = t(OutMtx);
-
-print ("Writing the output matrix...");
-write (OutMtx, $5, format="text");
-print ("END STRATIFIED STATISTICS SCRIPT");
-
-
-deNaN = externalFunction (Matrix[Double] A) return (Matrix[Double] B)
- implemented in (classname = "org.apache.sysml.udf.lib.DeNaNWrapper", exectype = "mem");
-
-fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[double] df_2) return (Matrix[double] tailprob)
-{ # TEMPORARY IMPLEMENTATION
- tailprob = fStat;
- for (i in 1:nrow(fStat)) {
- for (j in 1:ncol(fStat)) {
- q = as.scalar (fStat [i, j]);
- d1 = as.scalar (df_1 [i, j]);
- d2 = as.scalar (df_2 [i, j]);
- if (d1 >= 1 & d2 >= 1 & q >= 0.0) {
- tailprob [i, j] = pf (target = q, df1 = d1, df2 = d2, lower.tail=FALSE);
- } else {
- tailprob [i, j] = 0/0;
- }
- } }
-}
-
-sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A)
-{
- NaN = 0/0;
- mask_A = ppred (input_A, 0.0, ">=");
- prep_A = input_A * mask_A;
- mask_A = mask_A - mask_A * (ppred (prep_A, NaN, "=="));
- prep_A = deNaN (prep_A);
- output_A = sqrt (prep_A) / mask_A;
-}
-
-sumup = function (Matrix[double] A) return (Matrix[double] sum_A)
-{
- shift = 1;
- m_A = nrow(A);
- sum_A = A;
- while (shift < m_A) {
- sum_A [(shift+1):m_A, ] = sum_A [(shift+1):m_A, ] + sum_A [1:(m_A-shift), ];
- shift = 2 * shift;
- }
-}