You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/26 02:12:55 UTC
[31/55] [partial] incubator-systemml git commit: [SYSTEMML-482]
[SYSTEMML-480] Adding a Git attributes file to enforce Unix-styled line
endings, and normalizing all of the line endings.
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/arima_box-jenkins/arima.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima.dml b/src/test/scripts/applications/arima_box-jenkins/arima.dml
index f9afea9..73052e0 100644
--- a/src/test/scripts/applications/arima_box-jenkins/arima.dml
+++ b/src/test/scripts/applications/arima_box-jenkins/arima.dml
@@ -1,287 +1,287 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Arguments
-# 1st arg: X (one column time series)
-# 2nd arg: max_func_invoc
-# 3rd arg: p (non-seasonal AR order)
-# 4th arg: d (non-seasonal differencing order)
-# 5th arg: q (non-seasonal MA order)
-# 6th arg: P (seasonal AR order)
-# 7th arg: D (seasonal differencing order)
-# 8th arg: Q (seasonal MA order)
-# 9th arg: s (period in terms of number of time-steps)
-# 10th arg: 0/1 (1 means include.mean)
-# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
-# 12th arg: file name to store learnt parameters
-
-#changing to additive sar since R's arima seems to do that
-
-arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){
- b = X[,2:ncol(X)]%*%w
-
- R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
- for(i7 in 1:qIn){
- ma_ind_ns = P+pIn+i7
- err_ind_ns = i7
- ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
- d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
- R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
- }
- for(i8 in 1:Q){
- ma_ind_s = P+pIn+qIn+i8
- err_ind_s = s*i8
- ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
- d_s = ones_s * castAsScalar(w[ma_ind_s,1])
- R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
- }
-
- #checking for strict diagonal dominance
- #required for jacobi's method
- #
-
- max_iter = 100
- tol = 0.01
-
- y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
- iter = 0
-
- if(useJacobi == 1){
- check = sum(ppred(rowSums(abs(R)), 1, ">="))
- if(check > 0){
- print("R is not diagonal dominant. Suggest switching to an exact solver.")
- }
-
- diff = tol+1.0
- while(iter < max_iter & diff > tol){
- y_hat_new = b - R%*%y_hat
- diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
- y_hat = y_hat_new
- iter = iter + 1
- }
- }else{
- ones = matrix(1, rows=nrow(X), cols=1)
- A = R + diag(ones)
- Z = t(A)%*%A
- y = t(A)%*%b
- r = -y
- p = -r
- norm_r2 = sum(r*r)
- continue = 1
- if(norm_r2 == 0){
- continue = 0
- }
- while(iter < max_iter & continue == 1){
- q = Z%*%p
- alpha = norm_r2 / castAsScalar(t(p) %*% q)
- y_hat = y_hat + alpha * p
- old_norm_r2 = norm_r2
- r = r + alpha * q
- norm_r2 = sum(r * r)
- if(norm_r2 < tol){
- continue = 0
- }
- beta = norm_r2 / old_norm_r2
- p = -r + beta * p
- iter = iter + 1
- }
- }
-
- errs = X[,1] - y_hat
- obj = sum(errs*errs)
-}
-
-#input col of time series data
-X = read($1)
-
-max_func_invoc = $2
-
-#non-seasonal order
-p = $3
-d = $4
-q = $5
-
-#seasonal order
-P = $6
-D = $7
-Q = $8
-
-#length of the season
-s = $9
-
-include_mean = $10
-
-useJacobi = $11
-
-num_rows = nrow(X)
-
-if(num_rows <= d){
- print("non-seasonal differencing order should be larger than length of the time-series")
-}
-
-Y = X
-for(i in 1:d){
- n1 = nrow(Y)+0.0
- Y = Y[2:n1,] - Y[1:n1-1,]
-}
-
-num_rows = nrow(Y)+0.0
-if(num_rows <= s*D){
- print("seasonal differencing order should be larger than number of observations divided by length of season")
-}
-
-for(i in 1:D){
- n1 = nrow(Y)+0.0
- Y = Y[s+1:n1,] - Y[1:n1-s,]
-}
-
-n = nrow(Y)
-
-max_ar_col = P+p
-max_ma_col = Q+q
-if(max_ar_col > max_ma_col){
- max_arma_col = max_ar_col
-}else{
- max_arma_col = max_ma_col
-}
-
-mu = 0
-if(include_mean == 1){
- mu = sum(Y)/nrow(Y)
- Y = Y - mu
-}
-
-totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
-
-Z = Rand(rows=n, cols=totcols, min=0, max=0)
-Z[,1] = Y #target col
-
-parfor(i1 in 1:p, check=0){
- Z[i1+1:n,1+i1] = Y[1:n-i1,]
-}
-parfor(i2 in 1:P, check=0){
- Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
-}
-parfor(i5 in 1:q, check=0){
- Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
-}
-parfor(i6 in 1:Q, check=0){
- Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
-}
-
-one = Rand(rows=1, cols=1, min=1, max=1)
-
-simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
-for(i in 2:ncol(simplex)){
- simplex[i-1,i] = 0.1
-}
-
-num_func_invoc = 0
-
-objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
-parfor(i3 in 1:ncol(simplex)){
- arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
- objvals[1,i3] = arima_css_obj_val
-}
-num_func_invoc = num_func_invoc + ncol(simplex)
-
-tol = 1.5 * 10^(-8) * castAsScalar(objvals[1,1])
-
-continue = 1
-while(continue == 1 & num_func_invoc <= max_func_invoc) {
- best_index = 1
- worst_index = 1
- for(i in 2:ncol(objvals)){
- this = castAsScalar(objvals[1,i])
- that = castAsScalar(objvals[1,best_index])
- if(that > this){
- best_index = i
- }
- that = castAsScalar(objvals[1,worst_index])
- if(that < this){
- worst_index = i
- }
- }
-
- best_obj_val = castAsScalar(objvals[1,best_index])
- worst_obj_val = castAsScalar(objvals[1,worst_index])
- if(worst_obj_val <= best_obj_val + tol){
- continue = 0
- }
-
- print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
-
- c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
-
- x_r = 2*c - simplex[,worst_index]
- obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
- num_func_invoc = num_func_invoc + 1
-
- if(obj_x_r < best_obj_val){
- x_e = 2*x_r - c
- obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
- num_func_invoc = num_func_invoc + 1
-
- if(obj_x_r <= obj_x_e){
- simplex[,worst_index] = x_r
- objvals[1,worst_index] = obj_x_r
- }else{
- simplex[,worst_index] = x_e
- objvals[1,worst_index] = obj_x_e
- }
- }else{
- if(obj_x_r < worst_obj_val){
- simplex[,worst_index] = x_r
- objvals[1,worst_index] = obj_x_r
- }
-
- x_c_in = (simplex[,worst_index] + c)/2
- obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
- num_func_invoc = num_func_invoc + 1
-
- if(obj_x_c_in < castAsScalar(objvals[1,worst_index])){
- simplex[,worst_index] = x_c_in
- objvals[1,worst_index] = obj_x_c_in
- }else{
- if(obj_x_r >= worst_obj_val){
- best_point = simplex[,best_index]
- parfor(i4 in 1:ncol(simplex)){
- if(i4 != best_index){
- simplex[,i4] = (simplex[,i4] + best_point)/2
- tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
- objvals[1,i4] = tmp*one
- }
- }
- num_func_invoc = num_func_invoc + ncol(simplex) - 1
- }
- }
- }
-}
-
-best_point = simplex[,best_index]
-if(include_mean == 1){
- tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
- tmp2[1:nrow(best_point),1] = best_point
- tmp2[nrow(tmp2),1] = mu
- best_point = tmp2
-}
-
-write(best_point, $12, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Arguments
+# 1st arg: X (one column time series)
+# 2nd arg: max_func_invoc
+# 3rd arg: p (non-seasonal AR order)
+# 4th arg: d (non-seasonal differencing order)
+# 5th arg: q (non-seasonal MA order)
+# 6th arg: P (seasonal AR order)
+# 7th arg: D (seasonal differencing order)
+# 8th arg: Q (seasonal MA order)
+# 9th arg: s (period in terms of number of time-steps)
+# 10th arg: 0/1 (1 means include.mean)
+# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
+# 12th arg: file name to store learnt parameters
+
+#changing to additive sar since R's arima seems to do that
+
+arima_css = function(Matrix[Double] w, Matrix[Double] X, Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, Integer useJacobi) return (Double obj){
+ b = X[,2:ncol(X)]%*%w
+
+ R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
+ for(i7 in 1:qIn){
+ ma_ind_ns = P+pIn+i7
+ err_ind_ns = i7
+ ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
+ d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
+ R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
+ }
+ for(i8 in 1:Q){
+ ma_ind_s = P+pIn+qIn+i8
+ err_ind_s = s*i8
+ ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
+ d_s = ones_s * castAsScalar(w[ma_ind_s,1])
+ R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
+ }
+
+ #checking for strict diagonal dominance
+ #required for jacobi's method
+ #
+
+ max_iter = 100
+ tol = 0.01
+
+ y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
+ iter = 0
+
+ if(useJacobi == 1){
+ check = sum(ppred(rowSums(abs(R)), 1, ">="))
+ if(check > 0){
+ print("R is not diagonal dominant. Suggest switching to an exact solver.")
+ }
+
+ diff = tol+1.0
+ while(iter < max_iter & diff > tol){
+ y_hat_new = b - R%*%y_hat
+ diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
+ y_hat = y_hat_new
+ iter = iter + 1
+ }
+ }else{
+ ones = matrix(1, rows=nrow(X), cols=1)
+ A = R + diag(ones)
+ Z = t(A)%*%A
+ y = t(A)%*%b
+ r = -y
+ p = -r
+ norm_r2 = sum(r*r)
+ continue = 1
+ if(norm_r2 == 0){
+ continue = 0
+ }
+ while(iter < max_iter & continue == 1){
+ q = Z%*%p
+ alpha = norm_r2 / castAsScalar(t(p) %*% q)
+ y_hat = y_hat + alpha * p
+ old_norm_r2 = norm_r2
+ r = r + alpha * q
+ norm_r2 = sum(r * r)
+ if(norm_r2 < tol){
+ continue = 0
+ }
+ beta = norm_r2 / old_norm_r2
+ p = -r + beta * p
+ iter = iter + 1
+ }
+ }
+
+ errs = X[,1] - y_hat
+ obj = sum(errs*errs)
+}
+
+#input col of time series data
+X = read($1)
+
+max_func_invoc = $2
+
+#non-seasonal order
+p = $3
+d = $4
+q = $5
+
+#seasonal order
+P = $6
+D = $7
+Q = $8
+
+#length of the season
+s = $9
+
+include_mean = $10
+
+useJacobi = $11
+
+num_rows = nrow(X)
+
+if(num_rows <= d){
+ print("non-seasonal differencing order should be larger than length of the time-series")
+}
+
+Y = X
+for(i in 1:d){
+ n1 = nrow(Y)+0.0
+ Y = Y[2:n1,] - Y[1:n1-1,]
+}
+
+num_rows = nrow(Y)+0.0
+if(num_rows <= s*D){
+ print("seasonal differencing order should be larger than number of observations divided by length of season")
+}
+
+for(i in 1:D){
+ n1 = nrow(Y)+0.0
+ Y = Y[s+1:n1,] - Y[1:n1-s,]
+}
+
+n = nrow(Y)
+
+max_ar_col = P+p
+max_ma_col = Q+q
+if(max_ar_col > max_ma_col){
+ max_arma_col = max_ar_col
+}else{
+ max_arma_col = max_ma_col
+}
+
+mu = 0
+if(include_mean == 1){
+ mu = sum(Y)/nrow(Y)
+ Y = Y - mu
+}
+
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
+
+Z = Rand(rows=n, cols=totcols, min=0, max=0)
+Z[,1] = Y #target col
+
+parfor(i1 in 1:p, check=0){
+ Z[i1+1:n,1+i1] = Y[1:n-i1,]
+}
+parfor(i2 in 1:P, check=0){
+ Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
+}
+parfor(i5 in 1:q, check=0){
+ Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
+}
+parfor(i6 in 1:Q, check=0){
+ Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
+}
+
+one = Rand(rows=1, cols=1, min=1, max=1)
+
+simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
+for(i in 2:ncol(simplex)){
+ simplex[i-1,i] = 0.1
+}
+
+num_func_invoc = 0
+
+objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
+parfor(i3 in 1:ncol(simplex)){
+ arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
+ objvals[1,i3] = arima_css_obj_val
+}
+num_func_invoc = num_func_invoc + ncol(simplex)
+
+tol = 1.5 * 10^(-8) * castAsScalar(objvals[1,1])
+
+continue = 1
+while(continue == 1 & num_func_invoc <= max_func_invoc) {
+ best_index = 1
+ worst_index = 1
+ for(i in 2:ncol(objvals)){
+ this = castAsScalar(objvals[1,i])
+ that = castAsScalar(objvals[1,best_index])
+ if(that > this){
+ best_index = i
+ }
+ that = castAsScalar(objvals[1,worst_index])
+ if(that < this){
+ worst_index = i
+ }
+ }
+
+ best_obj_val = castAsScalar(objvals[1,best_index])
+ worst_obj_val = castAsScalar(objvals[1,worst_index])
+ if(worst_obj_val <= best_obj_val + tol){
+ continue = 0
+ }
+
+ print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
+
+ c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+
+ x_r = 2*c - simplex[,worst_index]
+ obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
+ num_func_invoc = num_func_invoc + 1
+
+ if(obj_x_r < best_obj_val){
+ x_e = 2*x_r - c
+ obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
+ num_func_invoc = num_func_invoc + 1
+
+ if(obj_x_r <= obj_x_e){
+ simplex[,worst_index] = x_r
+ objvals[1,worst_index] = obj_x_r
+ }else{
+ simplex[,worst_index] = x_e
+ objvals[1,worst_index] = obj_x_e
+ }
+ }else{
+ if(obj_x_r < worst_obj_val){
+ simplex[,worst_index] = x_r
+ objvals[1,worst_index] = obj_x_r
+ }
+
+ x_c_in = (simplex[,worst_index] + c)/2
+ obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
+ num_func_invoc = num_func_invoc + 1
+
+ if(obj_x_c_in < castAsScalar(objvals[1,worst_index])){
+ simplex[,worst_index] = x_c_in
+ objvals[1,worst_index] = obj_x_c_in
+ }else{
+ if(obj_x_r >= worst_obj_val){
+ best_point = simplex[,best_index]
+ parfor(i4 in 1:ncol(simplex)){
+ if(i4 != best_index){
+ simplex[,i4] = (simplex[,i4] + best_point)/2
+ tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
+ objvals[1,i4] = tmp*one
+ }
+ }
+ num_func_invoc = num_func_invoc + ncol(simplex) - 1
+ }
+ }
+ }
+}
+
+best_point = simplex[,best_index]
+if(include_mean == 1){
+ tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
+ tmp2[1:nrow(best_point),1] = best_point
+ tmp2[nrow(tmp2),1] = mu
+ best_point = tmp2
+}
+
+write(best_point, $12, format="text")
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/arima_box-jenkins/arima.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima.pydml b/src/test/scripts/applications/arima_box-jenkins/arima.pydml
index 0c4be73..9b3387c 100644
--- a/src/test/scripts/applications/arima_box-jenkins/arima.pydml
+++ b/src/test/scripts/applications/arima_box-jenkins/arima.pydml
@@ -1,258 +1,258 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Arguments
-# 1st arg: X (one column time series)
-# 2nd arg: max_func_invoc
-# 3rd arg: p (non-seasonal AR order)
-# 4th arg: d (non-seasonal differencing order)
-# 5th arg: q (non-seasonal MA order)
-# 6th arg: P (seasonal AR order)
-# 7th arg: D (seasonal differencing order)
-# 8th arg: Q (seasonal MA order)
-# 9th arg: s (period in terms of number of time-steps)
-# 10th arg: 0/1 (1 means include.mean)
-# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
-# 12th arg: file name to store learnt parameters
-
-#changing to additive sar since R's arima seems to do that
-
-def arima_css(w:matrix[float], X:matrix[float], pIn: int, P: int, qIn: int, Q:int, s:int, useJacobi: int) -> (obj: float):
- b = dot(X[,2:ncol(X)], w)
-
- R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
- for(i7 in 1:qIn):
- ma_ind_ns = P+pIn+i7
- err_ind_ns = i7
- ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
- d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
- R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
- for(i8 in 1:Q):
- ma_ind_s = P+pIn+qIn+i8
- err_ind_s = s*i8
- ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
- d_s = ones_s * castAsScalar(w[ma_ind_s,1])
- R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
-
- #checking for strict diagonal dominance
- #required for jacobi's method
-
- max_iter = 100
- tol = 0.01
-
- y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
- iter = 0
-
- if(useJacobi == 1):
- check = sum(ppred(rowSums(abs(R)), 1, ">="))
- if(check > 0):
- print("R is not diagonal dominant. Suggest switching to an exact solver.")
- diff = tol+1.0
- while(iter < max_iter & diff > tol):
- y_hat_new = b - dot(R, y_hat)
- diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
- y_hat = y_hat_new
- iter = iter + 1
- else:
- ones = full(1, rows=nrow(X), cols=1)
- A = R + diag(ones)
- transpose_A = transpose(A)
- Z = dot(transpose_A, A)
- y = dot(transpose_A, b)
- r = -y
- p = -r
- norm_r2 = sum(r*r)
- continue = 1
- if(norm_r2 == 0):
- continue = 0
- while(iter < max_iter & continue == 1):
- q = dot(Z, p)
- transpose_p = transpose(p)
- alpha = norm_r2 / castAsScalar(dot(transpose_p, q))
- y_hat = y_hat + alpha * p
- old_norm_r2 = norm_r2
- r = r + alpha * q
- norm_r2 = sum(r * r)
- if(norm_r2 < tol):
- continue = 0
- beta = norm_r2 / old_norm_r2
- p = -r + beta * p
- iter = iter + 1
- errs = X[,1] - y_hat
- obj = sum(errs*errs)
-# end arima_css function
-
-#input col of time series data
-X = load($1)
-
-max_func_invoc = $2
-
-#non-seasonal order
-p = $3
-d = $4
-q = $5
-
-#seasonal order
-P = $6
-D = $7
-Q = $8
-
-#length of the season
-s = $9
-
-include_mean = $10
-
-useJacobi = $11
-
-num_rows = nrow(X)
-
-if(num_rows <= d):
- print("non-seasonal differencing order should be larger than length of the time-series")
-
-Y = X
-for(i in 1:d):
- n1 = nrow(Y)+0.0
- Y = Y[2:n1,] - Y[1:n1-1,]
-
-num_rows = nrow(Y)+0.0
-if(num_rows <= s*D):
- print("seasonal differencing order should be larger than number of observations divided by length of season")
-
-for(i in 1:D):
- n1 = nrow(Y)+0.0
- Y = Y[s+1:n1,] - Y[1:n1-s,]
-
-n = nrow(Y)
-
-max_ar_col = P+p
-max_ma_col = Q+q
-if(max_ar_col > max_ma_col):
- max_arma_col = max_ar_col
-else:
- max_arma_col = max_ma_col
-
-mu = 0
-if(include_mean == 1):
- mu = sum(Y)/nrow(Y)
- Y = Y - mu
-
-totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
-
-Z = Rand(rows=n, cols=totcols, min=0, max=0)
-Z[,1] = Y #target col
-
-parfor(i1 in 1:p, check=0):
- Z[i1+1:n,1+i1] = Y[1:n-i1,]
-
-parfor(i2 in 1:P, check=0):
- Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
-
-parfor(i5 in 1:q, check=0):
- Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
-
-parfor(i6 in 1:Q, check=0):
- Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
-
-
-one = Rand(rows=1, cols=1, min=1, max=1)
-
-simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
-for(i in 2:ncol(simplex)):
- simplex[i-1,i] = 0.1
-
-num_func_invoc = 0
-
-objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
-parfor(i3 in 1:ncol(simplex)):
- arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
- objvals[1,i3] = arima_css_obj_val
-
-num_func_invoc = num_func_invoc + ncol(simplex)
-
-tol = 1.5 * (10**-8) * castAsScalar(objvals[1,1])
-
-continue = 1
-while(continue == 1 & num_func_invoc <= max_func_invoc):
- best_index = 1
- worst_index = 1
- for(i in 2:ncol(objvals)):
- this = castAsScalar(objvals[1,i])
- that = castAsScalar(objvals[1,best_index])
- if(that > this):
- best_index = i
- that = castAsScalar(objvals[1,worst_index])
- if(that < this):
- worst_index = i
-
- best_obj_val = castAsScalar(objvals[1,best_index])
- worst_obj_val = castAsScalar(objvals[1,worst_index])
- if(worst_obj_val <= best_obj_val + tol):
- continue = 0
-
- print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
-
- c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
-
- x_r = 2*c - simplex[,worst_index]
- obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
- num_func_invoc = num_func_invoc + 1
-
- if(obj_x_r < best_obj_val):
- x_e = 2*x_r - c
- obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
- num_func_invoc = num_func_invoc + 1
-
- if(obj_x_r <= obj_x_e):
- simplex[,worst_index] = x_r
- objvals[1,worst_index] = obj_x_r
- else:
- simplex[,worst_index] = x_e
- objvals[1,worst_index] = obj_x_e
- else:
- if(obj_x_r < worst_obj_val):
- simplex[,worst_index] = x_r
- objvals[1,worst_index] = obj_x_r
-
- x_c_in = (simplex[,worst_index] + c)/2
- obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
- num_func_invoc = num_func_invoc + 1
-
- if(obj_x_c_in < castAsScalar(objvals[1,worst_index])):
- simplex[,worst_index] = x_c_in
- objvals[1,worst_index] = obj_x_c_in
- else:
- if(obj_x_r >= worst_obj_val):
- best_point = simplex[,best_index]
- parfor(i4 in 1:ncol(simplex)):
- if(i4 != best_index):
- simplex[,i4] = (simplex[,i4] + best_point)/2
- tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
- objvals[1,i4] = tmp*one
- num_func_invoc = num_func_invoc + ncol(simplex) - 1
-
-best_point = simplex[,best_index]
-if(include_mean == 1):
- tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
- tmp2[1:nrow(best_point),1] = best_point
- tmp2[nrow(tmp2),1] = mu
- best_point = tmp2
-
-save(best_point, $12, format="text")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Arguments
+# 1st arg: X (one column time series)
+# 2nd arg: max_func_invoc
+# 3rd arg: p (non-seasonal AR order)
+# 4th arg: d (non-seasonal differencing order)
+# 5th arg: q (non-seasonal MA order)
+# 6th arg: P (seasonal AR order)
+# 7th arg: D (seasonal differencing order)
+# 8th arg: Q (seasonal MA order)
+# 9th arg: s (period in terms of number of time-steps)
+# 10th arg: 0/1 (1 means include.mean)
+# 11th arg: 0 to use CG solver, 1 to use Jacobi's method
+# 12th arg: file name to store learnt parameters
+
+#changing to additive sar since R's arima seems to do that
+
+def arima_css(w:matrix[float], X:matrix[float], pIn: int, P: int, qIn: int, Q:int, s:int, useJacobi: int) -> (obj: float):
+ b = dot(X[,2:ncol(X)], w)
+
+ R = Rand(rows=nrow(X), cols=nrow(X), min=0, max=0)
+ for(i7 in 1:qIn):
+ ma_ind_ns = P+pIn+i7
+ err_ind_ns = i7
+ ones_ns = Rand(rows=nrow(R)-err_ind_ns, cols=1, min=1, max=1)
+ d_ns = ones_ns * castAsScalar(w[ma_ind_ns,1])
+ R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] = R[1+err_ind_ns:nrow(R),1:ncol(R)-err_ind_ns] + diag(d_ns)
+ for(i8 in 1:Q):
+ ma_ind_s = P+pIn+qIn+i8
+ err_ind_s = s*i8
+ ones_s = Rand(rows=nrow(R)-err_ind_s, cols=1, min=1, max=1)
+ d_s = ones_s * castAsScalar(w[ma_ind_s,1])
+ R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
+
+ #checking for strict diagonal dominance
+ #required for jacobi's method
+
+ max_iter = 100
+ tol = 0.01
+
+ y_hat = Rand(rows=nrow(X), cols=1, min=0, max=0)
+ iter = 0
+
+ if(useJacobi == 1):
+ check = sum(ppred(rowSums(abs(R)), 1, ">="))
+ if(check > 0):
+ print("R is not diagonal dominant. Suggest switching to an exact solver.")
+ diff = tol+1.0
+ while(iter < max_iter & diff > tol):
+ y_hat_new = b - dot(R, y_hat)
+ diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
+ y_hat = y_hat_new
+ iter = iter + 1
+ else:
+ ones = full(1, rows=nrow(X), cols=1)
+ A = R + diag(ones)
+ transpose_A = transpose(A)
+ Z = dot(transpose_A, A)
+ y = dot(transpose_A, b)
+ r = -y
+ p = -r
+ norm_r2 = sum(r*r)
+ continue = 1
+ if(norm_r2 == 0):
+ continue = 0
+ while(iter < max_iter & continue == 1):
+ q = dot(Z, p)
+ transpose_p = transpose(p)
+ alpha = norm_r2 / castAsScalar(dot(transpose_p, q))
+ y_hat = y_hat + alpha * p
+ old_norm_r2 = norm_r2
+ r = r + alpha * q
+ norm_r2 = sum(r * r)
+ if(norm_r2 < tol):
+ continue = 0
+ beta = norm_r2 / old_norm_r2
+ p = -r + beta * p
+ iter = iter + 1
+ errs = X[,1] - y_hat
+ obj = sum(errs*errs)
+# end arima_css function
+
+#input col of time series data
+X = load($1)
+
+max_func_invoc = $2
+
+#non-seasonal order
+p = $3
+d = $4
+q = $5
+
+#seasonal order
+P = $6
+D = $7
+Q = $8
+
+#length of the season
+s = $9
+
+include_mean = $10
+
+useJacobi = $11
+
+num_rows = nrow(X)
+
+if(num_rows <= d):
+ print("non-seasonal differencing order should be larger than length of the time-series")
+
+Y = X
+for(i in 1:d):
+ n1 = nrow(Y)+0.0
+ Y = Y[2:n1,] - Y[1:n1-1,]
+
+num_rows = nrow(Y)+0.0
+if(num_rows <= s*D):
+ print("seasonal differencing order should be larger than number of observations divided by length of season")
+
+for(i in 1:D):
+ n1 = nrow(Y)+0.0
+ Y = Y[s+1:n1,] - Y[1:n1-s,]
+
+n = nrow(Y)
+
+max_ar_col = P+p
+max_ma_col = Q+q
+if(max_ar_col > max_ma_col):
+ max_arma_col = max_ar_col
+else:
+ max_arma_col = max_ma_col
+
+mu = 0
+if(include_mean == 1):
+ mu = sum(Y)/nrow(Y)
+ Y = Y - mu
+
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
+
+Z = Rand(rows=n, cols=totcols, min=0, max=0)
+Z[,1] = Y #target col
+
+parfor(i1 in 1:p, check=0):
+ Z[i1+1:n,1+i1] = Y[1:n-i1,]
+
+parfor(i2 in 1:P, check=0):
+ Z[s*i2+1:n,1+p+i2] = Y[1:n-s*i2,]
+
+parfor(i5 in 1:q, check=0):
+ Z[i5+1:n,1+P+p+i5] = Y[1:n-i5,]
+
+parfor(i6 in 1:Q, check=0):
+ Z[s*i6+1:n,1+P+p+q+i6] = Y[1:n-s*i6,]
+
+
+one = Rand(rows=1, cols=1, min=1, max=1)
+
+simplex = Rand(rows=totcols-1, cols=totcols, min=0, max=0)
+for(i in 2:ncol(simplex)):
+ simplex[i-1,i] = 0.1
+
+num_func_invoc = 0
+
+objvals = Rand(rows=1, cols=ncol(simplex), min=0, max=0)
+parfor(i3 in 1:ncol(simplex)):
+ arima_css_obj_val = arima_css(simplex[,i3], Z, p, P, q, Q, s, useJacobi)
+ objvals[1,i3] = arima_css_obj_val
+
+num_func_invoc = num_func_invoc + ncol(simplex)
+
+tol = 1.5 * (10**-8) * castAsScalar(objvals[1,1])
+
+continue = 1
+while(continue == 1 & num_func_invoc <= max_func_invoc):
+ best_index = 1
+ worst_index = 1
+ for(i in 2:ncol(objvals)):
+ this = castAsScalar(objvals[1,i])
+ that = castAsScalar(objvals[1,best_index])
+ if(that > this):
+ best_index = i
+ that = castAsScalar(objvals[1,worst_index])
+ if(that < this):
+ worst_index = i
+
+ best_obj_val = castAsScalar(objvals[1,best_index])
+ worst_obj_val = castAsScalar(objvals[1,worst_index])
+ if(worst_obj_val <= best_obj_val + tol):
+ continue = 0
+
+ print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
+
+ c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+
+ x_r = 2*c - simplex[,worst_index]
+ obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, useJacobi)
+ num_func_invoc = num_func_invoc + 1
+
+ if(obj_x_r < best_obj_val):
+ x_e = 2*x_r - c
+ obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, useJacobi)
+ num_func_invoc = num_func_invoc + 1
+
+ if(obj_x_r <= obj_x_e):
+ simplex[,worst_index] = x_r
+ objvals[1,worst_index] = obj_x_r
+ else:
+ simplex[,worst_index] = x_e
+ objvals[1,worst_index] = obj_x_e
+ else:
+ if(obj_x_r < worst_obj_val):
+ simplex[,worst_index] = x_r
+ objvals[1,worst_index] = obj_x_r
+
+ x_c_in = (simplex[,worst_index] + c)/2
+ obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, useJacobi)
+ num_func_invoc = num_func_invoc + 1
+
+ if(obj_x_c_in < castAsScalar(objvals[1,worst_index])):
+ simplex[,worst_index] = x_c_in
+ objvals[1,worst_index] = obj_x_c_in
+ else:
+ if(obj_x_r >= worst_obj_val):
+ best_point = simplex[,best_index]
+ parfor(i4 in 1:ncol(simplex)):
+ if(i4 != best_index):
+ simplex[,i4] = (simplex[,i4] + best_point)/2
+ tmp = arima_css(simplex[,i4], Z, p, P, q, Q, s, useJacobi)
+ objvals[1,i4] = tmp*one
+ num_func_invoc = num_func_invoc + ncol(simplex) - 1
+
+best_point = simplex[,best_index]
+if(include_mean == 1):
+ tmp2 = Rand(rows=totcols, cols=1, min=0, max=0)
+ tmp2[1:nrow(best_point),1] = best_point
+ tmp2[nrow(tmp2),1] = mu
+ best_point = tmp2
+
+save(best_point, $12, format="text")
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/Binomial.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/Binomial.dml b/src/test/scripts/applications/ctableStats/Binomial.dml
index 9d2f7f4..a3cbf5a 100644
--- a/src/test/scripts/applications/ctableStats/Binomial.dml
+++ b/src/test/scripts/applications/ctableStats/Binomial.dml
@@ -1,171 +1,171 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# COMMON FUNCTIONS TO SOLVE BINOMIAL DISTRIBUTION PROBLEMS
-# WORK OVER VECTORS (IN PARALLEL) TO SAVE COMPUTATION TIME
-
-# Computes binomial parameter p (the biased-coin probability)
-# such that Prob [Binom(n, p) <= m] = alpha
-# Use it for "exact" confidence intervals over p given m, n:
-# For example, for 95%-confidence intervals, use [p1, p2]
-# such that Prob [Binom(n, p1) <= m-1] = 0.975
-# and Prob [Binom(n, p2) <= m ] = 0.025
-binomQuantile =
- function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] alpha_vector)
- return (Matrix[double] p_vector)
-{
- num_rows = nrow (n_vector);
- p_min = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
- alpha_p_min = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
- p_max = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
- alpha_p_max = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
-
- for (i in 1:27) { # Uses "division by half" method to solve equations
- p_new = (p_min + p_max) / 2.0;
- [alpha_p_new] = binomProb (n_vector, m_vector, p_new);
- move_new_to_max = ppred (alpha_p_new, alpha_vector, "<");
- p_max = (1 - move_new_to_max) * p_max + move_new_to_max * p_new;
- p_min = (1 - move_new_to_max) * p_new + move_new_to_max * p_min;
- alpha_p_max = (1 - move_new_to_max) * alpha_p_max + move_new_to_max * alpha_p_new;
- alpha_p_min = (1 - move_new_to_max) * alpha_p_new + move_new_to_max * alpha_p_min;
- }
- p_vector = (p_min + p_max) / 2.0;
-}
-
-
-# Computes the cumulative distribution fuction of the binomial distribution,
-# that is, Prob [Binom(n, p) <= m], using the incomplete Beta function
-# approximated via a continued fraction, see "Handbook of Mathematical Functions"
-# edited by M. Abramowitz and I.A. Stegun, U.S. Nat-l Bureau of Standards,
-# 10th print (Dec 1972), Sec. 26.5.8-26.5.9, p. 944
-binomProb =
- function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] p_vector)
- return (Matrix[double] result)
-{
- num_rows = nrow (n_vector);
- num_iterations = 100;
-
- mean_vector = p_vector * n_vector;
- is_opposite = ppred (mean_vector, m_vector, "<");
- l_vector = is_opposite * (n_vector - (m_vector + 1)) + (1 - is_opposite) * m_vector;
- q_vector = is_opposite * (1.0 - p_vector) + (1 - is_opposite) * p_vector;
- n_minus_l_vector = n_vector - l_vector;
-
- is_result_zero1 = ppred (l_vector, - 0.0000000001, "<");
- is_result_one1 = ppred (n_minus_l_vector, 0.0000000001, "<");
- is_result_zero2 = ppred (q_vector, 0.9999999999, ">");
- is_result_one2 = ppred (q_vector, 0.0000000001, "<");
-
- is_result_zero = is_result_zero1 + (1 - is_result_zero1) * is_result_zero2 * (1 - is_result_one1);
- is_result_one = (is_result_one1 + (1 - is_result_one1) * is_result_one2) * (1 - is_result_zero);
-
- result = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
- result = result + is_result_one;
- is_already_done = is_result_zero + is_result_one;
- still_iterating = 1 - is_already_done;
-
- n_vector = (1 - is_already_done) * n_vector + is_already_done * 2;
- l_vector = (1 - is_already_done) * l_vector + is_already_done;
- n_minus_l_vector = (1 - is_already_done) * n_minus_l_vector + is_already_done;
- q_vector = (1 - is_already_done) * q_vector + is_already_done * 0.8;
-
- numer_old = q_vector;
- denom_old = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
- numer = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
- denom = 1.0 - q_vector;
-
- is_i_even = 1;
-
- for (i in 1:num_iterations) # The continued fraction iterations
- {
- is_i_even = 1 - is_i_even;
- e_term = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
- if (i > 1) {
- if (is_i_even == 1) {
- e_term = - (2 * n_minus_l_vector + (i - 2)) * (2 * l_vector - (i - 2));
- }
- if (is_i_even == 0) {
- e_term = (i - 1) * (2 * n_vector + (i - 1));
- }
- e_term = e_term / (n_minus_l_vector + (i - 2)) / (n_minus_l_vector + (i - 1));
- e_term = e_term * 0.25;
- }
- numer_new = still_iterating * (q_vector * numer + (1.0 - q_vector) * e_term * numer_old) + (1.0 - still_iterating);
- denom_new = still_iterating * (q_vector * denom + (1.0 - q_vector) * e_term * denom_old) + (1.0 - still_iterating);
- numer_old = still_iterating * (q_vector * numer) + (1.0 - still_iterating);
- denom_old = still_iterating * (q_vector * denom) + (1.0 - still_iterating);
- numer = numer_new;
- denom = denom_new;
-
- abs_denom = abs (denom);
- denom_too_big = ppred (abs_denom, 10000000000.0, ">");
- denom_too_small = ppred (abs_denom, 0.0000000001, "<");
- denom_normal = 1.0 - denom_too_big - denom_too_small;
- rescale_vector = denom_too_big * 0.0000000001 + denom_too_small * 10000000000.0 + denom_normal;
- numer_old = numer_old * rescale_vector;
- denom_old = denom_old * rescale_vector;
- numer = numer * rescale_vector;
- denom = denom * rescale_vector;
-
- convergence_check_left = abs (numer * denom_old - numer_old * denom);
- convergence_check_right = abs (numer * denom_old) * 0.000000001;
- has_converged = ppred (convergence_check_left, convergence_check_right, "<=");
- has_converged = still_iterating * has_converged;
- still_iterating = still_iterating - has_converged;
- result = result + has_converged * numer / denom;
- }
-
- result = result + still_iterating * numer / denom;
-
- n_vector_not_already_done = (1 - is_already_done) * n_vector;
- l_vector_not_already_done = (1 - is_already_done) * l_vector;
- n_minus_l_vector_not_already_done = (1 - is_already_done) * n_minus_l_vector;
- q_vector_not_already_done = (1 - is_already_done) * q_vector + is_already_done;
- one_minus_q_vector_not_already_done = (1 - is_already_done) * (1.0 - q_vector) + is_already_done;
-
- [n_logfact] = logFactorial (n_vector_not_already_done);
- [l_logfact] = logFactorial (l_vector_not_already_done);
- [n_minus_l_logfact] = logFactorial (n_minus_l_vector_not_already_done);
-
- log_update_factor = n_logfact - l_logfact - n_minus_l_logfact + l_vector * log (q_vector_not_already_done)
- + n_minus_l_vector * log (one_minus_q_vector_not_already_done);
- updated_result = result * (is_already_done + (1 - is_already_done) * exp (log_update_factor));
- result = is_opposite + (1 - 2 * is_opposite) * updated_result;
-}
-
-
-# Computes the logarithm of the factorial of x >= 0 via the Gamma function
-# From paper: C. Lanczos "A Precision Approximation of the Gamma Function",
-# Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96
-logFactorial = function (Matrix[double] x) return (Matrix[double] logfact)
-{
- y = 1.000000000178;
- y = y + 76.180091729406 / (x + 1);
- y = y - 86.505320327112 / (x + 2);
- y = y + 24.014098222230 / (x + 3);
- y = y - 1.231739516140 / (x + 4);
- y = y + 0.001208580030 / (x + 5);
- y = y - 0.000005363820 / (x + 6);
- logfact = log(y) + (x + 0.5) * log(x + 5.5) - (x + 5.5) + 0.91893853320467; # log(sqrt(2 * PI));
-}
-
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# COMMON FUNCTIONS TO SOLVE BINOMIAL DISTRIBUTION PROBLEMS
+# WORK OVER VECTORS (IN PARALLEL) TO SAVE COMPUTATION TIME
+
+# Computes binomial parameter p (the biased-coin probability)
+# such that Prob [Binom(n, p) <= m] = alpha
+# Use it for "exact" confidence intervals over p given m, n:
+# For example, for 95%-confidence intervals, use [p1, p2]
+# such that Prob [Binom(n, p1) <= m-1] = 0.975
+# and Prob [Binom(n, p2) <= m ] = 0.025
+binomQuantile =
+ function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] alpha_vector)
+ return (Matrix[double] p_vector)
+{
+ num_rows = nrow (n_vector);
+ p_min = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+ alpha_p_min = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
+ p_max = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
+ alpha_p_max = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+
+ for (i in 1:27) { # Uses "division by half" method to solve equations
+ p_new = (p_min + p_max) / 2.0;
+ [alpha_p_new] = binomProb (n_vector, m_vector, p_new);
+ move_new_to_max = ppred (alpha_p_new, alpha_vector, "<");
+ p_max = (1 - move_new_to_max) * p_max + move_new_to_max * p_new;
+ p_min = (1 - move_new_to_max) * p_new + move_new_to_max * p_min;
+ alpha_p_max = (1 - move_new_to_max) * alpha_p_max + move_new_to_max * alpha_p_new;
+ alpha_p_min = (1 - move_new_to_max) * alpha_p_new + move_new_to_max * alpha_p_min;
+ }
+ p_vector = (p_min + p_max) / 2.0;
+}
+
+
+# Computes the cumulative distribution fuction of the binomial distribution,
+# that is, Prob [Binom(n, p) <= m], using the incomplete Beta function
+# approximated via a continued fraction, see "Handbook of Mathematical Functions"
+# edited by M. Abramowitz and I.A. Stegun, U.S. Nat-l Bureau of Standards,
+# 10th print (Dec 1972), Sec. 26.5.8-26.5.9, p. 944
+binomProb =
+ function (Matrix[double] n_vector, Matrix[double] m_vector, Matrix[double] p_vector)
+ return (Matrix[double] result)
+{
+ num_rows = nrow (n_vector);
+ num_iterations = 100;
+
+ mean_vector = p_vector * n_vector;
+ is_opposite = ppred (mean_vector, m_vector, "<");
+ l_vector = is_opposite * (n_vector - (m_vector + 1)) + (1 - is_opposite) * m_vector;
+ q_vector = is_opposite * (1.0 - p_vector) + (1 - is_opposite) * p_vector;
+ n_minus_l_vector = n_vector - l_vector;
+
+ is_result_zero1 = ppred (l_vector, - 0.0000000001, "<");
+ is_result_one1 = ppred (n_minus_l_vector, 0.0000000001, "<");
+ is_result_zero2 = ppred (q_vector, 0.9999999999, ">");
+ is_result_one2 = ppred (q_vector, 0.0000000001, "<");
+
+ is_result_zero = is_result_zero1 + (1 - is_result_zero1) * is_result_zero2 * (1 - is_result_one1);
+ is_result_one = (is_result_one1 + (1 - is_result_one1) * is_result_one2) * (1 - is_result_zero);
+
+ result = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+ result = result + is_result_one;
+ is_already_done = is_result_zero + is_result_one;
+ still_iterating = 1 - is_already_done;
+
+ n_vector = (1 - is_already_done) * n_vector + is_already_done * 2;
+ l_vector = (1 - is_already_done) * l_vector + is_already_done;
+ n_minus_l_vector = (1 - is_already_done) * n_minus_l_vector + is_already_done;
+ q_vector = (1 - is_already_done) * q_vector + is_already_done * 0.8;
+
+ numer_old = q_vector;
+ denom_old = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+ numer = Rand (rows = num_rows, cols = 1, min = 0.0, max = 0.0);
+ denom = 1.0 - q_vector;
+
+ is_i_even = 1;
+
+ for (i in 1:num_iterations) # The continued fraction iterations
+ {
+ is_i_even = 1 - is_i_even;
+ e_term = Rand (rows = num_rows, cols = 1, min = 1.0, max = 1.0);
+ if (i > 1) {
+ if (is_i_even == 1) {
+ e_term = - (2 * n_minus_l_vector + (i - 2)) * (2 * l_vector - (i - 2));
+ }
+ if (is_i_even == 0) {
+ e_term = (i - 1) * (2 * n_vector + (i - 1));
+ }
+ e_term = e_term / (n_minus_l_vector + (i - 2)) / (n_minus_l_vector + (i - 1));
+ e_term = e_term * 0.25;
+ }
+ numer_new = still_iterating * (q_vector * numer + (1.0 - q_vector) * e_term * numer_old) + (1.0 - still_iterating);
+ denom_new = still_iterating * (q_vector * denom + (1.0 - q_vector) * e_term * denom_old) + (1.0 - still_iterating);
+ numer_old = still_iterating * (q_vector * numer) + (1.0 - still_iterating);
+ denom_old = still_iterating * (q_vector * denom) + (1.0 - still_iterating);
+ numer = numer_new;
+ denom = denom_new;
+
+ abs_denom = abs (denom);
+ denom_too_big = ppred (abs_denom, 10000000000.0, ">");
+ denom_too_small = ppred (abs_denom, 0.0000000001, "<");
+ denom_normal = 1.0 - denom_too_big - denom_too_small;
+ rescale_vector = denom_too_big * 0.0000000001 + denom_too_small * 10000000000.0 + denom_normal;
+ numer_old = numer_old * rescale_vector;
+ denom_old = denom_old * rescale_vector;
+ numer = numer * rescale_vector;
+ denom = denom * rescale_vector;
+
+ convergence_check_left = abs (numer * denom_old - numer_old * denom);
+ convergence_check_right = abs (numer * denom_old) * 0.000000001;
+ has_converged = ppred (convergence_check_left, convergence_check_right, "<=");
+ has_converged = still_iterating * has_converged;
+ still_iterating = still_iterating - has_converged;
+ result = result + has_converged * numer / denom;
+ }
+
+ result = result + still_iterating * numer / denom;
+
+ n_vector_not_already_done = (1 - is_already_done) * n_vector;
+ l_vector_not_already_done = (1 - is_already_done) * l_vector;
+ n_minus_l_vector_not_already_done = (1 - is_already_done) * n_minus_l_vector;
+ q_vector_not_already_done = (1 - is_already_done) * q_vector + is_already_done;
+ one_minus_q_vector_not_already_done = (1 - is_already_done) * (1.0 - q_vector) + is_already_done;
+
+ [n_logfact] = logFactorial (n_vector_not_already_done);
+ [l_logfact] = logFactorial (l_vector_not_already_done);
+ [n_minus_l_logfact] = logFactorial (n_minus_l_vector_not_already_done);
+
+ log_update_factor = n_logfact - l_logfact - n_minus_l_logfact + l_vector * log (q_vector_not_already_done)
+ + n_minus_l_vector * log (one_minus_q_vector_not_already_done);
+ updated_result = result * (is_already_done + (1 - is_already_done) * exp (log_update_factor));
+ result = is_opposite + (1 - 2 * is_opposite) * updated_result;
+}
+
+
+# Computes the logarithm of the factorial of x >= 0 via the Gamma function
+# From paper: C. Lanczos "A Precision Approximation of the Gamma Function",
+# Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96
+logFactorial = function (Matrix[double] x) return (Matrix[double] logfact)
+{
+ y = 1.000000000178;
+ y = y + 76.180091729406 / (x + 1);
+ y = y - 86.505320327112 / (x + 2);
+ y = y + 24.014098222230 / (x + 3);
+ y = y - 1.231739516140 / (x + 4);
+ y = y + 0.001208580030 / (x + 5);
+ y = y - 0.000005363820 / (x + 6);
+ logfact = log(y) + (x + 0.5) * log(x + 5.5) - (x + 5.5) + 0.91893853320467; # log(sqrt(2 * PI));
+}
+
+
+
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/ctci.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/ctci.dml b/src/test/scripts/applications/ctableStats/ctci.dml
index 22d3044..4aa3ae3 100644
--- a/src/test/scripts/applications/ctableStats/ctci.dml
+++ b/src/test/scripts/applications/ctableStats/ctci.dml
@@ -1,145 +1,145 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# CTCI.DML: TWO-ATTRIBUTE CONTINGENCY TABLE CONFIDENCE INTERVAL ANALYSIS for categorical data
-# Computes 95% confidence intervals for binomial ratios using both Wilson and Exact Scores
-# INPUT 1: Dataset as an (N x 2) matrix, input file path/name
-# Rows: Individual data points
-# Col 1: Partition attribute (e.g. US State code), must be positive integer
-# Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
-# INPUT 2: Number of data points N (i.e. input matrix size, rows)
-# INPUT 3: "Null" label code, 0 if there is no "null" label
-# INPUT 4: Head Matrix output file path/name
-# INPUT 5: Body Matrix output file path/name
-# OUTPUT 1: Head Matrix with per-label information:
-# Rows: One row per each distinct code of the label attribute (1, 2, 3, ...)
-# Col 1: First column index of the Body Matrix block for this label, or 0 if "null"
-# Col 2: Overall number of data points with this label
-# Col 3: Percentage (out of 100) of data points to have this label
-# OUTPUT 2: Body Matrix with per-partition statistics:
-# Rows: One row per each distinct code of the partition attribute (1, 2, 3, ...)
-# Columns: Arranged in blocks with the same schema, one block per each non-null label,
-# with the first column index specified in the Head Matrix
-# Block Col 0: Number of points, i.e. the count, with this label in the given partition
-# Block Col 1: Percentage (out of 100) of points to have the label vs. all in the partition
-# Block Col 2: Small side, 95% confid. int-l (of above percentage), Wilson Score
-# Block Col 3: Large side, 95% confid. int-l (of above percentage), Wilson Score
-# Block Col 4: Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
-# Block Col 5: Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
-# Block Col 6: Percentage (out of 100) of points to lie in the partition vs. all with the label
-# Block Col 7: Small side, 95% confid. int-l (of above percentage), Wilson Score
-# Block Col 8: Large side, 95% confid. int-l (of above percentage), Wilson Score
-# Block Col 9: Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
-# Block Col 10: Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
-# Block Col 11-99: RESERVED and set to zero
-#
-# EXAMPLE:
-# hadoop jar SystemML.jar -f PATH/ctci.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_test_head.mtx PATH/ctci_test_body.mtx
-
-setwd ("test/scripts/applications/ctableStats"); # SET TO THE SCRIPT FOLDER
-source ("Binomial.dml"); # THIS SCRIPT SHOULD BE THERE TOO
-powerOfTen = 10000; # CONSTANT FOR ROUNDING THE RESULTS
-
-print ("BEGIN CTABLE ANALYSIS SCRIPT");
-print ("Reading the input matrix...");
-InData = read($1, rows = $2, cols = 2, format = "text");
-print ("Computing the contingency table...");
-CT = table (InData [, 1], InData [, 2]);
-# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
-print ("Preparing for the output tables...");
-nullLabel = $3;
-numPartitions = nrow (CT);
-numLabels = ncol (CT);
-cntPartitions = rowSums (CT);
-cntLabels = t(colSums (CT));
-numBodyBlocks = numLabels;
-for (iLabel in 1:numLabels) {
- if (iLabel == nullLabel) {
- numBodyBlocks = numBodyBlocks - 1;
-} }
-numBodyCols = numBodyBlocks * 100;
-HeadMtx = Rand (rows = numLabels, cols = 3, min = 0, max = 0);
-HeadMtx [, 2] = cntLabels;
-HeadMtx [, 3] = 100.0 * cntLabels / sum (cntLabels);
-BodyMtx = Rand (rows = numPartitions, cols = numBodyCols, min = 0, max = 0);
-zeros = Rand (rows = numPartitions, cols = 1, min = 0, max = 0);
-zero = Rand (rows = 1, cols = 1, min = 0, max = 0);
-big_alpha = 0.975 + zeros;
-small_alpha = 0.025 + zeros;
-iBlock = 0;
-for (iLabel in 1:numLabels)
-{
- if (iLabel != nullLabel) {
- if (1==1) {
- print ("Processing label " + iLabel + ":");
- }
- fCol = 1 + iBlock * 100;
- HeadMtx [iLabel, 1] = fCol + zero;
- cntPartitionsWithLabel = CT [, iLabel];
- BodyMtx [, fCol] = cntPartitionsWithLabel;
-
- print (" (partition & label) / (all partition) ratios...");
-
- cntPartitionsWithLabel_minus_1 = cntPartitionsWithLabel - 1;
- [ratio1, left_conf_wilson1, right_conf_wilson1] =
- wilson_confidence (cntPartitions, cntPartitionsWithLabel);
- [left_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel_minus_1, big_alpha);
- [right_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel, small_alpha);
-
- BodyMtx [, fCol + 1] = round (ratio1 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol + 2] = round (left_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol + 3] = round (right_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol + 4] = round (left_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol + 5] = round (right_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
-
- print (" (partition & label) / (all label) ratios...");
-
- cntThisLabel = zeros + castAsScalar (cntLabels [iLabel, 1]);
- [ratio2, left_conf_wilson2, right_conf_wilson2] =
- wilson_confidence (cntThisLabel, cntPartitionsWithLabel);
- [left_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel_minus_1, big_alpha);
- [right_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel, small_alpha);
-
- BodyMtx [, fCol + 6] = round (ratio2 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol + 7] = round (left_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol + 8] = round (right_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol + 9] = round (left_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
- BodyMtx [, fCol +10] = round (right_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
-
- iBlock = iBlock + 1;
-} }
-print ("Writing the output matrices...");
-write (HeadMtx, $4, format="text");
-write (BodyMtx, $5, format="text");
-print ("END CTABLE ANALYSIS SCRIPT");
-
-wilson_confidence = function (Matrix[double] n, Matrix[double] m)
-return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
-{
- z = 1.96; # 97.5% normal percentile, for 95% confidence interval
- z_sq_n = z * z * n;
- qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
- midpt = n * m + z_sq_n / 2;
- denom = n * n + z_sq_n;
- ratio = m / n;
- conf_left = (midpt - qroot) / denom;
- conf_right = (midpt + qroot) / denom;
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# CTCI.DML: TWO-ATTRIBUTE CONTINGENCY TABLE CONFIDENCE INTERVAL ANALYSIS for categorical data
+# Computes 95% confidence intervals for binomial ratios using both Wilson and Exact Scores
+# INPUT 1: Dataset as an (N x 2) matrix, input file path/name
+# Rows: Individual data points
+# Col 1: Partition attribute (e.g. US State code), must be positive integer
+# Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
+# INPUT 2: Number of data points N (i.e. input matrix size, rows)
+# INPUT 3: "Null" label code, 0 if there is no "null" label
+# INPUT 4: Head Matrix output file path/name
+# INPUT 5: Body Matrix output file path/name
+# OUTPUT 1: Head Matrix with per-label information:
+# Rows: One row per each distinct code of the label attribute (1, 2, 3, ...)
+# Col 1: First column index of the Body Matrix block for this label, or 0 if "null"
+# Col 2: Overall number of data points with this label
+# Col 3: Percentage (out of 100) of data points to have this label
+# OUTPUT 2: Body Matrix with per-partition statistics:
+# Rows: One row per each distinct code of the partition attribute (1, 2, 3, ...)
+# Columns: Arranged in blocks with the same schema, one block per each non-null label,
+# with the first column index specified in the Head Matrix
+# Block Col 0: Number of points, i.e. the count, with this label in the given partition
+# Block Col 1: Percentage (out of 100) of points to have the label vs. all in the partition
+# Block Col 2: Small side, 95% confid. int-l (of above percentage), Wilson Score
+# Block Col 3: Large side, 95% confid. int-l (of above percentage), Wilson Score
+# Block Col 4: Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
+# Block Col 5: Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
+# Block Col 6: Percentage (out of 100) of points to lie in the partition vs. all with the label
+# Block Col 7: Small side, 95% confid. int-l (of above percentage), Wilson Score
+# Block Col 8: Large side, 95% confid. int-l (of above percentage), Wilson Score
+# Block Col 9: Small side, 95% confid. int-l (of above percentage), Exact Binomial Score
+# Block Col 10: Large side, 95% confid. int-l (of above percentage), Exact Binomial Score
+# Block Col 11-99: RESERVED and set to zero
+#
+# EXAMPLE:
+# hadoop jar SystemML.jar -f PATH/ctci.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_test_head.mtx PATH/ctci_test_body.mtx
+
+setwd ("test/scripts/applications/ctableStats"); # SET TO THE SCRIPT FOLDER
+source ("Binomial.dml"); # THIS SCRIPT SHOULD BE THERE TOO
+powerOfTen = 10000; # CONSTANT FOR ROUNDING THE RESULTS
+
+print ("BEGIN CTABLE ANALYSIS SCRIPT");
+print ("Reading the input matrix...");
+InData = read($1, rows = $2, cols = 2, format = "text");
+print ("Computing the contingency table...");
+CT = table (InData [, 1], InData [, 2]);
+# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
+print ("Preparing for the output tables...");
+nullLabel = $3;
+numPartitions = nrow (CT);
+numLabels = ncol (CT);
+cntPartitions = rowSums (CT);
+cntLabels = t(colSums (CT));
+numBodyBlocks = numLabels;
+for (iLabel in 1:numLabels) {
+ if (iLabel == nullLabel) {
+ numBodyBlocks = numBodyBlocks - 1;
+} }
+numBodyCols = numBodyBlocks * 100;
+HeadMtx = Rand (rows = numLabels, cols = 3, min = 0, max = 0);
+HeadMtx [, 2] = cntLabels;
+HeadMtx [, 3] = 100.0 * cntLabels / sum (cntLabels);
+BodyMtx = Rand (rows = numPartitions, cols = numBodyCols, min = 0, max = 0);
+zeros = Rand (rows = numPartitions, cols = 1, min = 0, max = 0);
+zero = Rand (rows = 1, cols = 1, min = 0, max = 0);
+big_alpha = 0.975 + zeros;
+small_alpha = 0.025 + zeros;
+iBlock = 0;
+for (iLabel in 1:numLabels)
+{
+ if (iLabel != nullLabel) {
+ if (1==1) {
+ print ("Processing label " + iLabel + ":");
+ }
+ fCol = 1 + iBlock * 100;
+ HeadMtx [iLabel, 1] = fCol + zero;
+ cntPartitionsWithLabel = CT [, iLabel];
+ BodyMtx [, fCol] = cntPartitionsWithLabel;
+
+ print (" (partition & label) / (all partition) ratios...");
+
+ cntPartitionsWithLabel_minus_1 = cntPartitionsWithLabel - 1;
+ [ratio1, left_conf_wilson1, right_conf_wilson1] =
+ wilson_confidence (cntPartitions, cntPartitionsWithLabel);
+ [left_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel_minus_1, big_alpha);
+ [right_conf_exact1] = binomQuantile (cntPartitions, cntPartitionsWithLabel, small_alpha);
+
+ BodyMtx [, fCol + 1] = round (ratio1 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol + 2] = round (left_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol + 3] = round (right_conf_wilson1 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol + 4] = round (left_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol + 5] = round (right_conf_exact1 * 100.0 * powerOfTen) / powerOfTen;
+
+ print (" (partition & label) / (all label) ratios...");
+
+ cntThisLabel = zeros + castAsScalar (cntLabels [iLabel, 1]);
+ [ratio2, left_conf_wilson2, right_conf_wilson2] =
+ wilson_confidence (cntThisLabel, cntPartitionsWithLabel);
+ [left_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel_minus_1, big_alpha);
+ [right_conf_exact2] = binomQuantile (cntThisLabel, cntPartitionsWithLabel, small_alpha);
+
+ BodyMtx [, fCol + 6] = round (ratio2 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol + 7] = round (left_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol + 8] = round (right_conf_wilson2 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol + 9] = round (left_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
+ BodyMtx [, fCol +10] = round (right_conf_exact2 * 100.0 * powerOfTen) / powerOfTen;
+
+ iBlock = iBlock + 1;
+} }
+print ("Writing the output matrices...");
+write (HeadMtx, $4, format="text");
+write (BodyMtx, $5, format="text");
+print ("END CTABLE ANALYSIS SCRIPT");
+
+wilson_confidence = function (Matrix[double] n, Matrix[double] m)
+return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
+{
+ z = 1.96; # 97.5% normal percentile, for 95% confidence interval
+ z_sq_n = z * z * n;
+ qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
+ midpt = n * m + z_sq_n / 2;
+ denom = n * n + z_sq_n;
+ ratio = m / n;
+ conf_left = (midpt - qroot) / denom;
+ conf_right = (midpt + qroot) / denom;
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/816e2db8/src/test/scripts/applications/ctableStats/ctci_odds.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/ctci_odds.dml b/src/test/scripts/applications/ctableStats/ctci_odds.dml
index 856b24c..3d97489 100644
--- a/src/test/scripts/applications/ctableStats/ctci_odds.dml
+++ b/src/test/scripts/applications/ctableStats/ctci_odds.dml
@@ -1,178 +1,178 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# CTCI_ODDS.DML: TWO-ATTRIBUTE CONTINGENCY TABLE ODDS-RATIO CONFIDENCE INTERVAL ANALYSIS
-# Computes 95% confidence intervals for odds ratios using a Gaussian approximation for log-odds
-# INPUT 1: Dataset as an (N x 2) matrix, input file path/name
-# Rows: Individual data points
-# Col 1: Partition attribute (e.g. US State code), must be positive integer
-# Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
-# INPUT 2: Number of data points N (i.e. input matrix size, rows)
-# INPUT 3: "Null" label code, 0 if there is no "null" label
-# INPUT 4: Output Matrix file path/name
-# OUTPUT 1: Output Matrix with the following information:
-# Rows: One row per each distinct pair (partition, label) excluding "null" label
-# Col 1: Partition attribute value
-# Col 2: Label attribute value
-# Col 3: Number of data points with this (partition, label) pair
-# Col 4: Number of data points with the same partition, but a different label
-# Col 5: Number of data points with a different partition, but the same label
-# Col 6: Number of data points with a different partition and a different label
-# Col 7: The odds ratio
-# Col 8: Small side of 95%-confidence interval for the odds ratio
-# Col 9: Large side of 95%-confidence interval for the odds ratio
-# Col 10: How many sigmas away the log-odds ratio is from zero
-# Col 11: Chi-squared statistic
-# Col 12: Cramer's V * 100%
-# Col 13: Log-odds ratio P-value * 100%
-# Col 14: Chi-squared P-value * 100%
-# Col 15: Percentage (out of 100) of data points in this paritition to have this label
-# Col 16: Small side of 95%-confid. int-l of above percentage, Wilson Score
-# Col 17: Large side of 95%-confid. int-l of above percentage, Wilson Score
-# Col 18: Percentage (out of 100) of data points overall to have this label
-# Col 19: Small side of 95%-confid. int-l of above percentage, Wilson Score
-# Col 20: Large side of 95%-confid. int-l of above percentage, Wilson Score
-# Col 21: Percentage (out of 100) of data points overall to lie in this partition
-# Col 22: Small side of 95%-confid. int-l of above percentage, Wilson Score
-# Col 23: Large side of 95%-confid. int-l of above percentage, Wilson Score
-#
-# EXAMPLE:
-# hadoop jar SystemML.jar -f PATH/ctci_odds.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_odds_test_output.mtx
-
-powerOfTen = 10000; # CONSTANT FOR ROUNDING THE RESULTS
-print ("BEGIN CTABLE ANALYSIS SCRIPT");
-print ("Reading the input matrix...");
-InData = read ($1, rows = $2, cols = 2, format = "text");
-numPoints = $2;
-print ("Computing the contingency table...");
-CT = table (InData [, 1], InData [, 2]);
-# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
-print ("Preparing for the output tables...");
-nullLabel = $3;
-numPartitions = nrow (CT);
-numLabels = ncol (CT);
-numOutRows = numPartitions * numLabels;
-if (nullLabel > 0 & nullLabel <= numLabels) {
- numOutRows = numOutRows - numPartitions;
-}
-cntPartitions = rowSums (CT);
-cntLabels = t(colSums (CT));
-OutMtx = Rand (rows = numOutRows, cols = 23, min = 0, max = 0);
-idx = 0;
-zero = Rand (rows = 1, cols = 1, min = 0, max = 0);
-for (iLabel in 1:numLabels)
-{
- if (iLabel != nullLabel)
- {
- if (1==1) {
- print ("Processing label " + iLabel + ":");
- }
- for (iPartition in 1:numPartitions)
- {
- idx = idx + 1;
- OutMtx [idx, 1] = iPartition + zero;
- OutMtx [idx, 2] = iLabel + zero;
-
- n11 = CT [iPartition, iLabel];
- n01 = cntPartitions [iPartition, 1] - CT [iPartition, iLabel];
- n10 = cntLabels [iLabel, 1] - CT [iPartition, iLabel];
- n00 = numPoints - cntPartitions [iPartition, 1] - cntLabels [iLabel, 1] + CT [iPartition, iLabel];
- odds_ratio = n11 * n00 / (n01 * n10);
- sigma_log_odds_ratio = sqrt (1.0 / n00 + 1.0 / n01 + 1.0 / n10 + 1.0 / n11);
- odds_ratio_interval_small = odds_ratio / exp (1.96 * sigma_log_odds_ratio);
- odds_ratio_interval_large = odds_ratio * exp (1.96 * sigma_log_odds_ratio);
- num_sigmas_away = abs (log (odds_ratio) / sigma_log_odds_ratio);
- chi_diff = n00 * n11 - n01 * n10;
- chi_denom = (n00 + n01) * (n10 + n11) * (n00 + n10) * (n01 + n11);
- chi_square = (n00 + n01 + n10 + n11) * chi_diff * chi_diff / chi_denom;
- cramers_V = sqrt (chi_square / (n00 + n01 + n10 + n11));
-
- OutMtx [idx, 3] = n11;
- OutMtx [idx, 4] = n01;
- OutMtx [idx, 5] = n10;
- OutMtx [idx, 6] = n00;
- OutMtx [idx, 7] = round (odds_ratio * powerOfTen) / powerOfTen;
- OutMtx [idx, 8] = round (odds_ratio_interval_small * powerOfTen) / powerOfTen;
- OutMtx [idx, 9] = round (odds_ratio_interval_large * powerOfTen) / powerOfTen;
- OutMtx [idx, 10] = round (num_sigmas_away * powerOfTen) / powerOfTen;
- OutMtx [idx, 11] = round (chi_square * powerOfTen) / powerOfTen;
- OutMtx [idx, 12] = round (100.0 * cramers_V * powerOfTen) / powerOfTen;
-
- gauss_pts = Rand (rows = 2, cols = 1, min = 0, max = 0);
- gauss_pts [1, 1] = - num_sigmas_away;
- gauss_pts [2, 1] = - sqrt (chi_square);
- gauss_probs = gaussian_probability (gauss_pts);
- pval_odds = gauss_probs [1, 1] * 2.0;
- pval_chi2 = gauss_probs [2, 1] * 2.0;
-
- OutMtx [idx, 13] = round (100.0 * pval_odds * powerOfTen) / powerOfTen;
- OutMtx [idx, 14] = round (100.0 * pval_chi2 * powerOfTen) / powerOfTen;
-
- m_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
- n_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
- m_cnt [1, 1] = CT [iPartition, iLabel];
- n_cnt [1, 1] = cntPartitions [iPartition, 1];
- m_cnt [2, 1] = cntLabels [iLabel, 1];
- n_cnt [2, 1] = numPoints + zero;
- m_cnt [3, 1] = cntPartitions [iPartition, 1];
- n_cnt [3, 1] = numPoints + zero;
- [ratios, conf_interval_small, conf_interval_large] = wilson_confidence (n_cnt, m_cnt);
- OutMtx [idx, 15] = round (100.0 * ratios [1, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 16] = round (100.0 * conf_interval_small [1, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 17] = round (100.0 * conf_interval_large [1, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 18] = round (100.0 * ratios [2, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 19] = round (100.0 * conf_interval_small [2, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 20] = round (100.0 * conf_interval_large [2, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 21] = round (100.0 * ratios [3, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 22] = round (100.0 * conf_interval_small [3, 1] * powerOfTen) / powerOfTen;
- OutMtx [idx, 23] = round (100.0 * conf_interval_large [3, 1] * powerOfTen) / powerOfTen;
-} } }
-
-print ("Writing the output matrix...");
-write (OutMtx, $4, format="text");
-print ("END CTABLE ANALYSIS SCRIPT");
-
-wilson_confidence = function (Matrix[double] n, Matrix[double] m)
-return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
-{
- z = 1.96; # 97.5% normal percentile, for 95% confidence interval
- z_sq_n = z * z * n;
- qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
- midpt = n * m + z_sq_n / 2;
- denom = n * n + z_sq_n;
- ratio = m / n;
- conf_left = (midpt - qroot) / denom;
- conf_right = (midpt + qroot) / denom;
-}
-
-gaussian_probability = function (Matrix[double] vector_of_points)
- return (Matrix[double] vector_of_probabilities)
-{
- t_gp = 1.0 / (1.0 + abs (vector_of_points) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
- erf_gp = 1.0 - t_gp * ( 0.254829592
- + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
- + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
- + t_gp * (-1.453152027
- + t_gp * 1.061405429)))) * exp (- vector_of_points * vector_of_points / 2.0);
- erf_gp = erf_gp * 2.0 * (ppred (vector_of_points, 0.0, ">") - 0.5);
- vector_of_probabilities = 0.5 + 0.5 * erf_gp;
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# CTCI_ODDS.DML: TWO-ATTRIBUTE CONTINGENCY TABLE ODDS-RATIO CONFIDENCE INTERVAL ANALYSIS
+# Computes 95% confidence intervals for odds ratios using a Gaussian approximation for log-odds
+# INPUT 1: Dataset as an (N x 2) matrix, input file path/name
+# Rows: Individual data points
+# Col 1: Partition attribute (e.g. US State code), must be positive integer
+# Col 2: Label attribute (e.g. positive/negative/neutral), must be positive integer
+# INPUT 2: Number of data points N (i.e. input matrix size, rows)
+# INPUT 3: "Null" label code, 0 if there is no "null" label
+# INPUT 4: Output Matrix file path/name
+# OUTPUT 1: Output Matrix with the following information:
+# Rows: One row per each distinct pair (partition, label) excluding "null" label
+# Col 1: Partition attribute value
+# Col 2: Label attribute value
+# Col 3: Number of data points with this (partition, label) pair
+# Col 4: Number of data points with the same partition, but a different label
+# Col 5: Number of data points with a different partition, but the same label
+# Col 6: Number of data points with a different partition and a different label
+# Col 7: The odds ratio
+# Col 8: Small side of 95%-confidence interval for the odds ratio
+# Col 9: Large side of 95%-confidence interval for the odds ratio
+# Col 10: How many sigmas away the log-odds ratio is from zero
+# Col 11: Chi-squared statistic
+# Col 12: Cramer's V * 100%
+# Col 13: Log-odds ratio P-value * 100%
+# Col 14: Chi-squared P-value * 100%
+# Col 15: Percentage (out of 100) of data points in this partition to have this label
+# Col 16: Small side of 95%-confid. int-l of above percentage, Wilson Score
+# Col 17: Large side of 95%-confid. int-l of above percentage, Wilson Score
+# Col 18: Percentage (out of 100) of data points overall to have this label
+# Col 19: Small side of 95%-confid. int-l of above percentage, Wilson Score
+# Col 20: Large side of 95%-confid. int-l of above percentage, Wilson Score
+# Col 21: Percentage (out of 100) of data points overall to lie in this partition
+# Col 22: Small side of 95%-confid. int-l of above percentage, Wilson Score
+# Col 23: Large side of 95%-confid. int-l of above percentage, Wilson Score
+#
+# EXAMPLE:
+# hadoop jar SystemML.jar -f PATH/ctci_odds.dml -args PATH/ctci_test.mtx 5602 2 PATH/ctci_odds_test_output.mtx
+
+# --- Setup: read the (partition, label) point pairs and build the contingency table ---
+powerOfTen = 10000; # CONSTANT FOR ROUNDING THE RESULTS
+print ("BEGIN CTABLE ANALYSIS SCRIPT");
+print ("Reading the input matrix...");
+InData = read ($1, rows = $2, cols = 2, format = "text");
+numPoints = $2;
+print ("Computing the contingency table...");
+CT = table (InData [, 1], InData [, 2]); # CT[i,j] = #points with partition code i and label code j
+# DEBUG LINE ONLY: write (CT, "test/scripts/applications/ctableStats/ctci_test_CT.mtx", format="text");
+print ("Preparing for the output tables...");
+nullLabel = $3; # label code to exclude from the output (0 = no "null" label)
+numPartitions = nrow (CT);
+numLabels = ncol (CT);
+# one output row per (partition, label) pair, minus the rows of the excluded "null" label
+numOutRows = numPartitions * numLabels;
+if (nullLabel > 0 & nullLabel <= numLabels) {
+ numOutRows = numOutRows - numPartitions;
+}
+cntPartitions = rowSums (CT); # per-partition point totals (column vector)
+cntLabels = t(colSums (CT)); # per-label point totals, transposed to a column vector
+OutMtx = Rand (rows = numOutRows, cols = 23, min = 0, max = 0); # all-zero result matrix, filled row by row below
+idx = 0; # current output row index
+zero = Rand (rows = 1, cols = 1, min = 0, max = 0); # 1x1 zero matrix: "scalar + zero" casts a scalar into a matrix cell
+# --- Main loop: one output row of 23 statistics per (partition, non-null label) pair ---
+for (iLabel in 1:numLabels)
+{
+ if (iLabel != nullLabel)
+ {
+ if (1==1) {
+ print ("Processing label " + iLabel + ":"); # NOTE(review): "if (1==1)" looks like a DML workaround to allow print inside the loop -- confirm
+ }
+ for (iPartition in 1:numPartitions)
+ {
+ idx = idx + 1;
+ OutMtx [idx, 1] = iPartition + zero;
+ OutMtx [idx, 2] = iLabel + zero;
+
+ # 2x2 table for this pair: n11 = this partition & this label, n01 = this partition & other label,
+ # n10 = other partition & this label, n00 = other partition & other label
+ n11 = CT [iPartition, iLabel];
+ n01 = cntPartitions [iPartition, 1] - CT [iPartition, iLabel];
+ n10 = cntLabels [iLabel, 1] - CT [iPartition, iLabel];
+ n00 = numPoints - cntPartitions [iPartition, 1] - cntLabels [iLabel, 1] + CT [iPartition, iLabel];
+ # NOTE(review): a zero cell (n00, n01, n10, or n11 = 0) divides by zero here -- confirm inputs rule this out
+ odds_ratio = n11 * n00 / (n01 * n10);
+ # std. deviation of log(odds_ratio) under the Gaussian (Woolf) approximation
+ sigma_log_odds_ratio = sqrt (1.0 / n00 + 1.0 / n01 + 1.0 / n10 + 1.0 / n11);
+ # 95% confidence interval: exp(log(OR) -/+ 1.96 * sigma)
+ odds_ratio_interval_small = odds_ratio / exp (1.96 * sigma_log_odds_ratio);
+ odds_ratio_interval_large = odds_ratio * exp (1.96 * sigma_log_odds_ratio);
+ num_sigmas_away = abs (log (odds_ratio) / sigma_log_odds_ratio);
+ # chi-squared statistic and Cramer's V for the same 2x2 table
+ chi_diff = n00 * n11 - n01 * n10;
+ chi_denom = (n00 + n01) * (n10 + n11) * (n00 + n10) * (n01 + n11);
+ chi_square = (n00 + n01 + n10 + n11) * chi_diff * chi_diff / chi_denom;
+ cramers_V = sqrt (chi_square / (n00 + n01 + n10 + n11));
+
+ OutMtx [idx, 3] = n11;
+ OutMtx [idx, 4] = n01;
+ OutMtx [idx, 5] = n10;
+ OutMtx [idx, 6] = n00;
+ OutMtx [idx, 7] = round (odds_ratio * powerOfTen) / powerOfTen;
+ OutMtx [idx, 8] = round (odds_ratio_interval_small * powerOfTen) / powerOfTen;
+ OutMtx [idx, 9] = round (odds_ratio_interval_large * powerOfTen) / powerOfTen;
+ OutMtx [idx, 10] = round (num_sigmas_away * powerOfTen) / powerOfTen;
+ OutMtx [idx, 11] = round (chi_square * powerOfTen) / powerOfTen;
+ OutMtx [idx, 12] = round (100.0 * cramers_V * powerOfTen) / powerOfTen;
+
+ # two-sided p-values from the left normal tail: 2 * Phi(-|statistic|)
+ gauss_pts = Rand (rows = 2, cols = 1, min = 0, max = 0);
+ gauss_pts [1, 1] = - num_sigmas_away;
+ gauss_pts [2, 1] = - sqrt (chi_square);
+ gauss_probs = gaussian_probability (gauss_pts);
+ pval_odds = gauss_probs [1, 1] * 2.0;
+ pval_chi2 = gauss_probs [2, 1] * 2.0;
+
+ OutMtx [idx, 13] = round (100.0 * pval_odds * powerOfTen) / powerOfTen;
+ OutMtx [idx, 14] = round (100.0 * pval_chi2 * powerOfTen) / powerOfTen;
+
+ # Wilson intervals for three proportions: (1) this label within this partition,
+ # (2) this label overall, (3) this partition overall
+ m_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
+ n_cnt = Rand (rows = 3, cols = 1, min = 0, max = 0);
+ m_cnt [1, 1] = CT [iPartition, iLabel];
+ n_cnt [1, 1] = cntPartitions [iPartition, 1];
+ m_cnt [2, 1] = cntLabels [iLabel, 1];
+ n_cnt [2, 1] = numPoints + zero;
+ m_cnt [3, 1] = cntPartitions [iPartition, 1];
+ n_cnt [3, 1] = numPoints + zero;
+ [ratios, conf_interval_small, conf_interval_large] = wilson_confidence (n_cnt, m_cnt);
+ OutMtx [idx, 15] = round (100.0 * ratios [1, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 16] = round (100.0 * conf_interval_small [1, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 17] = round (100.0 * conf_interval_large [1, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 18] = round (100.0 * ratios [2, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 19] = round (100.0 * conf_interval_small [2, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 20] = round (100.0 * conf_interval_large [2, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 21] = round (100.0 * ratios [3, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 22] = round (100.0 * conf_interval_small [3, 1] * powerOfTen) / powerOfTen;
+ OutMtx [idx, 23] = round (100.0 * conf_interval_large [3, 1] * powerOfTen) / powerOfTen;
+} } }
+
+# --- Output: persist the 23-column statistics matrix to the path given in $4 ---
+print ("Writing the output matrix...");
+write (OutMtx, $4, format="text");
+print ("END CTABLE ANALYSIS SCRIPT");
+
+# Wilson score 95%-confidence interval for a binomial proportion m/n,
+# computed cellwise over equally-sized count matrices.
+# Input:  n = number of trials per cell, m = number of successes per cell
+# Output: ratio = m/n point estimate; [conf_left, conf_right] = Wilson interval bounds
+wilson_confidence = function (Matrix[double] n, Matrix[double] m)
+return (Matrix[double] ratio, Matrix[double] conf_left, Matrix[double] conf_right)
+{
+ z = 1.96; # 97.5% normal percentile, for 95% confidence interval
+ z_sq_n = z * z * n;
+ # Wilson interval (m + z^2/2 +/- z*sqrt(m*(n-m)/n + z^2/4)) / (n + z^2),
+ # with numerator and denominator both scaled by n to avoid an extra division
+ qroot = sqrt (z_sq_n * (m * (n - m) + z_sq_n / 4));
+ midpt = n * m + z_sq_n / 2;
+ denom = n * n + z_sq_n;
+ ratio = m / n; # plain point estimate of the proportion
+ conf_left = (midpt - qroot) / denom;
+ conf_right = (midpt + qroot) / denom;
+}
+
+# Standard normal CDF Phi(x), evaluated cellwise via the Abramowitz & Stegun
+# Sec. 7.1.26 polynomial approximation of erf (abs. error <= 1.5e-7 per their errata).
+# Input:  vector_of_points = points x at which to evaluate Phi
+# Output: vector_of_probabilities = Phi(x) = P(N(0,1) <= x), cellwise
+gaussian_probability = function (Matrix[double] vector_of_points)
+ return (Matrix[double] vector_of_probabilities)
+{
+ t_gp = 1.0 / (1.0 + abs (vector_of_points) * 0.231641888); # 0.231641888 = 0.3275911 / sqrt (2.0)
+ # polynomial-in-t approximation of erf(|x|/sqrt(2)); x is pre-scaled by 1/sqrt(2) via the constant above
+ erf_gp = 1.0 - t_gp * ( 0.254829592
+ + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+ + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+ + t_gp * (-1.453152027
+ + t_gp * 1.061405429)))) * exp (- vector_of_points * vector_of_points / 2.0);
+ erf_gp = erf_gp * 2.0 * (ppred (vector_of_points, 0.0, ">") - 0.5); # restore the sign: erf is odd, the approximation used |x|
+ vector_of_probabilities = 0.5 + 0.5 * erf_gp; # Phi(x) = (1 + erf(x / sqrt(2))) / 2
+}
+