Posted to commits@systemml.apache.org by du...@apache.org on 2016/01/22 17:34:02 UTC
[26/51] [partial] incubator-systemml git commit: [SYSTEMML-482]
[SYSTEMML-480] Adding a Git attributes file to enforce Unix-style line
endings, and normalizing all of the line endings.
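
For reference, this kind of normalization is driven by a .gitattributes file at
the repository root. A minimal sketch of such a file (the actual file added by
this commit may declare more patterns) is:

    # Treat files as text and store them with LF line endings
    * text eol=lf

    # Leave binary artifacts untouched
    *.jar binary
    *.png binary

Once the attributes are in place, re-staging the whole tree (on Git 2.16 and
later, "git add --renormalize .") rewrites the stored blobs with normalized
endings, which is why every line of the files below appears as removed and
then re-added.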
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/id3/id3.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/id3/id3.R b/src/test/scripts/applications/id3/id3.R
index 8528b52..b838607 100644
--- a/src/test/scripts/applications/id3/id3.R
+++ b/src/test/scripts/applications/id3/id3.R
@@ -1,242 +1,242 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-library("matrixStats")
-
-options(warn=-1)
-
-
-id3_learn = function(X, y, X_subset, attributes, minsplit)
-{
- #try the two base cases first
-
- #of the remaining samples, compute a histogram for labels
- hist_labels_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
- hist_labels = as.matrix(hist_labels_helper[,2])
-
- #go through the histogram to compute the number of labels
- #with non-zero samples
- #and to pull out the most popular label
-
- num_non_zero_labels = sum(hist_labels > 0);
- most_popular_label = which.max(t(hist_labels));
- num_remaining_attrs = sum(attributes)
-
- num_samples = sum(X_subset)
- mpl = as.numeric(most_popular_label)
-
- nodes = matrix(0, 1, 1)
- edges = matrix(0, 1, 1)
-
- #if all samples have the same label then return a leaf node
- #if no attributes remain then return a leaf node with the most popular label
- if(num_non_zero_labels == 1 | num_remaining_attrs == 0 | num_samples < minsplit){
- nodes = matrix(0, 1, 2)
- nodes[1,1] = -1
- nodes[1,2] = most_popular_label
- edges = matrix(-1, 1, 1)
- }else{
- #computing gains for all available attributes using parfor
- hist_labels2_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
- hist_labels2 = as.matrix(hist_labels2_helper[,2])
- num_samples2 = sum(X_subset)
- zero_entries_in_hist1 = (hist_labels2 == 0)
- pi1 = hist_labels2/num_samples2
- log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1
- entropy_vector1 = -pi1*log(log_term1)
- ht = sum(entropy_vector1)
-
- sz = nrow(attributes)
- gains = matrix(0, sz, 1)
- for(i in 1:nrow(attributes)){
- if(as.numeric(attributes[i,1]) == 1){
- attr_vals = X[,i]
- attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
- attr_domain = as.matrix(attr_domain_helper[,2])
-
- hxt_vector = matrix(0, nrow(attr_domain), 1)
-
- for(j in 1:nrow(attr_domain)){
- if(as.numeric(attr_domain[j,1]) != 0){
- val = j
- Tj = X_subset * (X[,i] == val)
-
- #entropy = compute_entropy(Tj, y)
- hist_labels1_helper = as.matrix(aggregate(as.vector(Tj), by=list(as.vector(y)), FUN=sum))
- hist_labels1 = as.matrix(hist_labels1_helper[,2])
- num_samples1 = sum(Tj)
- zero_entries_in_hist = (hist_labels1 == 0)
- pi = hist_labels1/num_samples1
- log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi
- entropy_vector = -pi*log(log_term)
- entropy = sum(entropy_vector)
-
- hxt_vector[j,1] = sum(Tj)/sum(X_subset)*entropy
- }
- }
- hxt = sum(hxt_vector)
- gains[i,1] = (ht - hxt)
- }
- }
-
- #pick out attr with highest gain
- best_attr = -1
- max_gain = 0
- for(i4 in 1:nrow(gains)){
- if(as.numeric(attributes[i4,1]) == 1){
- g = as.numeric(gains[i4,1])
- if(best_attr == -1 | max_gain <= g){
- max_gain = g
- best_attr = i4
- }
- }
- }
-
- attr_vals = X[,best_attr]
- attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
- attr_domain = as.matrix(attr_domain_helper[,2])
-
- new_attributes = attributes
- new_attributes[best_attr, 1] = 0
-
- max_sz_subtree = 2*sum(X_subset)
- sz2 = nrow(attr_domain)
- sz1 = sz2*max_sz_subtree
-
- tempNodeStore = matrix(0, 2, sz1)
- tempEdgeStore = matrix(0, 3, sz1)
- numSubtreeNodes = matrix(0, sz2, 1)
- numSubtreeEdges = matrix(0, sz2, 1)
-
- for(i1 in 1:nrow(attr_domain)){
-
- Ti = X_subset * (X[,best_attr] == i1)
- num_nodes_Ti = sum(Ti)
-
- if(num_nodes_Ti > 0){
- tmpRet <- id3_learn(X, y, Ti, new_attributes, minsplit)
- nodesi = as.matrix(tmpRet$a);
- edgesi = as.matrix(tmpRet$b);
-
- start_pt = 1+(i1-1)*max_sz_subtree
- tempNodeStore[,start_pt:(start_pt+nrow(nodesi)-1)] = t(nodesi)
-
- numSubtreeNodes[i1,1] = nrow(nodesi)
- if(nrow(edgesi)!=1 | ncol(edgesi)!=1 | as.numeric(edgesi[1,1])!=-1){
- tempEdgeStore[,start_pt:(start_pt+nrow(edgesi)-1)] = t(edgesi)
- numSubtreeEdges[i1,1] = nrow(edgesi)
- }else{
- numSubtreeEdges[i1,1] = 0
- }
- }
- }
-
- num_nodes_in_subtrees = sum(numSubtreeNodes)
- num_edges_in_subtrees = sum(numSubtreeEdges)
-
- #creating root node
- sz = 1+num_nodes_in_subtrees
-
- nodes = matrix(0, sz, 2)
- nodes[1,1] = best_attr
- numNodes = 1
-
- #edges from root to children
- sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees
-
- edges = matrix(1, sz, 3)
- numEdges = 0
- for(i6 in 1:nrow(attr_domain)){
- num_nodesi = as.numeric(numSubtreeNodes[i6,1])
- if(num_nodesi > 0){
- edges[numEdges+1,2] = i6
- numEdges = numEdges + 1
- }
- }
-
- nonEmptyAttri = 0
- for(i7 in 1:nrow(attr_domain)){
- numNodesInSubtree = as.numeric(numSubtreeNodes[i7,1])
-
- if(numNodesInSubtree > 0){
- start_pt1 = 1 + (i7-1)*max_sz_subtree
- nodes[(numNodes+1):(numNodes+numNodesInSubtree),] = t(tempNodeStore[,start_pt1:(start_pt1+numNodesInSubtree-1)])
-
- numEdgesInSubtree = as.numeric(numSubtreeEdges[i7,1])
-
- if(numEdgesInSubtree!=0){
- edgesi1 = t(tempEdgeStore[,start_pt1:(start_pt1+numEdgesInSubtree-1)])
- edgesi1[,1] = edgesi1[,1] + numNodes
- edgesi1[,3] = edgesi1[,3] + numNodes
-
- edges[(numEdges+1):(numEdges+numEdgesInSubtree),] = edgesi1
- numEdges = numEdges + numEdgesInSubtree
- }
-
- edges[nonEmptyAttri+1,3] = numNodes + 1
- nonEmptyAttri = nonEmptyAttri + 1
-
- numNodes = numNodes + numNodesInSubtree
- }
- }
- }
-
- return ( list(a=nodes, b=edges) );
-}
-
-X = readMM(paste(args[1], "X.mtx", sep=""));
-y = readMM(paste(args[1], "y.mtx", sep=""));
-
-n = nrow(X)
-m = ncol(X)
-
-minsplit = 2
-
-
-X_subset = matrix(1, n, 1)
-attributes = matrix(1, m, 1)
-# recoding inputs
-featureCorrections = as.vector(1 - colMins(as.matrix(X)))
-onesMat = matrix(1, n, m)
-
-X = onesMat %*% diag(featureCorrections) + X
-labelCorrection = 1 - min(y)
-y = y + labelCorrection + 0
-
-tmpRet <- id3_learn(X, y, X_subset, attributes, minsplit)
-nodes = as.matrix(tmpRet$a)
-edges = as.matrix(tmpRet$b)
-
-# decoding outputs
-nodes[,2] = nodes[,2] - labelCorrection * (nodes[,1] == -1)
-for(i3 in 1:nrow(edges)){
- e_parent = as.numeric(edges[i3,1])
- parent_feature = as.numeric(nodes[e_parent,1])
- correction = as.numeric(featureCorrections[parent_feature])
- edges[i3,2] = edges[i3,2] - correction
-}
-
-writeMM(as(nodes,"CsparseMatrix"), paste(args[2],"nodes", sep=""), format = "text")
-writeMM(as(edges,"CsparseMatrix"), paste(args[2],"edges", sep=""), format = "text")
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+library("matrixStats")
+
+options(warn=-1)
+
+
+id3_learn = function(X, y, X_subset, attributes, minsplit)
+{
+ #try the two base cases first
+
+ #of the remaining samples, compute a histogram for labels
+ hist_labels_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
+ hist_labels = as.matrix(hist_labels_helper[,2])
+
+ #go through the histogram to compute the number of labels
+ #with non-zero samples
+ #and to pull out the most popular label
+
+ num_non_zero_labels = sum(hist_labels > 0);
+ most_popular_label = which.max(t(hist_labels));
+ num_remaining_attrs = sum(attributes)
+
+ num_samples = sum(X_subset)
+ mpl = as.numeric(most_popular_label)
+
+ nodes = matrix(0, 1, 1)
+ edges = matrix(0, 1, 1)
+
+ #if all samples have the same label then return a leaf node
+ #if no attributes remain then return a leaf node with the most popular label
+ if(num_non_zero_labels == 1 | num_remaining_attrs == 0 | num_samples < minsplit){
+ nodes = matrix(0, 1, 2)
+ nodes[1,1] = -1
+ nodes[1,2] = most_popular_label
+ edges = matrix(-1, 1, 1)
+ }else{
+ #computing gains for all available attributes using parfor
+ hist_labels2_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(y)), FUN=sum))
+ hist_labels2 = as.matrix(hist_labels2_helper[,2])
+ num_samples2 = sum(X_subset)
+ zero_entries_in_hist1 = (hist_labels2 == 0)
+ pi1 = hist_labels2/num_samples2
+ log_term1 = zero_entries_in_hist1*1 + (1-zero_entries_in_hist1)*pi1
+ entropy_vector1 = -pi1*log(log_term1)
+ ht = sum(entropy_vector1)
+
+ sz = nrow(attributes)
+ gains = matrix(0, sz, 1)
+ for(i in 1:nrow(attributes)){
+ if(as.numeric(attributes[i,1]) == 1){
+ attr_vals = X[,i]
+ attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
+ attr_domain = as.matrix(attr_domain_helper[,2])
+
+ hxt_vector = matrix(0, nrow(attr_domain), 1)
+
+ for(j in 1:nrow(attr_domain)){
+ if(as.numeric(attr_domain[j,1]) != 0){
+ val = j
+ Tj = X_subset * (X[,i] == val)
+
+ #entropy = compute_entropy(Tj, y)
+ hist_labels1_helper = as.matrix(aggregate(as.vector(Tj), by=list(as.vector(y)), FUN=sum))
+ hist_labels1 = as.matrix(hist_labels1_helper[,2])
+ num_samples1 = sum(Tj)
+ zero_entries_in_hist = (hist_labels1 == 0)
+ pi = hist_labels1/num_samples1
+ log_term = zero_entries_in_hist*1 + (1-zero_entries_in_hist)*pi
+ entropy_vector = -pi*log(log_term)
+ entropy = sum(entropy_vector)
+
+ hxt_vector[j,1] = sum(Tj)/sum(X_subset)*entropy
+ }
+ }
+ hxt = sum(hxt_vector)
+ gains[i,1] = (ht - hxt)
+ }
+ }
+
+ #pick out attr with highest gain
+ best_attr = -1
+ max_gain = 0
+ for(i4 in 1:nrow(gains)){
+ if(as.numeric(attributes[i4,1]) == 1){
+ g = as.numeric(gains[i4,1])
+ if(best_attr == -1 | max_gain <= g){
+ max_gain = g
+ best_attr = i4
+ }
+ }
+ }
+
+ attr_vals = X[,best_attr]
+ attr_domain_helper = as.matrix(aggregate(as.vector(X_subset), by=list(as.vector(attr_vals)), FUN=sum))
+ attr_domain = as.matrix(attr_domain_helper[,2])
+
+ new_attributes = attributes
+ new_attributes[best_attr, 1] = 0
+
+ max_sz_subtree = 2*sum(X_subset)
+ sz2 = nrow(attr_domain)
+ sz1 = sz2*max_sz_subtree
+
+ tempNodeStore = matrix(0, 2, sz1)
+ tempEdgeStore = matrix(0, 3, sz1)
+ numSubtreeNodes = matrix(0, sz2, 1)
+ numSubtreeEdges = matrix(0, sz2, 1)
+
+ for(i1 in 1:nrow(attr_domain)){
+
+ Ti = X_subset * (X[,best_attr] == i1)
+ num_nodes_Ti = sum(Ti)
+
+ if(num_nodes_Ti > 0){
+ tmpRet <- id3_learn(X, y, Ti, new_attributes, minsplit)
+ nodesi = as.matrix(tmpRet$a);
+ edgesi = as.matrix(tmpRet$b);
+
+ start_pt = 1+(i1-1)*max_sz_subtree
+ tempNodeStore[,start_pt:(start_pt+nrow(nodesi)-1)] = t(nodesi)
+
+ numSubtreeNodes[i1,1] = nrow(nodesi)
+ if(nrow(edgesi)!=1 | ncol(edgesi)!=1 | as.numeric(edgesi[1,1])!=-1){
+ tempEdgeStore[,start_pt:(start_pt+nrow(edgesi)-1)] = t(edgesi)
+ numSubtreeEdges[i1,1] = nrow(edgesi)
+ }else{
+ numSubtreeEdges[i1,1] = 0
+ }
+ }
+ }
+
+ num_nodes_in_subtrees = sum(numSubtreeNodes)
+ num_edges_in_subtrees = sum(numSubtreeEdges)
+
+ #creating root node
+ sz = 1+num_nodes_in_subtrees
+
+ nodes = matrix(0, sz, 2)
+ nodes[1,1] = best_attr
+ numNodes = 1
+
+ #edges from root to children
+ sz = sum(numSubtreeNodes > 0) + num_edges_in_subtrees
+
+ edges = matrix(1, sz, 3)
+ numEdges = 0
+ for(i6 in 1:nrow(attr_domain)){
+ num_nodesi = as.numeric(numSubtreeNodes[i6,1])
+ if(num_nodesi > 0){
+ edges[numEdges+1,2] = i6
+ numEdges = numEdges + 1
+ }
+ }
+
+ nonEmptyAttri = 0
+ for(i7 in 1:nrow(attr_domain)){
+ numNodesInSubtree = as.numeric(numSubtreeNodes[i7,1])
+
+ if(numNodesInSubtree > 0){
+ start_pt1 = 1 + (i7-1)*max_sz_subtree
+ nodes[(numNodes+1):(numNodes+numNodesInSubtree),] = t(tempNodeStore[,start_pt1:(start_pt1+numNodesInSubtree-1)])
+
+ numEdgesInSubtree = as.numeric(numSubtreeEdges[i7,1])
+
+ if(numEdgesInSubtree!=0){
+ edgesi1 = t(tempEdgeStore[,start_pt1:(start_pt1+numEdgesInSubtree-1)])
+ edgesi1[,1] = edgesi1[,1] + numNodes
+ edgesi1[,3] = edgesi1[,3] + numNodes
+
+ edges[(numEdges+1):(numEdges+numEdgesInSubtree),] = edgesi1
+ numEdges = numEdges + numEdgesInSubtree
+ }
+
+ edges[nonEmptyAttri+1,3] = numNodes + 1
+ nonEmptyAttri = nonEmptyAttri + 1
+
+ numNodes = numNodes + numNodesInSubtree
+ }
+ }
+ }
+
+ return ( list(a=nodes, b=edges) );
+}
+
+X = readMM(paste(args[1], "X.mtx", sep=""));
+y = readMM(paste(args[1], "y.mtx", sep=""));
+
+n = nrow(X)
+m = ncol(X)
+
+minsplit = 2
+
+
+X_subset = matrix(1, n, 1)
+attributes = matrix(1, m, 1)
+# recoding inputs
+featureCorrections = as.vector(1 - colMins(as.matrix(X)))
+onesMat = matrix(1, n, m)
+
+X = onesMat %*% diag(featureCorrections) + X
+labelCorrection = 1 - min(y)
+y = y + labelCorrection + 0
+
+tmpRet <- id3_learn(X, y, X_subset, attributes, minsplit)
+nodes = as.matrix(tmpRet$a)
+edges = as.matrix(tmpRet$b)
+
+# decoding outputs
+nodes[,2] = nodes[,2] - labelCorrection * (nodes[,1] == -1)
+for(i3 in 1:nrow(edges)){
+ e_parent = as.numeric(edges[i3,1])
+ parent_feature = as.numeric(nodes[e_parent,1])
+ correction = as.numeric(featureCorrections[parent_feature])
+ edges[i3,2] = edges[i3,2] - correction
+}
+
+writeMM(as(nodes,"CsparseMatrix"), paste(args[2],"nodes", sep=""), format = "text")
+writeMM(as(edges,"CsparseMatrix"), paste(args[2],"edges", sep=""), format = "text")
+
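
The heart of id3.R above is the standard ID3 split criterion: the entropy of
the label distribution, H(T) = -sum_i p_i * log(p_i) (natural log in this
script), and the information gain of an attribute, gain = H(T) -
sum_j (|Tj|/|T|) * H(Tj). The script guards against log(0) by substituting 1
for zero histogram entries, so those terms contribute 0. A toy R sketch of the
same computation (illustrative only, not part of the test suite; the function
and variable names are made up):

    # entropy of a vector of label counts, with the same zero-entry guard
    # used in id3.R: zero counts contribute 0 because 0 * log(1) = 0
    entropy <- function(counts) {
      p <- counts / sum(counts)
      log_term <- ifelse(counts == 0, 1, p)
      sum(-p * log(log_term))
    }

    y    <- c(1, 1, 2, 2, 2, 3)   # labels of the current sample subset
    attr <- c(1, 1, 1, 2, 2, 2)   # one candidate attribute's values

    ht   <- entropy(table(y))     # H(T)
    hxt  <- sum(sapply(split(y, attr), function(yj)
              length(yj) / length(y) * entropy(table(yj))))
    gain <- ht - hxt              # the quantity stored in gains[i,1]

The attribute with the largest gain becomes the split node, exactly as in the
best_attr loop of the script.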
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/impute/imputeGaussMCMC.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/impute/imputeGaussMCMC.dml b/src/test/scripts/applications/impute/imputeGaussMCMC.dml
index 1ebe5a0..21ecaee 100644
--- a/src/test/scripts/applications/impute/imputeGaussMCMC.dml
+++ b/src/test/scripts/applications/impute/imputeGaussMCMC.dml
@@ -1,687 +1,687 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
-# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
-# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
-# Soft linear regression constraints define dependencies between values in/across the reports.
-# Linear regression parameters are unknown and sampled together with the missing values in the reports.
-#
-# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
-# but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
-# There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
-#
-# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
-# "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
-# where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
-# Term = t, attribute = i --> index = (t-1) * num_attrs + i
-#
-# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
-# a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
-#
-# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
-# (if nonzero) to be added to the regression factors before they are multiplied by parameters.
-#
-# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
-# from the vector of parameters to the stretched matrix of regression factors.
-#
-# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
-# (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
-#
-# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
-#
-# INPUT 8 : Number of factors in a regression equation, including the estimated value
-# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once)
-# BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner
-# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once)
-#
-# INPUT 11: Output file name and path for the average MCMC reports table
-# INPUT 12: Output file for debugging (currently: the average parameters vector)
-#
-# Example:
-# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
-# test/scripts/applications/impute/initial_reports
-# test/scripts/applications/impute/CReps
-# test/scripts/applications/impute/RegresValueMap
-# test/scripts/applications/impute/RegresFactorDefault
-# test/scripts/applications/impute/RegresParamMap
-# test/scripts/applications/impute/RegresCoeffDefault
-# test/scripts/applications/impute/RegresScaleMult
-# 4 1000 100
-# test/scripts/applications/impute/output_reports
-# test/scripts/applications/impute/debug_info
-
-
-print ("START ImputeGaussMCMC");
-print ("Reading the input files...");
-
-initial_reports = read ($1);
-CReps = read ($2);
-
-num_terms = ncol (initial_reports); # Number of periods for which reports are produced
-num_attrs = nrow (initial_reports); # Number of attribute values per each term report
-num_frees = ncol (CReps); # Number of free variables used to describe all consistent reports
-
-dReps_size = num_terms * num_attrs;
-dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE);
-
-# We assume that all report-series consistent with hard constraints form an affine set:
-# reports = CReps %*% freeVars + dReps
-# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
-# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
-
-RegresValueMap = read ($3);
-RegresFactorDefault = read ($4);
-RegresParamMap = read ($5);
-RegresCoeffDefault = read ($6);
-RegresScaleMult = read ($7);
-
-num_factors = $8; # Number of factors in each regression equation, including the estimated value
-num_reg_eqs = nrow (RegresParamMap) / num_factors; # Number of regression equations
-num_params = ncol (RegresParamMap); # Number of parameters used in all regressions
-max_num_burnin_iterations = $9;
-max_num_observed_iterations = $10;
-
-ones_fv = matrix (1.0, rows = 1, cols = num_frees);
-ones_pm = matrix (1.0, rows = 1, cols = num_params);
-twos_re = matrix (2.0, rows = 1, cols = num_reg_eqs);
-
-# Create a summation operator (matrix) that adds the factors in each regression:
-
-ncols_SumFactor = num_reg_eqs * num_factors;
-SumFactor = matrix (0.0, rows = num_reg_eqs, cols = ncols_SumFactor);
-ones_f = matrix (1.0, rows = 1, cols = num_factors);
-SumFactor [1, 1:num_factors] = ones_f;
-nSumFactor = 1;
-while (nSumFactor < num_reg_eqs) {
- incSumFactor = nSumFactor;
- if (incSumFactor > num_reg_eqs - nSumFactor) {
- incSumFactor = num_reg_eqs - nSumFactor;
- }
- SumFactor [(nSumFactor + 1) : (nSumFactor + incSumFactor),
- (nSumFactor * num_factors + 1) : ((nSumFactor + incSumFactor) * num_factors)] =
- SumFactor [1 : incSumFactor, 1 : (incSumFactor * num_factors)];
- nSumFactor = nSumFactor + incSumFactor;
-}
-
-freeVars = matrix (0.0, rows = num_frees, cols = 1);
-params = matrix (1.0, rows = num_params, cols = 1);
-
-
-
-num_opt_iter = 20;
-print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
-
-reports = CReps %*% freeVars + dReps;
-regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-
-bilinear_sums = SumFactor %*% (regresValues * regresParams);
-w_bilinear_sums = RegresScaleMult * bilinear_sums;
-bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
-
-opt_iter = 1;
-is_step_params = 1;
-is_opt_converged = 0;
-
-print ("Before optimization: Initial bilinear form value = " + bilinear_form_value);
-
-while (is_opt_converged == 0)
-{
- deg = is_step_params * num_params + (1 - is_step_params) * num_frees;
- shift_vector = matrix (0.0, rows = deg, cols = 1);
-
- # Compute gradient
-
- if (is_step_params == 1) {
- gradient = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
- } else {
- gradient = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
- }
-
- # Make a few conjugate gradient steps
-
- residual = t(gradient);
- p = - residual;
- norm_r2 = sum (residual * residual);
- cg_iter = 1;
- cg_terminate = 0;
-
- while (cg_terminate == 0)
- {
- # Want: q = A %*% p;
- # Compute gradient change from 0 to p
-
- if (is_step_params == 1) {
- w_bilinear_p = RegresScaleMult * (SumFactor %*% (regresValues * (RegresParamMap %*% p)));
- gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
- } else {
- w_bilinear_p = RegresScaleMult * (SumFactor %*% ((RegresValueMap %*% CReps %*% p) * regresParams));
- gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
- }
- q = t(gradient_change_p);
-
- alpha = norm_r2 / castAsScalar (t(p) %*% q);
- shift_vector_change = alpha * p;
- shift_vector = shift_vector + shift_vector_change;
- old_norm_r2 = norm_r2;
- residual = residual + alpha * q;
- norm_r2 = sum (residual * residual);
- p = - residual + (norm_r2 / old_norm_r2) * p;
- cg_iter = cg_iter + 1;
- if (cg_iter > min (deg, 2 + opt_iter / 3)) {
- cg_terminate = 1;
- }
- }
-
- if (is_step_params == 1) {
- params = params + shift_vector;
- regresParams = RegresParamMap %*% params + RegresCoeffDefault;
- } else {
- freeVars = freeVars + shift_vector;
- reports = CReps %*% freeVars + dReps;
- regresValues = RegresValueMap %*% reports + RegresFactorDefault;
- }
-
- # Update the bilinear form and check convergence
-
- if (is_step_params == 1) {
- old_bilinear_form_value = bilinear_form_value;
- }
-
- bilinear_sums = SumFactor %*% (regresValues * regresParams);
- w_bilinear_sums = RegresScaleMult * bilinear_sums;
- bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
-
- if (is_step_params == 1) {
- print ("Optimization step " + opt_iter + " (params) : bilinear form value = " + bilinear_form_value);
- } else {
- print ("Optimization step " + opt_iter + " (reports): bilinear form value = " + bilinear_form_value);
- }
-
- is_step_params = 1 - is_step_params;
- opt_iter = opt_iter + 1;
-
- if (is_step_params == 1 & opt_iter > num_opt_iter) {
- is_opt_converged = 1;
- }
-}
-
-
-
-/* UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT
-
-
-
-print ("Starting Gradient Descent...");
-### GRADIENT DESCENT WITH MODIFICATIONS TO ENHANCE CONVERGENCE
-
-# num_past_dirVs = 3;
-# past_dirVFrees = matrix (0.0, rows = num_frees, cols = num_past_dirVs);
-# past_dirVParams = matrix (0.0, rows = num_params, cols = num_past_dirVs);
-
-shift_T = -1000.0;
-is_enough_gradient_descent = 0;
-gd_iter = 0;
-
-while (is_enough_gradient_descent == 0)
-{
-### GD-STEP 1: COMPUTE LOSS & GRADIENT AT CURRENT POINT
-
- reports = CReps %*% freeVars + dReps;
- regresValues = RegresValueMap %*% reports + RegresFactorDefault;
- regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-
- bilinear_sums = SumFactor %*% (regresValues * regresParams);
- w_bilinear_sums = RegresScaleMult * bilinear_sums;
- gradientInFrees = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
- gradientInParams = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
-
-### CG-STEP 2: MAKE A FEW APPROXIMATE CONJUGATE GRADIENT STEPS
-
- shift_frees = matrix (0.0, rows = num_frees, cols = 1);
- shift_params = matrix (0.0, rows = num_params, cols = 1);
-
- residual_frees = t(gradientInFrees);
- residual_params = t(gradientInParams);
-
- p_frees = - residual_frees;
- p_params = - residual_params;
-
- norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
-
- cg_iter = 1;
- cg_terminate = 0;
- cg_eps = 0.000001;
-
- while (cg_terminate == 0)
- {
- regresValues_eps_p = regresValues + cg_eps * (RegresValueMap %*% CReps %*% p_frees);
- regresParams_eps_p = regresParams + cg_eps * (RegresParamMap %*% p_params);
-
- bilinear_sums_eps_p = SumFactor %*% (regresValues_eps_p * regresParams_eps_p);
- w_bilinear_sums_eps_p = RegresScaleMult * bilinear_sums_eps_p;
-
- gradientInFrees_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_fv) * (SumFactor %*% ((regresParams_eps_p %*% ones_fv) * (RegresValueMap %*% CReps))));
- gradientInParams_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_pm) * (SumFactor %*% ((regresValues_eps_p %*% ones_pm) * RegresParamMap)));
-
- q_frees = t(gradientInFrees_eps_p - gradientInFrees) / cg_eps;
- q_params = t(gradientInParams_eps_p - gradientInParams) / cg_eps;
-
- alpha = norm_r2 / castAsScalar (t(p_frees) %*% q_frees + t(p_params) %*% q_params);
-
- shift_frees = shift_frees + alpha * p_frees;
- shift_params = shift_params + alpha * p_params;
-
- old_norm_r2 = norm_r2;
-
- residual_frees = residual_frees + alpha * q_frees;
- residual_params = residual_params + alpha * q_params;
-
- norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
-
- p_frees = - residual_frees + (norm_r2 / old_norm_r2) * p_frees;
- p_params = - residual_params + (norm_r2 / old_norm_r2) * p_params;
-
- cg_iter = cg_iter + 1;
- if (cg_iter > 4) {
- cg_terminate = 1;
- }
- }
-
-### GD-STEP 3: COMPUTE THE NEW DIRECTION VECTOR & "TEST" SHIFT
-
- dirVFrees_candidate = shift_frees;
- dirVParams_candidate = shift_params;
-
-# random_frees = Rand (rows = num_frees, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
-# random_params = Rand (rows = num_params, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
-# dirVFrees_candidate = dirVFrees_candidate * random_frees;
-# dirVParams_candidate = dirVParams_candidate * random_params;
-
- dirVFrees = dirVFrees_candidate;
- dirVParams = dirVParams_candidate;
-
-# dirV_proj_factors = t(past_dirVFrees) %*% dirVFrees_candidate + t(past_dirVParams) %*% dirVParams_candidate;
-# dirVFrees = dirVFrees_candidate - past_dirVFrees %*% dirV_proj_factors;
-# dirVParams = dirVParams_candidate - past_dirVParams %*% dirV_proj_factors;
-
- dirV_denom = sqrt (sum (dirVFrees * dirVFrees) + sum (dirVParams * dirVParams));
- dirVFrees = dirVFrees / dirV_denom;
- dirVParams = dirVParams / dirV_denom;
-
-# past_dirVFrees [, 2:num_past_dirVs] = past_dirVFrees [, 1:(num_past_dirVs-1)];
-# past_dirVParams [, 2:num_past_dirVs] = past_dirVParams [, 1:(num_past_dirVs-1)];
-# past_dirVFrees [, 1] = dirVFrees;
-# past_dirVParams [, 1] = dirVParams;
-
-
-### GD-STEP 4: COMPUTE THE POLYNOMIAL FOR d loss(t) / dt
-
- dirVRegresValues = RegresValueMap %*% CReps %*% dirVFrees;
- dirVRegresParams = RegresParamMap %*% dirVParams;
- dirVdirV_bilinear_sums = SumFactor %*% (dirVRegresValues * dirVRegresParams);
-
- dirV_bilinear_sums = SumFactor %*% (dirVRegresValues * regresParams + regresValues * dirVRegresParams);
- L_0 = sum (w_bilinear_sums * bilinear_sums);
- L_prime_0 = 2.0 * sum (w_bilinear_sums * dirV_bilinear_sums);
-
- freeVars_T = freeVars + shift_T * dirVFrees;
- params_T = params + shift_T * dirVParams;
-
- reports_T = CReps %*% freeVars_T + dReps;
- regresValues_T = RegresValueMap %*% reports_T + RegresFactorDefault;
- regresParams_T = RegresParamMap %*% params_T + RegresCoeffDefault;
-
- bilinear_sums_T = SumFactor %*% (regresValues_T * regresParams_T);
- w_bilinear_sums_T = RegresScaleMult * bilinear_sums_T;
- dirV_bilinear_sums_T = SumFactor %*% (dirVRegresValues * regresParams_T + regresValues_T * dirVRegresParams);
-
- L_T = sum (w_bilinear_sums_T * bilinear_sums_T);
- L_prime_T = 2.0 * sum (w_bilinear_sums_T * dirV_bilinear_sums_T);
-
- coeff_a = 4.0 * sum (RegresScaleMult * dirVdirV_bilinear_sums * dirVdirV_bilinear_sums);
- coeff_b = -1.5 * coeff_a * shift_T + 3.0 * (L_prime_0 + L_prime_T + 2.0 * (L_0 - L_T) / shift_T) / (shift_T * shift_T);
- coeff_c = 0.5 * coeff_a * shift_T * shift_T - 2.0 * (2.0 * L_prime_0 + L_prime_T + 3.0 * (L_0 - L_T) / shift_T) / shift_T;
- coeff_d = L_prime_0;
-
-### GD-STEP 5: SOLVE CUBIC EQUATION & PICK THE BEST SHIFT (ROOT)
-
- coeff_aa = coeff_b / coeff_a;
- coeff_bb = coeff_c / coeff_a;
- coeff_cc = coeff_d / coeff_a;
-
- coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0;
- coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0;
-
- root_choice = 0.0;
- if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q)
- {
- two_pi_third = 2.0943951023931954923084289221863;
- acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q);
-
- x = abs (acos_argument);
- acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016
- + x * ( 0.0889789874 + x * (-0.0501743046
- + x * ( 0.0308918810 + x * (-0.0170881256
- + x * ( 0.0066700901 + x * (-0.0012624911))))))));
- if (acos_argument >= 0.0) {
- coeff_theta = acos_x;
- } else {
- coeff_theta = 3.1415926535897932384626433832795 - acos_x;
- }
-
- root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0);
- root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third);
- root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third);
- root_min = min (min (root_1, root_2), root_3);
- root_max = max (max (root_1, root_2), root_3);
- root_int_diff = (((root_max * coeff_a / 4.0 + coeff_b / 3.0) * root_max + coeff_c / 2.0) * root_max + coeff_d) * root_max
- - (((root_min * coeff_a / 4.0 + coeff_b / 3.0) * root_min + coeff_c / 2.0) * root_min + coeff_d) * root_min;
- if (root_int_diff >= 0.0) {
- root_choice = root_min;
- } else {
- root_choice = root_max;
- }
- } else {
- if (coeff_R >= 0.0) {
- sgn_coeff_R = 1.0;
- } else {
- sgn_coeff_R = -1.0;
- }
- coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0);
- if (coeff_bigA != 0.0) {
- root_choice = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0;
- } else {
- root_choice = - coeff_aa / 3.0;
- }
- }
-
- root_choice = root_choice -
- (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d)
- / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
- root_choice = root_choice -
- (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d)
- / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
-
-
-### GD-STEP 6: FINISH UP THE ITERATION
-
- freeVars = freeVars + root_choice * dirVFrees;
- params = params + root_choice * dirVParams;
-
- root_int_diff = (((root_choice * coeff_a / 4.0 + coeff_b / 3.0) * root_choice + coeff_c / 2.0) * root_choice + coeff_d) * root_choice;
- if (- root_int_diff < 0.00000001 * L_0) {
- is_enough_gradient_descent = 1;
- }
- gd_iter = gd_iter + 1;
- print ("Grad Descent Iter " + gd_iter + ": L = " + (L_0 + root_int_diff) + "; shift = " + root_choice);
- shift_T = - 100.0 * sqrt (abs(root_choice) * abs(shift_T));
-}
-
-
-print ("Gradient Descent finished. Starting MCMC...");
-
-
-
-
-END UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT */
-
-
-
-
-
-
-
-
-print ("Performing MCMC initialization...");
-
-reports = CReps %*% freeVars + dReps;
-regresValues = RegresValueMap %*% reports + RegresFactorDefault;
-regresParams = RegresParamMap %*% params + RegresCoeffDefault;
-
-bilinear_vector = regresValues * regresParams;
-bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-
-max_num_iter = max_num_burnin_iterations + max_num_observed_iterations;
-dim_sample = num_frees + num_params;
-sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
-
-# Generate a random permutation matrix for the sampling order of freeVars and params
-
-SampleOrder = diag (sample_ones);
-num_swaps = 10 * dim_sample;
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-left_swap = round (0.5 + dim_sample * rnd);
-rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
-right_swap = round (0.5 + dim_sample * rnd);
-for (swap_i in 1:num_swaps) {
- l = castAsScalar (left_swap [swap_i, 1]);
- r = castAsScalar (right_swap [swap_i, 1]);
- if (l != r) {
- tmp_row = SampleOrder [l, ];
- SampleOrder [l, ] = SampleOrder [r, ];
- SampleOrder [r, ] = tmp_row;
- }
-}
-
-pi = 3.1415926535897932384626433832795;
-zero = matrix (0.0, rows = 1, cols = 1);
-
-isVar = colSums (SampleOrder [1 : num_frees, ]);
-sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
-sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
-num_of_observed_reports = 0;
-sum_of_observed_losses = 0.0;
-is_observed = 0;
-
-is_calculating_loss_change = 0;
-is_monitoring_loss_change = 0;
-avg_prob_of_loss_increase = 0;
-update_factor_for_avg_loss_change = 0.02;
-avg_loss_change = -50.0 * update_factor_for_avg_loss_change;
-old_bilinear_form_value = bilinear_form_value;
-
-# Starting MCMC iterations
-
-iter = 0;
-
-while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations))
-{
- iter = iter + 1;
-
- # Initialize (bi-)linear forms
-
- regresValues = RegresValueMap %*% reports + RegresFactorDefault;
- regresParams = RegresParamMap %*% params + RegresCoeffDefault;
- bilinear_form_vector = regresValues * regresParams;
-
- bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
- bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
-
- if (bilinear_form_value > old_bilinear_form_value) {
- avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change;
- } else {
- avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change);
- }
- if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) {
- is_calculating_loss_change = 1;
- }
- if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) {
- is_calculating_loss_change = 1;
- is_monitoring_loss_change = 1;
- print ("Monitoring the average loss change is ON. ");
- }
- if (is_calculating_loss_change == 1) {
- avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change)
- + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change;
- }
- if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) {
- print ("Burn-in ENDS, observation STARTS. ");
- is_observed = 1;
- }
-
- old_bilinear_form_value = bilinear_form_value;
-
- bilinear_form_value_to_print = bilinear_form_value;
- if (bilinear_form_value < 100000) {
- bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000;
- } else {
- if (bilinear_form_value < 1000000000) {
- bilinear_form_value_to_print = round (bilinear_form_value);
- }}
-
- if (is_monitoring_loss_change == 0) {
- print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
- + ", bilinear form value = " + bilinear_form_value_to_print);
- } else {
- print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
- + ", bilinear form value = " + bilinear_form_value_to_print + ", avg_loss_change = " + (round (10000 * avg_loss_change) / 10000));
- }
-
- # Create a normally distributed random sample
-
- dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
- rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
- rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
- rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
- rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
- rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
- rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
- rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
-
- # Initialize updaters
-
- freeVars_updater = freeVars * 0.0;
- params_updater = params * 0.0;
- regresValues_updater = regresValues * 0.0;
- regresParams_updater = regresParams * 0.0;
- bilinear_updater_vector = bilinear_form_vector * 0.0;
-
- # Perform the sampling
-
- for (idx in 1:dim_sample)
- {
- # Generate the sample unit-vector and updaters
-
- if (castAsScalar (isVar [1, idx]) > 0.5) {
- freeVars_updater = SampleOrder [1 : num_frees, idx];
- regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
- bilinear_updater_vector = regresValues_updater * regresParams;
- } else {
- params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
- regresParams_updater = RegresParamMap %*% params_updater;
- bilinear_updater_vector = regresValues * regresParams_updater;
- }
- bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
-
- # Compute the quadratic by three shift-points: -1, 0, +1
-
- bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
- q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
- q_plus_1 = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
- coeff_b = (q_plus_1 - q_minus_1) / 2.0;
- coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
-
- # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
- # then compute the shift to get the new sample
-
- mean_shift = - coeff_b / (2.0 * coeff_a);
- sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
- shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
-
-# BEGIN DEBUG INSERT
-# mmm = 1;
-# if (castAsScalar (isVar [1, idx]) > 0.5 & # IT IS A FREE VARIABLE, NOT A PARAMETER
-# castAsScalar (freeVars_updater [mmm, 1]) > 0) # IT IS mmm-TH FREE VARIABLE
-# {
-# # print ("freeVars[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b);
-# print ("freeVars[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift);
-# }
-# if (castAsScalar (isVar [1, idx]) <= 0.5 & # IT IS A PARAMETER, NOT A FREE VARIABLE
-# castAsScalar (params_updater [mmm, 1]) > 0) # IT IS mmm-TH PARAMETER
-# {
-# # print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b);
-# print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift);
-# }
-# END DEBUG INSERT
-
- # Perform the updates
-
- bilinear_form = bilinear_form + shift * bilinear_updater;
- if (castAsScalar (isVar [1, idx]) > 0.5) {
- freeVars = freeVars + shift * freeVars_updater;
- regresValues = regresValues + shift * regresValues_updater;
- } else {
- params = params + shift * params_updater;
- regresParams = regresParams + shift * regresParams_updater;
- }
- }
-
- # Update / adjust the reports and the parameters
-
- reports = CReps %*% freeVars + dReps;
- reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE);
-
- # Make an observation of the reports and/or the parameters
-
- if (is_observed > 0)
- {
- sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
- num_of_observed_reports = num_of_observed_reports + 1;
-
- sum_of_observed_params = sum_of_observed_params + params;
- sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value;
- }
-
-# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000);
-# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000);
-# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000);
-# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000);
-# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000);
-# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000);
-# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000);
-# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000);
-
-# v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
-# v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
-# v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
-# v10=castAsScalar(round(reports_matrix[10,num_terms]));
-# v16=castAsScalar(round(reports_matrix[16,num_terms]));
-# v19=castAsScalar(round(reports_matrix[19,num_terms]));
-
-#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4);
-## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
-#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4);
-## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19);
-
-}
-
-print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports));
-print ("Writing out the results...");
-
-avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
-avg_params = sum_of_observed_params / num_of_observed_reports;
-write (avg_reports_matrix, $11, format="text");
-write (avg_params, $12, format="text");
-
-print ("END ImputeGaussMCMC");
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements the MCMC algorithm for imputation of missing data into a time-series of "reports".
+# Each report is a fixed-size vector of attribute values; reports come out each year/quarter/month ("term").
+# Hard linear equality constraints restrict values in/across the reports, e.g. total cost = sum of all costs.
+# Soft linear regression constraints define dependencies between values in/across the reports.
+# Linear regression parameters are unknown and sampled together with the missing values in the reports.
+#
+# INPUT 1: Initial reports matrix [1 : num_attrs, 1 : num_terms] with missing values usually set to zero,
+# but it MUST BE CONSISTENT with hard constraints! Set some missing values to nonzero if needed.
+# There are "num_terms" reports in the matrix, each having "num_attrs" attribute values.
+#
+# INPUT 2: Sparse matrix [1 : (num_terms * num_attrs), 1 : num_frees] that defines a linear map from
+# "free" variables to the reports matrix. A tensor of size (num_terms * num_attrs * num_frees)
+# where the reports matrix is stretched into a column-vector [1 : (num_terms * num_attrs)].
+# Term = t, attribute = i --> index = (t-1) * num_attrs + i
+#
+# INPUT 3: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : (num_terms * num_attrs)] that defines
+# a linear map from the stretched matrix of reports to the stretched matrix of regression factors.
+#
+# INPUT 4: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines "default regression factors"
+# (if nonzero) to be added to the regression factors before they are multiplied by parameters.
+#
+# INPUT 5: Sparse matrix [1 : (num_reg_eqs * num_factors), 1 : num_params] that defines a linear map
+# from the vector of parameters to the stretched matrix of regression factors.
+#
+# INPUT 6: Sparse vector [1 : (num_reg_eqs * num_factors), 1] that defines default regression coefficients
+# (if nonzero) to be added to the parameters (if any) before being multiplied by regression factors.
+#
+# INPUT 7: A vector [1 : num_reg_eqs, 1] of scale multipliers, one per regression
+#
+# INPUT 8 : Number of factors in a regression equation, including the estimated value
+# INPUT 9 : Maximum number of burn-in full iterations (that sample each variable and each parameter once)
+# BUT the actual number of burn-in iterations may be smaller if "free fall" ends sooner
+# INPUT 10: Maximum number of observed full iterations (that sample each variable and each parameter once)
+#
+# INPUT 11: Output file name and path for the average MCMC reports table
+# INPUT 12: Output file for debugging (currently: the average parameters vector)
+#
+# Example:
+# hadoop jar SystemML.jar -f test/scripts/applications/impute/imputeGaussMCMC.dml -exec singlenode -args
+# test/scripts/applications/impute/initial_reports
+# test/scripts/applications/impute/CReps
+# test/scripts/applications/impute/RegresValueMap
+# test/scripts/applications/impute/RegresFactorDefault
+# test/scripts/applications/impute/RegresParamMap
+# test/scripts/applications/impute/RegresCoeffDefault
+# test/scripts/applications/impute/RegresScaleMult
+# 4 1000 100
+# test/scripts/applications/impute/output_reports
+# test/scripts/applications/impute/debug_info
+
+
+print ("START ImputeGaussMCMC");
+print ("Reading the input files...");
+
+initial_reports = read ($1);
+CReps = read ($2);
+
+num_terms = ncol (initial_reports); # Number of periods for which reports are produced
+num_attrs = nrow (initial_reports); # Number of attribute values per each term report
+num_frees = ncol (CReps); # Number of free variables used to describe all consistent reports
+
+dReps_size = num_terms * num_attrs;
+dReps = matrix (initial_reports, rows = dReps_size, cols = 1, byrow = FALSE);
+
+# We assume that all report-series consistent with hard constraints form an affine set:
+# reports = CReps %*% freeVars + dReps
+# Here "freeVars" is a vector of "free variables" (degrees of freedom), "CReps" is a linear mapping,
+# and "dReps" are the default values for the reports that correspond to the initial reports matrix.
+
+RegresValueMap = read ($3);
+RegresFactorDefault = read ($4);
+RegresParamMap = read ($5);
+RegresCoeffDefault = read ($6);
+RegresScaleMult = read ($7);
+
+num_factors = $8; # Number of factors in each regression equation, including the estimated value
+num_reg_eqs = nrow (RegresParamMap) / num_factors; # Number of regression equations
+num_params = ncol (RegresParamMap); # Number of parameters used in all regressions
+max_num_burnin_iterations = $9;
+max_num_observed_iterations = $10;
+
+ones_fv = matrix (1.0, rows = 1, cols = num_frees);
+ones_pm = matrix (1.0, rows = 1, cols = num_params);
+twos_re = matrix (2.0, rows = 1, cols = num_reg_eqs);
+
+# Create a summation operator (matrix) that adds the factors in each regression:
+
+ncols_SumFactor = num_reg_eqs * num_factors;
+SumFactor = matrix (0.0, rows = num_reg_eqs, cols = ncols_SumFactor);
+ones_f = matrix (1.0, rows = 1, cols = num_factors);
+SumFactor [1, 1:num_factors] = ones_f;
+nSumFactor = 1;
+while (nSumFactor < num_reg_eqs) {
+ incSumFactor = nSumFactor;
+ if (incSumFactor > num_reg_eqs - nSumFactor) {
+ incSumFactor = num_reg_eqs - nSumFactor;
+ }
+ SumFactor [(nSumFactor + 1) : (nSumFactor + incSumFactor),
+ (nSumFactor * num_factors + 1) : ((nSumFactor + incSumFactor) * num_factors)] =
+ SumFactor [1 : incSumFactor, 1 : (incSumFactor * num_factors)];
+ nSumFactor = nSumFactor + incSumFactor;
+}
+
+freeVars = matrix (0.0, rows = num_frees, cols = 1);
+params = matrix (1.0, rows = num_params, cols = 1);
+
+
+
+num_opt_iter = 20;
+print ("Performing initial optimization (" + num_opt_iter + " alternating CG steps)...");
+
+reports = CReps %*% freeVars + dReps;
+regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+
+bilinear_sums = SumFactor %*% (regresValues * regresParams);
+w_bilinear_sums = RegresScaleMult * bilinear_sums;
+bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
+
+opt_iter = 1;
+is_step_params = 1;
+is_opt_converged = 0;
+
+print ("Before optimization: Initial bilinear form value = " + bilinear_form_value);
+
+while (is_opt_converged == 0)
+{
+ deg = is_step_params * num_params + (1 - is_step_params) * num_frees;
+ shift_vector = matrix (0.0, rows = deg, cols = 1);
+
+ # Compute gradient
+
+ if (is_step_params == 1) {
+ gradient = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
+ } else {
+ gradient = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
+ }
+
+ # Make a few conjugate gradient steps
+
+ residual = t(gradient);
+ p = - residual;
+ norm_r2 = sum (residual * residual);
+ cg_iter = 1;
+ cg_terminate = 0;
+
+ while (cg_terminate == 0)
+ {
+ # Want: q = A %*% p;
+ # Compute gradient change from 0 to p
+
+ if (is_step_params == 1) {
+ w_bilinear_p = RegresScaleMult * (SumFactor %*% (regresValues * (RegresParamMap %*% p)));
+ gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
+ } else {
+ w_bilinear_p = RegresScaleMult * (SumFactor %*% ((RegresValueMap %*% CReps %*% p) * regresParams));
+ gradient_change_p = twos_re %*% ((w_bilinear_p %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
+ }
+ q = t(gradient_change_p);
+
+ alpha = norm_r2 / castAsScalar (t(p) %*% q);
+ shift_vector_change = alpha * p;
+ shift_vector = shift_vector + shift_vector_change;
+ old_norm_r2 = norm_r2;
+ residual = residual + alpha * q;
+ norm_r2 = sum (residual * residual);
+ p = - residual + (norm_r2 / old_norm_r2) * p;
+ cg_iter = cg_iter + 1;
+ if (cg_iter > min (deg, 2 + opt_iter / 3)) {
+ cg_terminate = 1;
+ }
+ }
+
+ if (is_step_params == 1) {
+ params = params + shift_vector;
+ regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+ } else {
+ freeVars = freeVars + shift_vector;
+ reports = CReps %*% freeVars + dReps;
+ regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+ }
+
+ # Update the bilinear form and check convergence
+
+ if (is_step_params == 1) {
+ old_bilinear_form_value = bilinear_form_value;
+ }
+
+ bilinear_sums = SumFactor %*% (regresValues * regresParams);
+ w_bilinear_sums = RegresScaleMult * bilinear_sums;
+ bilinear_form_value = sum (w_bilinear_sums * bilinear_sums);
+
+ if (is_step_params == 1) {
+ print ("Optimization step " + opt_iter + " (params) : bilinear form value = " + bilinear_form_value);
+ } else {
+ print ("Optimization step " + opt_iter + " (reports): bilinear form value = " + bilinear_form_value);
+ }
+
+ is_step_params = 1 - is_step_params;
+ opt_iter = opt_iter + 1;
+
+ if (is_step_params == 1 & opt_iter > num_opt_iter) {
+ is_opt_converged = 1;
+ }
+}
+
+
+
+/* UNCOMMENT TO TRY CONJUGATE GRADIENT DESCENT
+
+
+
+print ("Starting Gradient Descent...");
+### GRADIENT DESCENT WITH MODIFICATIONS TO ENHANCE CONVERGENCE
+
+# num_past_dirVs = 3;
+# past_dirVFrees = matrix (0.0, rows = num_frees, cols = num_past_dirVs);
+# past_dirVParams = matrix (0.0, rows = num_params, cols = num_past_dirVs);
+
+shift_T = -1000.0;
+is_enough_gradient_descent = 0;
+gd_iter = 0;
+
+while (is_enough_gradient_descent == 0)
+{
+### GD-STEP 1: COMPUTE LOSS & GRADIENT AT CURRENT POINT
+
+ reports = CReps %*% freeVars + dReps;
+ regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+ regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+
+ bilinear_sums = SumFactor %*% (regresValues * regresParams);
+ w_bilinear_sums = RegresScaleMult * bilinear_sums;
+ gradientInFrees = twos_re %*% ((w_bilinear_sums %*% ones_fv) * (SumFactor %*% ((regresParams %*% ones_fv) * (RegresValueMap %*% CReps))));
+ gradientInParams = twos_re %*% ((w_bilinear_sums %*% ones_pm) * (SumFactor %*% ((regresValues %*% ones_pm) * RegresParamMap)));
+
+### CG-STEP 2: MAKE A FEW APPROXIMATE CONJUGATE GRADIENT STEPS
+
+ shift_frees = matrix (0.0, rows = num_frees, cols = 1);
+ shift_params = matrix (0.0, rows = num_params, cols = 1);
+
+ residual_frees = t(gradientInFrees);
+ residual_params = t(gradientInParams);
+
+ p_frees = - residual_frees;
+ p_params = - residual_params;
+
+ norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
+
+ cg_iter = 1;
+ cg_terminate = 0;
+ cg_eps = 0.000001;
+
+ while (cg_terminate == 0)
+ {
+ regresValues_eps_p = regresValues + cg_eps * (RegresValueMap %*% CReps %*% p_frees);
+ regresParams_eps_p = regresParams + cg_eps * (RegresParamMap %*% p_params);
+
+ bilinear_sums_eps_p = SumFactor %*% (regresValues_eps_p * regresParams_eps_p);
+ w_bilinear_sums_eps_p = RegresScaleMult * bilinear_sums_eps_p;
+
+ gradientInFrees_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_fv) * (SumFactor %*% ((regresParams_eps_p %*% ones_fv) * (RegresValueMap %*% CReps))));
+ gradientInParams_eps_p = twos_re %*% ((w_bilinear_sums_eps_p %*% ones_pm) * (SumFactor %*% ((regresValues_eps_p %*% ones_pm) * RegresParamMap)));
+
+ q_frees = t(gradientInFrees_eps_p - gradientInFrees) / cg_eps;
+ q_params = t(gradientInParams_eps_p - gradientInParams) / cg_eps;
+
+ alpha = norm_r2 / castAsScalar (t(p_frees) %*% q_frees + t(p_params) %*% q_params);
+
+ shift_frees = shift_frees + alpha * p_frees;
+ shift_params = shift_params + alpha * p_params;
+
+ old_norm_r2 = norm_r2;
+
+ residual_frees = residual_frees + alpha * q_frees;
+ residual_params = residual_params + alpha * q_params;
+
+ norm_r2 = sum (residual_frees * residual_frees) + sum (residual_params * residual_params);
+
+ p_frees = - residual_frees + (norm_r2 / old_norm_r2) * p_frees;
+ p_params = - residual_params + (norm_r2 / old_norm_r2) * p_params;
+
+ cg_iter = cg_iter + 1;
+ if (cg_iter > 4) {
+ cg_terminate = 1;
+ }
+ }
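+
+ # The inner loop above is a truncated (4-step) linear conjugate gradient
+ # solve: q_frees and q_params approximate Hessian-times-direction products
+ # by a finite difference of gradients with step cg_eps, while alpha, the
+ # residual updates, and the norm_r2 / old_norm_r2 factor are the standard
+ # CG recurrences.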
+
+### GD-STEP 3: COMPUTE THE NEW DIRECTION VECTOR & "TEST" SHIFT
+
+ dirVFrees_candidate = shift_frees;
+ dirVParams_candidate = shift_params;
+
+# random_frees = Rand (rows = num_frees, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
+# random_params = Rand (rows = num_params, cols = 1, min = 0.9, max = 1.1, sparsity = 1.0);
+# dirVFrees_candidate = dirVFrees_candidate * random_frees;
+# dirVParams_candidate = dirVParams_candidate * random_params;
+
+ dirVFrees = dirVFrees_candidate;
+ dirVParams = dirVParams_candidate;
+
+# dirV_proj_factors = t(past_dirVFrees) %*% dirVFrees_candidate + t(past_dirVParams) %*% dirVParams_candidate;
+# dirVFrees = dirVFrees_candidate - past_dirVFrees %*% dirV_proj_factors;
+# dirVParams = dirVParams_candidate - past_dirVParams %*% dirV_proj_factors;
+
+ dirV_denom = sqrt (sum (dirVFrees * dirVFrees) + sum (dirVParams * dirVParams));
+ dirVFrees = dirVFrees / dirV_denom;
+ dirVParams = dirVParams / dirV_denom;
+
+# past_dirVFrees [, 2:num_past_dirVs] = past_dirVFrees [, 1:(num_past_dirVs-1)];
+# past_dirVParams [, 2:num_past_dirVs] = past_dirVParams [, 1:(num_past_dirVs-1)];
+# past_dirVFrees [, 1] = dirVFrees;
+# past_dirVParams [, 1] = dirVParams;
+
+
+### GD-STEP 4: COMPUTE THE POLYNOMIAL FOR d loss(t) / dt
+
+ dirVRegresValues = RegresValueMap %*% CReps %*% dirVFrees;
+ dirVRegresParams = RegresParamMap %*% dirVParams;
+ dirVdirV_bilinear_sums = SumFactor %*% (dirVRegresValues * dirVRegresParams);
+
+ dirV_bilinear_sums = SumFactor %*% (dirVRegresValues * regresParams + regresValues * dirVRegresParams);
+ L_0 = sum (w_bilinear_sums * bilinear_sums);
+ L_prime_0 = 2.0 * sum (w_bilinear_sums * dirV_bilinear_sums);
+
+ freeVars_T = freeVars + shift_T * dirVFrees;
+ params_T = params + shift_T * dirVParams;
+
+ reports_T = CReps %*% freeVars_T + dReps;
+ regresValues_T = RegresValueMap %*% reports_T + RegresFactorDefault;
+ regresParams_T = RegresParamMap %*% params_T + RegresCoeffDefault;
+
+ bilinear_sums_T = SumFactor %*% (regresValues_T * regresParams_T);
+ w_bilinear_sums_T = RegresScaleMult * bilinear_sums_T;
+ dirV_bilinear_sums_T = SumFactor %*% (dirVRegresValues * regresParams_T + regresValues_T * dirVRegresParams);
+
+ L_T = sum (w_bilinear_sums_T * bilinear_sums_T);
+ L_prime_T = 2.0 * sum (w_bilinear_sums_T * dirV_bilinear_sums_T);
+
+ coeff_a = 4.0 * sum (RegresScaleMult * dirVdirV_bilinear_sums * dirVdirV_bilinear_sums);
+ coeff_b = -1.5 * coeff_a * shift_T + 3.0 * (L_prime_0 + L_prime_T + 2.0 * (L_0 - L_T) / shift_T) / (shift_T * shift_T);
+ coeff_c = 0.5 * coeff_a * shift_T * shift_T - 2.0 * (2.0 * L_prime_0 + L_prime_T + 3.0 * (L_0 - L_T) / shift_T) / shift_T;
+ coeff_d = L_prime_0;
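+
+ # Along the direction (dirVFrees, dirVParams) the loss L(t) is a quartic
+ # polynomial in the step size t, so d loss(t) / dt is the cubic
+ # coeff_a*t^3 + coeff_b*t^2 + coeff_c*t + coeff_d. The leading coefficient
+ # coeff_a is computed exactly, coeff_d = L'(0), and coeff_b, coeff_c are
+ # recovered from L_0 = L(0), L_prime_0 = L'(0), L_T = L(shift_T) and
+ # L_prime_T = L'(shift_T).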
+
+### GD-STEP 5: SOLVE CUBIC EQUATION & PICK THE BEST SHIFT (ROOT)
+
+ coeff_aa = coeff_b / coeff_a;
+ coeff_bb = coeff_c / coeff_a;
+ coeff_cc = coeff_d / coeff_a;
+
+ coeff_Q = (coeff_aa * coeff_aa - 3.0 * coeff_bb) / 9.0;
+ coeff_R = (2.0 * coeff_aa * coeff_aa * coeff_aa - 9.0 * coeff_aa * coeff_bb + 27.0 * coeff_cc) / 54.0;
+
+ root_choice = 0.0;
+ if (coeff_R * coeff_R < coeff_Q * coeff_Q * coeff_Q)
+ {
+ two_pi_third = 2.0943951023931954923084289221863;
+ acos_argument = coeff_R / sqrt (coeff_Q * coeff_Q * coeff_Q);
+
+ x = abs (acos_argument);
+ acos_x = sqrt (1.0 - x) * (1.5707963050 + x * (-0.2145988016
+ + x * ( 0.0889789874 + x * (-0.0501743046
+ + x * ( 0.0308918810 + x * (-0.0170881256
+ + x * ( 0.0066700901 + x * (-0.0012624911))))))));
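+ # The polynomial above approximates acos(x) for x in [0, 1]; the
+ # coefficients match the classic degree-7 approximation of Abramowitz &
+ # Stegun, formula 4.4.45 (absolute error about 2e-8), presumably used
+ # because no acos built-in is available here.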
+ if (acos_argument >= 0.0) {
+ coeff_theta = acos_x;
+ } else {
+ coeff_theta = 3.1415926535897932384626433832795 - acos_x;
+ }
+
+ root_1 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0);
+ root_2 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 + two_pi_third);
+ root_3 = - coeff_aa / 3.0 - 2.0 * sqrt (coeff_Q) * cos (coeff_theta / 3.0 - two_pi_third);
+ root_min = min (min (root_1, root_2), root_3);
+ root_max = max (max (root_1, root_2), root_3);
+ root_int_diff = (((root_max * coeff_a / 4.0 + coeff_b / 3.0) * root_max + coeff_c / 2.0) * root_max + coeff_d) * root_max
+ - (((root_min * coeff_a / 4.0 + coeff_b / 3.0) * root_min + coeff_c / 2.0) * root_min + coeff_d) * root_min;
+ if (root_int_diff >= 0.0) {
+ root_choice = root_min;
+ } else {
+ root_choice = root_max;
+ }
+ } else {
+ if (coeff_R >= 0.0) {
+ sgn_coeff_R = 1.0;
+ } else {
+ sgn_coeff_R = -1.0;
+ }
+ coeff_bigA = - sgn_coeff_R * (abs (coeff_R) + sqrt (coeff_R * coeff_R - coeff_Q * coeff_Q * coeff_Q)) ^ (1.0 / 3.0);
+ if (coeff_bigA != 0.0) {
+ root_choice = coeff_bigA + coeff_Q / coeff_bigA - coeff_aa / 3.0;
+ } else {
+ root_choice = - coeff_aa / 3.0;
+ }
+ }
+
+ root_choice = root_choice -
+ (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d)
+ / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
+ root_choice = root_choice -
+ (((coeff_a * root_choice + coeff_b) * root_choice + coeff_c) * root_choice + coeff_d)
+ / ((3 * coeff_a * root_choice + 2 * coeff_b) * root_choice + coeff_c);
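+
+ # The branch above is the textbook closed-form cubic solution (as in
+ # Numerical Recipes, Sec. 5.6): three real roots via the trigonometric
+ # formula when R^2 < Q^3, otherwise one real root via A + Q/A. Only the
+ # outermost roots can be minima of the quartic loss, so root_int_diff
+ # integrates the cubic between them to pick the root with the smaller
+ # loss; the two identical updates just above are Newton-Raphson
+ # refinements of the chosen root.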
+
+
+### GD-STEP 6: FINISH UP THE ITERATION
+
+ freeVars = freeVars + root_choice * dirVFrees;
+ params = params + root_choice * dirVParams;
+
+ root_int_diff = (((root_choice * coeff_a / 4.0 + coeff_b / 3.0) * root_choice + coeff_c / 2.0) * root_choice + coeff_d) * root_choice;
+ if (- root_int_diff < 0.00000001 * L_0) {
+ is_enough_gradient_descent = 1;
+ }
+ gd_iter = gd_iter + 1;
+ print ("Grad Descent Iter " + gd_iter + ": L = " + (L_0 + root_int_diff) + "; shift = " + root_choice);
+ shift_T = - 100.0 * sqrt (abs(root_choice) * abs(shift_T));
+}
+
+
+print ("Gradient Descent finished. Starting MCMC...");
+
+
+
+
+END OF BLOCK TO UNCOMMENT FOR CONJUGATE GRADIENT DESCENT */
+
+
+
+
+
+
+
+
+print ("Performing MCMC initialization...");
+
+reports = CReps %*% freeVars + dReps;
+regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+
+bilinear_vector = regresValues * regresParams;
+bilinear_form = matrix (bilinear_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
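+
+# Each row of bilinear_form is one regression equation, so the loss
+# bilinear_form_value is the weighted sum of squared row-sums with weights
+# RegresScaleMult; exp(-loss) is the unnormalized density that the MCMC
+# sampler below draws from.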
+
+max_num_iter = max_num_burnin_iterations + max_num_observed_iterations;
+dim_sample = num_frees + num_params;
+sample_ones = matrix (1.0, rows = dim_sample, cols = 1);
+
+# Generate a random permutation matrix for the sampling order of freeVars and params
+
+SampleOrder = diag (sample_ones);
+num_swaps = 10 * dim_sample;
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+left_swap = round (0.5 + dim_sample * rnd);
+rnd = Rand (rows = num_swaps, cols = 1, min = 0.0, max = 1.0);
+right_swap = round (0.5 + dim_sample * rnd);
+for (swap_i in 1:num_swaps) {
+ l = castAsScalar (left_swap [swap_i, 1]);
+ r = castAsScalar (right_swap [swap_i, 1]);
+ if (l != r) {
+ tmp_row = SampleOrder [l, ];
+ SampleOrder [l, ] = SampleOrder [r, ];
+ SampleOrder [r, ] = tmp_row;
+ }
+}
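+
+# SampleOrder starts as the identity matrix (diag of ones) and is shuffled
+# by num_swaps random row transpositions, giving a roughly uniform random
+# permutation matrix; each of its columns later selects one coordinate, a
+# free variable or a parameter, to resample. The swap indices are meant to
+# be uniform over 1..dim_sample, assuming Rand stays strictly inside (0,1).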
+
+pi = 3.1415926535897932384626433832795;
+zero = matrix (0.0, rows = 1, cols = 1);
+
+isVar = colSums (SampleOrder [1 : num_frees, ]);
+sum_of_observed_reports = matrix (0.0, rows = num_attrs, cols = num_terms);
+sum_of_observed_params = matrix (0.0, rows = num_params, cols = 1);
+num_of_observed_reports = 0;
+sum_of_observed_losses = 0.0;
+is_observed = 0;
+
+is_calculating_loss_change = 0;
+is_monitoring_loss_change = 0;
+avg_prob_of_loss_increase = 0;
+update_factor_for_avg_loss_change = 0.02;
+avg_loss_change = -50.0 * update_factor_for_avg_loss_change;
+old_bilinear_form_value = bilinear_form_value;
+
+# Starting MCMC iterations
+
+iter = 0;
+
+while ((iter < max_num_iter) & (num_of_observed_reports < max_num_observed_iterations))
+{
+ iter = iter + 1;
+
+ # Initialize (bi-)linear forms
+
+ regresValues = RegresValueMap %*% reports + RegresFactorDefault;
+ regresParams = RegresParamMap %*% params + RegresCoeffDefault;
+ bilinear_form_vector = regresValues * regresParams;
+
+ bilinear_form = matrix (bilinear_form_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+ bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+
+ if (bilinear_form_value > old_bilinear_form_value) {
+ avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change) + 1 * update_factor_for_avg_loss_change;
+ } else {
+ avg_prob_of_loss_increase = avg_prob_of_loss_increase * (1 - update_factor_for_avg_loss_change);
+ }
+ if (is_calculating_loss_change == 0 & avg_prob_of_loss_increase > 0.4) {
+ is_calculating_loss_change = 1;
+ }
+ if (is_monitoring_loss_change == 0 & avg_prob_of_loss_increase > 0.5) {
+ is_calculating_loss_change = 1;
+ is_monitoring_loss_change = 1;
+ print ("Monitoring the average loss change is ON. ");
+ }
+ if (is_calculating_loss_change == 1) {
+ avg_loss_change = avg_loss_change * (1 - update_factor_for_avg_loss_change)
+ + (bilinear_form_value - old_bilinear_form_value) * update_factor_for_avg_loss_change;
+ }
+ if (is_observed == 0 & ((is_monitoring_loss_change == 1 & avg_loss_change > 0) | iter > max_num_burnin_iterations)) {
+ print ("Burn-in ENDS, observation STARTS. ");
+ is_observed = 1;
+ }
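+
+ # Adaptive burn-in: avg_prob_of_loss_increase is an exponential moving
+ # average (update factor 0.02) of how often the loss goes up between
+ # iterations. Once it exceeds 0.4 the average loss change is tracked too,
+ # and once it exceeds 0.5 that average is monitored; observation starts
+ # when the loss stops decreasing on average (the chain looks stationary)
+ # or when max_num_burnin_iterations is reached.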
+
+ old_bilinear_form_value = bilinear_form_value;
+
+ bilinear_form_value_to_print = bilinear_form_value;
+ if (bilinear_form_value < 100000) {
+ bilinear_form_value_to_print = round (10000 * bilinear_form_value) / 10000;
+ } else {
+ if (bilinear_form_value < 1000000000) {
+ bilinear_form_value_to_print = round (bilinear_form_value);
+ }
+ }
+
+ if (is_monitoring_loss_change == 0) {
+ print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
+ + ", bilinear form value = " + bilinear_form_value_to_print);
+ } else {
+ print ("MCMC iteration " + iter + ": Prob [loss_increase] = " + (round (10000 * avg_prob_of_loss_increase) / 10000)
+ + ", bilinear form value = " + bilinear_form_value_to_print + ", avg_loss_change = " + (round (10000 * avg_loss_change) / 10000));
+ }
+
+ # Create a normally distributed random sample
+
+ dim_half_sample = castAsScalar (round (dim_sample / 2 + 0.1 + zero));
+ rnd1 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+ rnd2 = Rand (rows = dim_half_sample, cols = 1, min = 0.0, max = 1.0);
+ rnd_normal_1 = sqrt (- 2.0 * log (rnd1)) * sin (2 * pi * rnd2);
+ rnd_normal_2 = sqrt (- 2.0 * log (rnd1)) * cos (2 * pi * rnd2);
+ rnd_normal = matrix (0.0, rows = dim_sample, cols = 1);
+ rnd_normal [1 : dim_half_sample, ] = rnd_normal_1;
+ rnd_normal [(dim_sample - dim_half_sample + 1) : dim_sample, ] = rnd_normal_2;
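+
+ # The block above is the Box-Muller transform: two independent U(0,1)
+ # vectors u1, u2 yield two independent standard normal vectors as
+ # sqrt(-2*log(u1)) * sin(2*pi*u2) and sqrt(-2*log(u1)) * cos(2*pi*u2).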
+
+ # Initialize updaters
+
+ freeVars_updater = freeVars * 0.0;
+ params_updater = params * 0.0;
+ regresValues_updater = regresValues * 0.0;
+ regresParams_updater = regresParams * 0.0;
+ bilinear_updater_vector = bilinear_form_vector * 0.0;
+
+ # Perform the sampling
+
+ for (idx in 1:dim_sample)
+ {
+ # Generate the sample unit-vector and updaters
+
+ if (castAsScalar (isVar [1, idx]) > 0.5) {
+ freeVars_updater = SampleOrder [1 : num_frees, idx];
+ regresValues_updater = RegresValueMap %*% CReps %*% freeVars_updater;
+ bilinear_updater_vector = regresValues_updater * regresParams;
+ } else {
+ params_updater = SampleOrder [(num_frees + 1) : dim_sample, idx];
+ regresParams_updater = RegresParamMap %*% params_updater;
+ bilinear_updater_vector = regresValues * regresParams_updater;
+ }
+ bilinear_updater = matrix (bilinear_updater_vector, rows = num_reg_eqs, cols = num_factors, byrow = TRUE);
+
+ # Compute the quadratic by three shift-points: -1, 0, +1
+
+ bilinear_form_value = sum (RegresScaleMult * rowSums (bilinear_form) * rowSums (bilinear_form));
+ q_minus_1 = sum (RegresScaleMult * rowSums (bilinear_form - bilinear_updater) * rowSums (bilinear_form - bilinear_updater));
+ q_plus_1 = sum (RegresScaleMult * rowSums (bilinear_form + bilinear_updater) * rowSums (bilinear_form + bilinear_updater));
+ coeff_b = (q_plus_1 - q_minus_1) / 2.0;
+ coeff_a = (q_plus_1 + q_minus_1) / 2.0 - bilinear_form_value;
+
+ # Find the mean and the sigma for f(x) ~ exp (- (ax^2 + bx + c)),
+ # then compute the shift to get the new sample
+
+ mean_shift = - coeff_b / (2.0 * coeff_a);
+ sigma_shift = 1.0 / sqrt (2.0 * coeff_a);
+ shift = mean_shift + sigma_shift * castAsScalar (rnd_normal [idx, 1]);
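+
+ # Since the loss is quadratic along the chosen coordinate, its values at
+ # shifts -1, 0, +1 determine it exactly: coeff_a = (q(+1) + q(-1))/2 - q(0)
+ # and coeff_b = (q(+1) - q(-1))/2. This makes the update a Gibbs-style
+ # draw from the exact conditional Gaussian of that coordinate.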
+
+# BEGIN DEBUG INSERT
+# mmm = 1;
+# if (castAsScalar (isVar [1, idx]) > 0.5 & # IT IS A FREE VARIABLE, NOT A PARAMETER
+# castAsScalar (freeVars_updater [mmm, 1]) > 0) # IT IS mmm-TH FREE VARIABLE
+# {
+# # print ("freeVars[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b);
+# print ("freeVars[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift);
+# }
+# if (castAsScalar (isVar [1, idx]) <= 0.5 & # IT IS A PARAMETER, NOT A FREE VARIABLE
+# castAsScalar (params_updater [mmm, 1]) > 0) # IT IS mmm-TH PARAMETER
+# {
+# # print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", coeff_a = " + coeff_a + ", coeff_b = " + coeff_b);
+# print (" params[" + mmm + "]: q_minus_1 = " + q_minus_1 + ", q_plus_1 = " + q_plus_1 + ", mean_shift = " + mean_shift + ", sigma_shift = " + sigma_shift + ", shift = " + shift);
+# }
+# END DEBUG INSERT
+
+ # Perform the updates
+
+ bilinear_form = bilinear_form + shift * bilinear_updater;
+ if (castAsScalar (isVar [1, idx]) > 0.5) {
+ freeVars = freeVars + shift * freeVars_updater;
+ regresValues = regresValues + shift * regresValues_updater;
+ } else {
+ params = params + shift * params_updater;
+ regresParams = regresParams + shift * regresParams_updater;
+ }
+ }
+
+ # Update / adjust the reports and the parameters
+
+ reports = CReps %*% freeVars + dReps;
+ reports_matrix = matrix (reports, rows = num_attrs, cols = num_terms, byrow = FALSE);
+
+ # Make an observation of the reports and/or the parameters
+
+ if (is_observed > 0)
+ {
+ sum_of_observed_reports = sum_of_observed_reports + reports_matrix;
+ num_of_observed_reports = num_of_observed_reports + 1;
+
+ sum_of_observed_params = sum_of_observed_params + params;
+ sum_of_observed_losses = sum_of_observed_losses + bilinear_form_value;
+ }
+
+# v1 =castAsScalar(round(10000*reports[1 + (num_terms - 1) * num_attrs, 1])/10000);
+# v2 =castAsScalar(round(10000*reports[2 + (num_terms - 1) * num_attrs, 1])/10000);
+# v3 =castAsScalar(round(10000*reports[3 + (num_terms - 1) * num_attrs, 1])/10000);
+# v4 =castAsScalar(round(10000*reports[4 + (num_terms - 1) * num_attrs, 1])/10000);
+# w1 =castAsScalar(round(10000*reports_matrix[ 1,num_terms])/10000);
+# w2 =castAsScalar(round(10000*reports_matrix[ 2,num_terms])/10000);
+# w3 =castAsScalar(round(10000*reports_matrix[ 3,num_terms])/10000);
+# w4 =castAsScalar(round(10000*reports_matrix[ 4,num_terms])/10000);
+
+# v5 =castAsScalar(round(reports_matrix[ 5,num_terms]));
+# v8 =castAsScalar(round(reports_matrix[ 8,num_terms]));
+# v9 =castAsScalar(round(reports_matrix[ 9,num_terms]));
+# v10=castAsScalar(round(reports_matrix[10,num_terms]));
+# v16=castAsScalar(round(reports_matrix[16,num_terms]));
+# v19=castAsScalar(round(reports_matrix[19,num_terms]));
+
+#print (" Sample = 1:" + v1 + ", 2:" + v2 + ", 3:" + v3 + ", 4:" + v4);
+## + ", 5:" + v5 + ", 8:" + v8 + ", 9:" + v9 + ", 10:" + v10 + ", 16:" + v16 + ", 19:" + v19);
+#print (" Sample = 1:" + w1 + ", 2:" + w2 + ", 3:" + w3 + ", 4:" + w4);
+## + ", 5:" + w5 + ", 8:" + w8 + ", 9:" + w9 + ", 10:" + w10 + ", 16:" + w16 + ", 19:" + w19);
+
+}
+
+print ("Average observed loss = " + (sum_of_observed_losses / num_of_observed_reports));
+print ("Writing out the results...");
+
+avg_reports_matrix = sum_of_observed_reports / num_of_observed_reports;
+avg_params = sum_of_observed_params / num_of_observed_reports;
+write (avg_reports_matrix, $11, format="text");
+write (avg_params, $12, format="text");
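+
+# $11 and $12 are positional command-line arguments of this DML script,
+# giving the output paths for the averaged reports and parameters.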
+
+print ("END ImputeGaussMCMC");