You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/07/15 18:05:48 UTC

[systemds] branch master updated: [SYSTEMDS-3061] New garch builtin function (GARCH models)

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new 99b5ab8  [SYSTEMDS-3061] New garch builtin function (GARCH models)
99b5ab8 is described below

commit 99b5ab8c9a4b2728195a6421bd9fdc51c842b93c
Author: JustYeti32 <ma...@gmx.net>
AuthorDate: Thu Jul 15 19:32:06 2021 +0200

    [SYSTEMDS-3061] New garch builtin function (GARCH models)
    
    Aim:
    - build Garch(1,1) model
    
    Features:
    - simulation of Garch(1,1) process
    - paramters search with gradient descent
    
    Updated fit-method:
    - gradient-descent with restart, similar to simulated annealing
    
    Incorporated test:
    - compare to R:
    if a similar residual-error can be achieved, test is passed
    
    AMLS project SS2021.
    Closes #1240.
---
 scripts/builtin/garch.dml                          | 287 +++++++++++++++++++++
 .../java/org/apache/sysds/common/Builtins.java     |   1 +
 .../test/functions/builtin/BuiltinGarchTest.java   | 114 ++++++++
 src/test/scripts/functions/builtin/garch.R         |  34 +++
 src/test/scripts/functions/builtin/garch.dml       |  27 ++
 5 files changed, 463 insertions(+)

diff --git a/scripts/builtin/garch.dml b/scripts/builtin/garch.dml
new file mode 100644
index 0000000..a37ca8b
--- /dev/null
+++ b/scripts/builtin/garch.dml
@@ -0,0 +1,287 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Builtin function that implements GARCH(1,1)
+#
+# INPUT PARAMETERS:
+# ----------------------------------------------------------------------------
+# NAME              TYPE      DEFAULT   MEANING
+# ----------------------------------------------------------------------------
+# X                 Double    ---       The input Matrix to apply Arima on.
+# kmax              Integer   ---       Number of iterations
+# momentum          Double    ---       Momentum for momentum-gradient descent (set to 0 to deactivate)
+# start_stepsize    Double    ---       Initial gradient-descent stepsize
+# end_stepsize      Double    ---       gradient-descent stepsize at end (linear descent)
+# start_vicinity    Double    ---       proportion of randomness of restart-location for gradient descent at beginning
+# end_vicinity      Double    ---       same at end (linear decay)
+# sim_seed          Integer   ---       seed for simulation of process on fitted coefficients
+# verbose           Boolean   ---       verbosity, comments during fitting
+#
+# RETURN VALUES
+# ----------------------------------------------------------------------------
+# NAME         TYPE   DEFAULT  MEANING
+# ----------------------------------------------------------------------------
+# fitted_X          Double  ---     simulated garch(1,1) process on fitted coefficients
+# fitted_var_hist   Double  ---     variances of simulated fitted process
+# best_a0           Double  ---     constant term of fitted process
+# best_arch_coef    Double  ---     1-st arch-coefficient of fitted process
+# best_var_coef     Double  ---     1-st garch-coefficient of fitted process
+# ----------------------------------------------------------------------------
+#
+# COMMENTS
+# This has some drawbacks: slow convergence of optimization (sort of simulated annealing/gradient descent)
+# TODO: use BFGS or BHHH if it is available (this are go to methods)
+# TODO: (only then) extend to garch(p,q); otherwise the search space is way too big for the current method
+
+
+# ------- MAIN FUNCTION
+
+m_garch = function(Matrix[Double] X, Integer kmax, Double momentum, Double start_stepsize, Double end_stepsize, Double start_vicinity, Double end_vicinity, Integer sim_seed, Boolean verbose)
+return (Matrix[Double] fitted_X, Matrix[Double] fitted_var_hist, Double best_a0, Double best_arch_coef, Double best_var_coef) {
+
+  [a0, arch_coef, var_coef] = sample_feasible_params() # initialize startpoint
+  curr_qmle = qmle(X, a0, arch_coef, var_coef) # initialize quasi-log-likelihood at start
+
+  # record the parameters of max-quasi-log-likelihood thus far
+  best_a0 = a0
+  best_arch_coef = arch_coef
+  best_var_coef = var_coef
+  best_qmle = curr_qmle
+
+  # record last change of gradient if update was applied
+  last_change_a0 = 0
+  last_change_arch_coef = 0
+  last_change_var_coef = 0
+
+  # initialize stepsize (linear decay)
+  stepsize = start_stepsize
+
+  # initialize vicinity (linear decay)
+  vicinity = start_vicinity
+
+  # all coeffs need be >0 to provide a feasible solution; clip at this constant
+  clip_at = 0.00001
+
+  # do gradient descent
+  for (k in 1:kmax-1) {
+    # update vicinity and stepsize
+    progress = k/kmax
+    stepsize = (1-progress) * start_stepsize + progress*end_stepsize
+    vicinity = (1-progress) * start_vicinity + progress*end_vicinity
+
+    # get gradient
+    [d_a0, d_arch_coef, d_var_coef] = gradient(X, a0, arch_coef, var_coef)
+
+    # newly proposed parameters
+    new_a0 = max(a0 + (stepsize * d_a0) + momentum * (last_change_a0), clip_at)
+    new_arch_coef = max(arch_coef + (stepsize * arch_coef) + (momentum * last_change_arch_coef), clip_at)
+    new_var_coef = max(var_coef + (stepsize * var_coef) + (momentum * last_change_var_coef), clip_at)
+
+    # ensure feasibility (this condition provides stationarity, see literature)
+    while (new_arch_coef + new_var_coef > 1) {
+      new_arch_coef = new_arch_coef / 2
+      new_var_coef = new_var_coef / 2
+    }
+
+    # objective function value of new feasible parameters
+    new_qmle = qmle(X, new_a0, new_arch_coef, new_var_coef)
+
+    # record the change of coefficients for momentum updates
+    change_a0 = new_a0 - a0
+    change_arch_coef = new_arch_coef - arch_coef
+    change_var_coef = new_var_coef - var_coef
+
+    # check if improvement
+    if (new_qmle > curr_qmle) {
+      # if so, update and use change for momentum
+      last_change_a0 = change_a0
+      last_change_arch_cof = change_arch_coef
+      last_change_var_coef = change_var_coef
+      update_reason = "gradient"
+    }
+    else {
+      # else: chance for restart at close point and reset momentum
+
+      update_reason = "no update" # unless the random restart applies
+      coin_flip = as.scalar(rand(rows=1, cols=1)) # random restart gets less likely with progressing search
+
+      if (coin_flip > progress) {
+        # sample random restart-point
+        # vicinity tells how far we move to the random point (0: dont move, 1: move fully to random point)
+        # similar to simulated annealing with adaptive neighborhood
+
+        [new_a0, new_arch_coef, new_var_coef] = sample_neighbor(a0, arch_coef, var_coef, vicinity)
+
+        # reset momentum
+        last_change_a0 = 0
+        last_change_arch_cof = 0
+        last_change_var_coef = 0
+
+        update_reason = "restart"
+      }
+    }
+
+    # update the point
+    a0 = new_a0
+    arch_coef = new_arch_coef
+    var_coef = new_var_coef
+
+    # update qmle at the moment
+    curr_qmle = qmle(X, a0, arch_coef, var_coef)
+
+    # check and record if it is the best point thus far
+    update_best = (curr_qmle > best_qmle);
+    if (update_best) {
+      best_qmle = curr_qmle
+      best_a0 = a0
+      best_arch_coef = arch_coef
+      best_var_coef = var_coef
+    }
+
+    # logging: report state of gradient descent
+    if (verbose) {
+      print("k                | " + toString(k))
+      print("a0               | " +  toString(a0))
+      print("arch coef        | " +  toString(arch_coef))
+      print("var coef         | " +  toString(var_coef))
+      print("qmle             | " +  toString(curr_qmle))
+      print("stepsize         | " + toString(stepsize))
+      print("update reason    | " + update_reason)
+      print("update best      | " + update_best)
+      print("____________________________________")
+    }
+  }
+
+  # simulate process from best solution
+  sim_steps = nrow(X)
+  [fitted_X, fitted_var_hist] = sim_garch(best_a0, best_arch_coef, best_var_coef, sim_steps, sim_seed)
+
+  # logging: report output
+  if (verbose) {
+    print("end iteration: return the following: ")
+    print("best qmle            | " + toString(best_qmle))
+    print("best a0              | " + toString(best_a0))
+    print("best arch_coef       |" + toString(best_arch_coef))
+    print("best var_coef        |" + toString(best_var_coef))
+    print("____________________________________")
+  }
+}
+
+
+# ------- UTILITY FUNCTIONS
+
+# ------- quasi-log-likelihood of garch-1-1 coefficients for given data
+# ------- https://math.berkeley.edu/~btw/thesis4.pdf
+qmle = function(Matrix[Double] X, Double a0, Double arch_coef, Double var_coef)
+  return (Double qmle)
+{
+  n = nrow(X)
+
+  # initialize variance
+  var_0 = a0 / (1-arch_coef - var_coef)
+  vars = matrix(var_0, rows=1, cols=1)
+
+  # init loop for var and qmle computation
+  var_lag = var_0
+  xq_lag = as.scalar(X[1,1])^2
+  qmle = 0
+
+  # compute vars and qmle recursively
+  # TODO vectorize via cummulative aggregates?
+  for (t in 2:n) {
+    xq_t = as.scalar(X[t,1])^2
+    var_t = a0 + arch_coef*xq_lag + var_coef*var_lag
+    qmle = qmle - (1/(2*n)) * (log(var_t) + (xq_t / var_t)) # up to constant
+
+    vars = rbind(vars, matrix(var_t, rows=1, cols=1))
+    var_lag = var_t
+    xq_lag = xq_t
+  }
+}
+
+
+# ------- returns coefs which yield a stationary garch process; sampled uniform at random in feasible region
+sample_feasible_params = function()
+return (Double a0, Double arch_coef, Double var_coef) {
+  # TODO vectorize (e.g., via 64 random numbers -> 1e-20 failure prob)
+  a0 = as.scalar(rand(rows=1, cols=1))
+  arch_coef = as.scalar(rand(rows=1, cols=1))
+  var_coef = as.scalar(rand(rows=1, cols=1))
+
+  while (arch_coef + var_coef >= 1) {
+    arch_coef = as.scalar(rand(rows=1, cols=1))
+    var_coef = as.scalar(rand(rows=1, cols=1))
+  }
+}
+
+# ------- sample random feasible point and blend with current point
+# ------- vicinity tells the proportion of the newly random point is blended in
+sample_neighbor = function(Double a0, Double arch_coef, Double var_coef, Double vicinity)
+return (Double nb_a0, Double nb_arch_coef, Double nb_var_coef) {
+  # nb is convex comb of current val and a distant target value
+  # the smaller the vicinity, the closer we are to the current point and the less randomness there is
+
+  [target_a0, target_arch_coef, target_var_coef] = sample_feasible_params()
+
+  nb_a0 = vicinity*target_a0 + (1-vicinity)*a0
+  nb_arch_coef = vicinity*target_arch_coef + (1-vicinity)*arch_coef
+  nb_var_coef = vicinity*target_var_coef + (1-vicinity)*var_coef
+}
+
+# ------- numerically approximated gradient of quasi-log-likelihood
+gradient = function(Matrix[Double] X, Double a0, Double arch_coef, Double var_coef)
+  return (Double d_a0, Double d_arch_coef, Double d_var_coef)
+{
+  eps = 0.00001
+  qmle_val = qmle(X, a0, arch_coef, var_coef)
+  d_a0 = (qmle(X, a0 + eps, arch_coef, var_coef) - qmle_val) / eps
+  d_arch_coef = (qmle(X, a0, arch_coef + eps, var_coef) - qmle_val) / eps
+  d_var_coef = (qmle(X, a0, arch_coef, var_coef + eps) - qmle_val) / eps
+}
+
+# ------- simulate a garch-process with given parameters
+sim_garch = function(Double a0, Double arch_coef, Double var_coef, Integer simsteps, Integer seed)
+  return (Matrix[Double] X, Matrix[Double] var_hist)
+{
+  # init var and std
+  var = a0 / (1 - arch_coef - var_coef)
+  std = sqrt(var)
+
+  # init outputs
+  var_hist = matrix(0, rows=0, cols=1)
+  X = matrix(0, rows=0, cols=1)
+
+  # recursively construct time series
+  # TODO vectorize via cumsumprod
+  for (t in seq(1,simsteps,1)) {
+    noise = as.scalar(rand(rows=1, cols=1, pdf="normal", seed=seed)) # N(0,1) noise
+    xt = noise * std
+
+    X = rbind(X, as.matrix(xt))
+    var_hist = rbind(var_hist,as.matrix(var))
+
+    # get var and std for next round
+    var = a0 + arch_coef * (xt^2) + var_coef * var
+    std = sqrt(var)
+
+    seed = seed + 1 # prevent same innovations
+  }
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index c12fcd3..e8a858f 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -122,6 +122,7 @@ public enum Builtins {
 	EVAL("eval", false),
 	FLOOR("floor", false),
 	FRAME_SORT("frameSort", true),
+	GARCH("garch", true),
 	GAUSSIAN_CLASSIFIER("gaussianClassifier", true),
 	GET_ACCURACY("getAccuracy", true),
 	GLM("glm", true),
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGarchTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGarchTest.java
new file mode 100644
index 0000000..9bf2a50
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGarchTest.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+
+import org.junit.Test;
+import org.apache.sysds.common.Types.ExecMode;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+
+@RunWith(value = Parameterized.class)
+@net.jcip.annotations.NotThreadSafe
+public class BuiltinGarchTest extends AutomatedTestBase {
+	private final static String TEST_NAME = "garch";
+	private final static String TEST_DIR = "functions/builtin/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinGarchTest.class.getSimpleName() + "/";
+
+	protected int kmax;
+	protected double momentum, start_stepsize, end_stepsize, start_vicinity, end_vicinity;
+
+	public BuiltinGarchTest(int kmax, double momentum, double start_stepsize, double end_stepsize, double start_vicinity, double end_vicinity){
+		this.kmax = kmax;
+		this.momentum = momentum;
+		this.start_stepsize = start_stepsize;
+		this.end_stepsize = end_stepsize;
+		this.start_vicinity = start_vicinity;
+		this.end_vicinity = end_vicinity;
+	}
+
+	@Parameters
+	public static Collection<Object[]> data() {
+		return Arrays.asList(new Object[][] {
+				{500, 0.0, 0.1, 0.001, 0.5, 0.0},
+				{500, 0.5, 0.25, 0.0001, 0.0, 0.0}
+		});
+	}
+
+	@Override
+	public void setUp() {
+		addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[]{"B"}));
+	}
+
+	@Test
+	public void testGarch(){
+		ExecMode platformOld = setExecMode(ExecMode.HYBRID);
+
+		try {
+			loadTestConfiguration(getTestConfiguration(TEST_NAME));
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			fullRScriptName = HOME + TEST_NAME + ".R";
+
+			programArgs = new String[]{
+					"-nvargs",
+					"X=" + input("col.mtx"),
+					"kmax=" + kmax,
+					"momentum=" + momentum,
+					"start_stepsize=" + start_stepsize,
+					"end_stepsize=" + end_stepsize,
+					"start_vicinity=" + start_vicinity,
+					"end_vicinity=" + end_vicinity,
+					"model=" + output("learnt.model"),};
+
+			rCmd = getRCmd(input("col.mtx"),
+					expected("learnt.model")
+			);
+
+			int timeSeriesLength = 100;
+			double[][] timeSeries = getRandomMatrix(timeSeriesLength, 1, -1, 1, 1.0, 54321);
+
+			MatrixCharacteristics mc = new MatrixCharacteristics(timeSeriesLength,1,-1,-1);
+			writeInputMatrixWithMTD("col", timeSeries, true, mc);
+
+			runTest(true, false, null, -1);
+			runRScript(true);
+
+			// checks if R and systemds-implementation achieve a similar mean-error on dataset
+			double tol = 0.1;
+			HashMap<CellIndex, Double> garch_model_R = readRMatrixFromExpectedDir("learnt.model");
+			HashMap<CellIndex, Double> garch_model_SYSTEMDS= readDMLMatrixFromOutputDir("learnt.model");
+			TestUtils.compareMatrices(garch_model_R, garch_model_SYSTEMDS, tol, "garch_R", "garch_SYSTEMDS");
+		}
+		finally {
+			resetExecMode(platformOld);
+		}
+	}
+}
diff --git a/src/test/scripts/functions/builtin/garch.R b/src/test/scripts/functions/builtin/garch.R
new file mode 100644
index 0000000..a33da21
--- /dev/null
+++ b/src/test/scripts/functions/builtin/garch.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args = commandArgs(TRUE)
+
+library("tseries")
+library(Matrix)
+
+X = readMM(args[1])
+
+n = nrow(X)
+model = garch(as.matrix(X))
+mean_residual_error = mean(residuals(model)[2:n]) # first is NA, drop this value
+
+writeMM(as(mean_residual_error, "CsparseMatrix"), args[2])
diff --git a/src/test/scripts/functions/builtin/garch.dml b/src/test/scripts/functions/builtin/garch.dml
new file mode 100644
index 0000000..b70db51
--- /dev/null
+++ b/src/test/scripts/functions/builtin/garch.dml
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($X)
+
+[fitted_X, fitted_var_hist, a0, arch_coef, var_coef] = garch(X=X, kmax=$kmax, momentum=$momentum, start_stepsize=$start_stepsize, end_stepsize=$end_stepsize, start_vicinity=$start_vicinity, end_vicinity=$end_vicinity, sim_seed=54321, verbose=FALSE)
+mean_residual_error = matrix(sum(fitted_X - X) / nrow(fitted_X), rows=1, cols=1)
+
+write(mean_residual_error, $model)
\ No newline at end of file