You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemds.apache.org by mb...@apache.org on 2021/07/15 18:05:48 UTC
[systemds] branch master updated: [SYSTEMDS-3061] New garch builtin
function (GARCH models)
This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 99b5ab8 [SYSTEMDS-3061] New garch builtin function (GARCH models)
99b5ab8 is described below
commit 99b5ab8c9a4b2728195a6421bd9fdc51c842b93c
Author: JustYeti32 <ma...@gmx.net>
AuthorDate: Thu Jul 15 19:32:06 2021 +0200
[SYSTEMDS-3061] New garch builtin function (GARCH models)
Aim:
- build Garch(1,1) model
Features:
- simulation of Garch(1,1) process
- paramters search with gradient descent
Updated fit-method:
- gradient-descent with restart, similar to simulated annealing
Incorporated test:
- compare to R:
if a similar residual-error can be achieved, test is passed
AMLS project SS2021.
Closes #1240.
---
scripts/builtin/garch.dml | 287 +++++++++++++++++++++
.../java/org/apache/sysds/common/Builtins.java | 1 +
.../test/functions/builtin/BuiltinGarchTest.java | 114 ++++++++
src/test/scripts/functions/builtin/garch.R | 34 +++
src/test/scripts/functions/builtin/garch.dml | 27 ++
5 files changed, 463 insertions(+)
diff --git a/scripts/builtin/garch.dml b/scripts/builtin/garch.dml
new file mode 100644
index 0000000..a37ca8b
--- /dev/null
+++ b/scripts/builtin/garch.dml
@@ -0,0 +1,287 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Builtin function that implements GARCH(1,1)
+#
+# INPUT PARAMETERS:
+# ----------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ----------------------------------------------------------------------------
+# X Double --- The input Matrix to apply Arima on.
+# kmax Integer --- Number of iterations
+# momentum Double --- Momentum for momentum-gradient descent (set to 0 to deactivate)
+# start_stepsize Double --- Initial gradient-descent stepsize
+# end_stepsize Double --- gradient-descent stepsize at end (linear descent)
+# start_vicinity Double --- proportion of randomness of restart-location for gradient descent at beginning
+# end_vicinity Double --- same at end (linear decay)
+# sim_seed Integer --- seed for simulation of process on fitted coefficients
+# verbose Boolean --- verbosity, comments during fitting
+#
+# RETURN VALUES
+# ----------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ----------------------------------------------------------------------------
+# fitted_X Double --- simulated garch(1,1) process on fitted coefficients
+# fitted_var_hist Double --- variances of simulated fitted process
+# best_a0 Double --- constant term of fitted process
+# best_arch_coef Double --- 1-st arch-coefficient of fitted process
+# best_var_coef Double --- 1-st garch-coefficient of fitted process
+# ----------------------------------------------------------------------------
+#
+# COMMENTS
+# This has some drawbacks: slow convergence of optimization (sort of simulated annealing/gradient descent)
+# TODO: use BFGS or BHHH if it is available (this are go to methods)
+# TODO: (only then) extend to garch(p,q); otherwise the search space is way too big for the current method
+
+
+# ------- MAIN FUNCTION
+
+m_garch = function(Matrix[Double] X, Integer kmax, Double momentum, Double start_stepsize, Double end_stepsize, Double start_vicinity, Double end_vicinity, Integer sim_seed, Boolean verbose)
+return (Matrix[Double] fitted_X, Matrix[Double] fitted_var_hist, Double best_a0, Double best_arch_coef, Double best_var_coef) {
+
+ [a0, arch_coef, var_coef] = sample_feasible_params() # initialize startpoint
+ curr_qmle = qmle(X, a0, arch_coef, var_coef) # initialize quasi-log-likelihood at start
+
+ # record the parameters of max-quasi-log-likelihood thus far
+ best_a0 = a0
+ best_arch_coef = arch_coef
+ best_var_coef = var_coef
+ best_qmle = curr_qmle
+
+ # record last change of gradient if update was applied
+ last_change_a0 = 0
+ last_change_arch_coef = 0
+ last_change_var_coef = 0
+
+ # initialize stepsize (linear decay)
+ stepsize = start_stepsize
+
+ # initialize vicinity (linear decay)
+ vicinity = start_vicinity
+
+ # all coeffs need be >0 to provide a feasible solution; clip at this constant
+ clip_at = 0.00001
+
+ # do gradient descent
+ for (k in 1:kmax-1) {
+ # update vicinity and stepsize
+ progress = k/kmax
+ stepsize = (1-progress) * start_stepsize + progress*end_stepsize
+ vicinity = (1-progress) * start_vicinity + progress*end_vicinity
+
+ # get gradient
+ [d_a0, d_arch_coef, d_var_coef] = gradient(X, a0, arch_coef, var_coef)
+
+ # newly proposed parameters
+ new_a0 = max(a0 + (stepsize * d_a0) + momentum * (last_change_a0), clip_at)
+ new_arch_coef = max(arch_coef + (stepsize * arch_coef) + (momentum * last_change_arch_coef), clip_at)
+ new_var_coef = max(var_coef + (stepsize * var_coef) + (momentum * last_change_var_coef), clip_at)
+
+ # ensure feasibility (this condition provides stationarity, see literature)
+ while (new_arch_coef + new_var_coef > 1) {
+ new_arch_coef = new_arch_coef / 2
+ new_var_coef = new_var_coef / 2
+ }
+
+ # objective function value of new feasible parameters
+ new_qmle = qmle(X, new_a0, new_arch_coef, new_var_coef)
+
+ # record the change of coefficients for momentum updates
+ change_a0 = new_a0 - a0
+ change_arch_coef = new_arch_coef - arch_coef
+ change_var_coef = new_var_coef - var_coef
+
+ # check if improvement
+ if (new_qmle > curr_qmle) {
+ # if so, update and use change for momentum
+ last_change_a0 = change_a0
+ last_change_arch_cof = change_arch_coef
+ last_change_var_coef = change_var_coef
+ update_reason = "gradient"
+ }
+ else {
+ # else: chance for restart at close point and reset momentum
+
+ update_reason = "no update" # unless the random restart applies
+ coin_flip = as.scalar(rand(rows=1, cols=1)) # random restart gets less likely with progressing search
+
+ if (coin_flip > progress) {
+ # sample random restart-point
+ # vicinity tells how far we move to the random point (0: dont move, 1: move fully to random point)
+ # similar to simulated annealing with adaptive neighborhood
+
+ [new_a0, new_arch_coef, new_var_coef] = sample_neighbor(a0, arch_coef, var_coef, vicinity)
+
+ # reset momentum
+ last_change_a0 = 0
+ last_change_arch_cof = 0
+ last_change_var_coef = 0
+
+ update_reason = "restart"
+ }
+ }
+
+ # update the point
+ a0 = new_a0
+ arch_coef = new_arch_coef
+ var_coef = new_var_coef
+
+ # update qmle at the moment
+ curr_qmle = qmle(X, a0, arch_coef, var_coef)
+
+ # check and record if it is the best point thus far
+ update_best = (curr_qmle > best_qmle);
+ if (update_best) {
+ best_qmle = curr_qmle
+ best_a0 = a0
+ best_arch_coef = arch_coef
+ best_var_coef = var_coef
+ }
+
+ # logging: report state of gradient descent
+ if (verbose) {
+ print("k | " + toString(k))
+ print("a0 | " + toString(a0))
+ print("arch coef | " + toString(arch_coef))
+ print("var coef | " + toString(var_coef))
+ print("qmle | " + toString(curr_qmle))
+ print("stepsize | " + toString(stepsize))
+ print("update reason | " + update_reason)
+ print("update best | " + update_best)
+ print("____________________________________")
+ }
+ }
+
+ # simulate process from best solution
+ sim_steps = nrow(X)
+ [fitted_X, fitted_var_hist] = sim_garch(best_a0, best_arch_coef, best_var_coef, sim_steps, sim_seed)
+
+ # logging: report output
+ if (verbose) {
+ print("end iteration: return the following: ")
+ print("best qmle | " + toString(best_qmle))
+ print("best a0 | " + toString(best_a0))
+ print("best arch_coef |" + toString(best_arch_coef))
+ print("best var_coef |" + toString(best_var_coef))
+ print("____________________________________")
+ }
+}
+
+
+# ------- UTILITY FUNCTIONS
+
+# ------- quasi-log-likelihood of garch-1-1 coefficients for given data
+# ------- https://math.berkeley.edu/~btw/thesis4.pdf
+qmle = function(Matrix[Double] X, Double a0, Double arch_coef, Double var_coef)
+ return (Double qmle)
+{
+ n = nrow(X)
+
+ # initialize variance
+ var_0 = a0 / (1-arch_coef - var_coef)
+ vars = matrix(var_0, rows=1, cols=1)
+
+ # init loop for var and qmle computation
+ var_lag = var_0
+ xq_lag = as.scalar(X[1,1])^2
+ qmle = 0
+
+ # compute vars and qmle recursively
+ # TODO vectorize via cummulative aggregates?
+ for (t in 2:n) {
+ xq_t = as.scalar(X[t,1])^2
+ var_t = a0 + arch_coef*xq_lag + var_coef*var_lag
+ qmle = qmle - (1/(2*n)) * (log(var_t) + (xq_t / var_t)) # up to constant
+
+ vars = rbind(vars, matrix(var_t, rows=1, cols=1))
+ var_lag = var_t
+ xq_lag = xq_t
+ }
+}
+
+
+# ------- returns coefs which yield a stationary garch process; sampled uniform at random in feasible region
+sample_feasible_params = function()
+return (Double a0, Double arch_coef, Double var_coef) {
+ # TODO vectorize (e.g., via 64 random numbers -> 1e-20 failure prob)
+ a0 = as.scalar(rand(rows=1, cols=1))
+ arch_coef = as.scalar(rand(rows=1, cols=1))
+ var_coef = as.scalar(rand(rows=1, cols=1))
+
+ while (arch_coef + var_coef >= 1) {
+ arch_coef = as.scalar(rand(rows=1, cols=1))
+ var_coef = as.scalar(rand(rows=1, cols=1))
+ }
+}
+
+# ------- sample random feasible point and blend with current point
+# ------- vicinity tells the proportion of the newly random point is blended in
+sample_neighbor = function(Double a0, Double arch_coef, Double var_coef, Double vicinity)
+return (Double nb_a0, Double nb_arch_coef, Double nb_var_coef) {
+ # nb is convex comb of current val and a distant target value
+ # the smaller the vicinity, the closer we are to the current point and the less randomness there is
+
+ [target_a0, target_arch_coef, target_var_coef] = sample_feasible_params()
+
+ nb_a0 = vicinity*target_a0 + (1-vicinity)*a0
+ nb_arch_coef = vicinity*target_arch_coef + (1-vicinity)*arch_coef
+ nb_var_coef = vicinity*target_var_coef + (1-vicinity)*var_coef
+}
+
+# ------- numerically approximated gradient of quasi-log-likelihood
+gradient = function(Matrix[Double] X, Double a0, Double arch_coef, Double var_coef)
+ return (Double d_a0, Double d_arch_coef, Double d_var_coef)
+{
+ eps = 0.00001
+ qmle_val = qmle(X, a0, arch_coef, var_coef)
+ d_a0 = (qmle(X, a0 + eps, arch_coef, var_coef) - qmle_val) / eps
+ d_arch_coef = (qmle(X, a0, arch_coef + eps, var_coef) - qmle_val) / eps
+ d_var_coef = (qmle(X, a0, arch_coef, var_coef + eps) - qmle_val) / eps
+}
+
+# ------- simulate a garch-process with given parameters
+sim_garch = function(Double a0, Double arch_coef, Double var_coef, Integer simsteps, Integer seed)
+ return (Matrix[Double] X, Matrix[Double] var_hist)
+{
+ # init var and std
+ var = a0 / (1 - arch_coef - var_coef)
+ std = sqrt(var)
+
+ # init outputs
+ var_hist = matrix(0, rows=0, cols=1)
+ X = matrix(0, rows=0, cols=1)
+
+ # recursively construct time series
+ # TODO vectorize via cumsumprod
+ for (t in seq(1,simsteps,1)) {
+ noise = as.scalar(rand(rows=1, cols=1, pdf="normal", seed=seed)) # N(0,1) noise
+ xt = noise * std
+
+ X = rbind(X, as.matrix(xt))
+ var_hist = rbind(var_hist,as.matrix(var))
+
+ # get var and std for next round
+ var = a0 + arch_coef * (xt^2) + var_coef * var
+ std = sqrt(var)
+
+ seed = seed + 1 # prevent same innovations
+ }
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index c12fcd3..e8a858f 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -122,6 +122,7 @@ public enum Builtins {
EVAL("eval", false),
FLOOR("floor", false),
FRAME_SORT("frameSort", true),
+ GARCH("garch", true),
GAUSSIAN_CLASSIFIER("gaussianClassifier", true),
GET_ACCURACY("getAccuracy", true),
GLM("glm", true),
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGarchTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGarchTest.java
new file mode 100644
index 0000000..9bf2a50
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGarchTest.java
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+
+import org.junit.Test;
+import org.apache.sysds.common.Types.ExecMode;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+
+@RunWith(value = Parameterized.class)
+@net.jcip.annotations.NotThreadSafe
+public class BuiltinGarchTest extends AutomatedTestBase {
+ private final static String TEST_NAME = "garch";
+ private final static String TEST_DIR = "functions/builtin/";
+ private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinGarchTest.class.getSimpleName() + "/";
+
+ protected int kmax;
+ protected double momentum, start_stepsize, end_stepsize, start_vicinity, end_vicinity;
+
+ public BuiltinGarchTest(int kmax, double momentum, double start_stepsize, double end_stepsize, double start_vicinity, double end_vicinity){
+ this.kmax = kmax;
+ this.momentum = momentum;
+ this.start_stepsize = start_stepsize;
+ this.end_stepsize = end_stepsize;
+ this.start_vicinity = start_vicinity;
+ this.end_vicinity = end_vicinity;
+ }
+
+ @Parameters
+ public static Collection<Object[]> data() {
+ return Arrays.asList(new Object[][] {
+ {500, 0.0, 0.1, 0.001, 0.5, 0.0},
+ {500, 0.5, 0.25, 0.0001, 0.0, 0.0}
+ });
+ }
+
+ @Override
+ public void setUp() {
+ addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[]{"B"}));
+ }
+
+ @Test
+ public void testGarch(){
+ ExecMode platformOld = setExecMode(ExecMode.HYBRID);
+
+ try {
+ loadTestConfiguration(getTestConfiguration(TEST_NAME));
+ String HOME = SCRIPT_DIR + TEST_DIR;
+ fullDMLScriptName = HOME + TEST_NAME + ".dml";
+ fullRScriptName = HOME + TEST_NAME + ".R";
+
+ programArgs = new String[]{
+ "-nvargs",
+ "X=" + input("col.mtx"),
+ "kmax=" + kmax,
+ "momentum=" + momentum,
+ "start_stepsize=" + start_stepsize,
+ "end_stepsize=" + end_stepsize,
+ "start_vicinity=" + start_vicinity,
+ "end_vicinity=" + end_vicinity,
+ "model=" + output("learnt.model"),};
+
+ rCmd = getRCmd(input("col.mtx"),
+ expected("learnt.model")
+ );
+
+ int timeSeriesLength = 100;
+ double[][] timeSeries = getRandomMatrix(timeSeriesLength, 1, -1, 1, 1.0, 54321);
+
+ MatrixCharacteristics mc = new MatrixCharacteristics(timeSeriesLength,1,-1,-1);
+ writeInputMatrixWithMTD("col", timeSeries, true, mc);
+
+ runTest(true, false, null, -1);
+ runRScript(true);
+
+ // checks if R and systemds-implementation achieve a similar mean-error on dataset
+ double tol = 0.1;
+ HashMap<CellIndex, Double> garch_model_R = readRMatrixFromExpectedDir("learnt.model");
+ HashMap<CellIndex, Double> garch_model_SYSTEMDS= readDMLMatrixFromOutputDir("learnt.model");
+ TestUtils.compareMatrices(garch_model_R, garch_model_SYSTEMDS, tol, "garch_R", "garch_SYSTEMDS");
+ }
+ finally {
+ resetExecMode(platformOld);
+ }
+ }
+}
diff --git a/src/test/scripts/functions/builtin/garch.R b/src/test/scripts/functions/builtin/garch.R
new file mode 100644
index 0000000..a33da21
--- /dev/null
+++ b/src/test/scripts/functions/builtin/garch.R
@@ -0,0 +1,34 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+args = commandArgs(TRUE)
+
+library("tseries")
+library(Matrix)
+
+X = readMM(args[1])
+
+n = nrow(X)
+model = garch(as.matrix(X))
+mean_residual_error = mean(residuals(model)[2:n]) # first is NA, drop this value
+
+writeMM(as(mean_residual_error, "CsparseMatrix"), args[2])
diff --git a/src/test/scripts/functions/builtin/garch.dml b/src/test/scripts/functions/builtin/garch.dml
new file mode 100644
index 0000000..b70db51
--- /dev/null
+++ b/src/test/scripts/functions/builtin/garch.dml
@@ -0,0 +1,27 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($X)
+
+[fitted_X, fitted_var_hist, a0, arch_coef, var_coef] = garch(X=X, kmax=$kmax, momentum=$momentum, start_stepsize=$start_stepsize, end_stepsize=$end_stepsize, start_vicinity=$start_vicinity, end_vicinity=$end_vicinity, sim_seed=54321, verbose=FALSE)
+mean_residual_error = matrix(sum(fitted_X - X) / nrow(fitted_X), rows=1, cols=1)
+
+write(mean_residual_error, $model)
\ No newline at end of file