You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficcontrol.apache.org by ne...@apache.org on 2022/02/25 21:36:59 UTC

[trafficcontrol] branch master updated: Add t3c final failure log msg (#6586)

This is an automated email from the ASF dual-hosted git repository.

neuman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficcontrol.git


The following commit(s) were added to refs/heads/master by this push:
     new 71a44ed  Add t3c final failure log msg (#6586)
71a44ed is described below

commit 71a44ed1aad904bc6ed3ed307186f3cb35ff1431
Author: Robert O Butts <ro...@users.noreply.github.com>
AuthorDate: Fri Feb 25 14:36:43 2022 -0700

    Add t3c final failure log msg (#6586)
    
    This adds a final obvious failure or success message to t3c. It isn't
    obvious whether error logs are fatal, or if t3c was able to keep
    going. This makes it obvious whether it succeeded or failed,
    and makes it easy to see, to search logs, and to create alarms
    when the run fails.
    
    Also adds panic catching and logging.
---
 cache-config/t3c-apply/t3c-apply.go            | 121 ++++++++++++++++---------
 cache-config/t3c-apply/util/util.go            |   5 +-
 cache-config/testing/ort-tests/t3c-fail-log.go |  51 +++++++++++
 3 files changed, 133 insertions(+), 44 deletions(-)

diff --git a/cache-config/t3c-apply/t3c-apply.go b/cache-config/t3c-apply/t3c-apply.go
index b4be92a..5a3edcb 100644
--- a/cache-config/t3c-apply/t3c-apply.go
+++ b/cache-config/t3c-apply/t3c-apply.go
@@ -29,6 +29,7 @@ import (
 	"github.com/apache/trafficcontrol/cache-config/t3c-apply/util"
 	"github.com/apache/trafficcontrol/cache-config/t3cutil"
 	"github.com/apache/trafficcontrol/lib/go-log"
+	tcutil "github.com/apache/trafficcontrol/lib/go-util"
 )
 
 // Version is the application version.
@@ -39,18 +40,17 @@ var Version = "0.4"
 // This is overwritten by the build with the current project version.
 var GitRevision = "nogit"
 
-// exit codes
 const (
-	Success           = 0
-	AlreadyRunning    = 132
-	ConfigFilesError  = 133
-	ConfigError       = 134
-	GeneralFailure    = 135
-	PackagingError    = 136
-	RevalidationError = 137
-	ServicesError     = 138
-	SyncDSError       = 139
-	UserCheckError    = 140
+	ExitCodeSuccess           = 0
+	ExitCodeAlreadyRunning    = 132
+	ExitCodeConfigFilesError  = 133
+	ExitCodeConfigError       = 134
+	ExitCodeGeneralFailure    = 135
+	ExitCodePackagingError    = 136
+	ExitCodeRevalidationError = 137
+	ExitCodeServicesError     = 138
+	ExitCodeSyncDSError       = 139
+	ExitCodeUserCheckError    = 140
 )
 
 func runSysctl(cfg config.Cfg) {
@@ -71,22 +71,36 @@ const LockFilePath = "/var/run/t3c.lock"
 const LockFileRetryInterval = time.Second
 const LockFileRetryTimeout = time.Minute
 
+const FailureExitMsg = `CRITICAL FAILURE, ABORTING`
+const PostConfigFailureExitMsg = `CRITICAL FAILURE AFTER SETTING CONFIG, ABORTING`
+const SuccessExitMsg = `SUCCESS`
+
 func main() {
+	os.Exit(LogPanic(Main))
+}
+
+// Main is the main function of t3c-apply.
+// This is a separate function so defer statements behave as-expected.
+// DO NOT call os.Exit within this function; return the code instead.
+// Returns the application exit code.
+func Main() int {
 	var syncdsUpdate torequest.UpdateStatus
 	var lock util.FileLock
 	cfg, err := config.GetCfg(Version, GitRevision)
 	if err != nil {
 		fmt.Println(err)
-		os.Exit(ConfigError)
+		fmt.Println(FailureExitMsg)
+		return ExitCodeConfigError
 	} else if cfg == (config.Cfg{}) { // user used the --help option
-		os.Exit(Success)
+		return ExitCodeSuccess
 	}
 
 	log.Infoln("Trying to acquire app lock")
 	for lockStart := time.Now(); !lock.GetLock(LockFilePath); {
 		if time.Since(lockStart) > LockFileRetryTimeout {
 			log.Errorf("Failed to get app lock after %v seconds, another instance is running, exiting without running\n", int(LockFileRetryTimeout/time.Second))
-			os.Exit(AlreadyRunning)
+			log.Infoln(FailureExitMsg)
+			return ExitCodeAlreadyRunning
 		}
 		time.Sleep(LockFileRetryInterval)
 	}
@@ -121,16 +135,20 @@ func main() {
 	// create and clean the config.TmpBase (/tmp/ort)
 	if !util.MkDir(config.TmpBase, cfg) {
 		log.Errorln("mkdir TmpBase '" + config.TmpBase + "' failed, cannot continue")
-		os.Exit(GeneralFailure)
+		log.Infoln(FailureExitMsg)
+		return ExitCodeGeneralFailure
 	} else if !util.CleanTmpDir(cfg) {
 		log.Errorln("CleanTmpDir failed, cannot continue")
-		os.Exit(GeneralFailure)
+		log.Infoln(FailureExitMsg)
+		return ExitCodeGeneralFailure
 	}
 
 	log.Infoln(time.Now().Format(time.RFC3339))
 
 	if !util.CheckUser(cfg) {
-		lock.UnlockAndExit(UserCheckError)
+
+		lock.Unlock()
+		return ExitCodeUserCheckError
 	}
 
 	toolName := trops.GetHeaderComment()
@@ -140,21 +158,26 @@ func main() {
 	// necessary to continue
 	if cfg.Files == t3cutil.ApplyFilesFlagReval {
 		syncdsUpdate, err = trops.CheckRevalidateState(false)
-		if err != nil || syncdsUpdate == torequest.UpdateTropsNotNeeded {
-			if err != nil {
-				log.Errorln("Checking revalidate state: " + err.Error())
-			} else {
-				log.Infoln("Checking revalidate state: returned UpdateTropsNotNeeded")
-			}
-			GitCommitAndExit(RevalidationError, cfg)
+
+		if err != nil {
+			log.Errorln("Checking revalidate state: " + err.Error())
+			return GitCommitAndExit(ExitCodeRevalidationError, FailureExitMsg, cfg)
+		}
+		if syncdsUpdate == torequest.UpdateTropsNotNeeded {
+			log.Infoln("Checking revalidate state: returned UpdateTropsNotNeeded")
+			return GitCommitAndExit(ExitCodeRevalidationError, SuccessExitMsg, cfg)
 		}
+
 	} else {
 		syncdsUpdate, err = trops.CheckSyncDSState()
 		if err != nil {
-			log.Errorln(err)
-			GitCommitAndExit(SyncDSError, cfg)
+			log.Errorln("Checking syncds state: " + err.Error())
+			return GitCommitAndExit(ExitCodeSyncDSError, FailureExitMsg, cfg)
 		}
 		if !cfg.IgnoreUpdateFlag && cfg.Files == t3cutil.ApplyFilesFlagAll && syncdsUpdate == torequest.UpdateTropsNotNeeded {
+			// If touching remap.config fails, we want to still try to restart services
+			// But log a critical-post-config-failure, which needs logged right before exit.
+			postConfigFail := false
 			// check for maxmind db updates even if we have no other updates
 			if CheckMaxmindUpdate(cfg) {
 				// We updated the db so we should touch and reload
@@ -163,15 +186,20 @@ func main() {
 				_, rc, err := util.ExecCommand("/usr/bin/touch", path)
 				if err != nil {
 					log.Errorf("failed to update the remap.config for reloading: %s\n", err.Error())
+					postConfigFail = true
 				} else if rc == 0 {
 					log.Infoln("updated the remap.config for reloading.")
 				}
 				if err := trops.StartServices(&syncdsUpdate); err != nil {
 					log.Errorln("failed to start services: " + err.Error())
-					GitCommitAndExit(ServicesError, cfg)
+					return GitCommitAndExit(ExitCodeServicesError, PostConfigFailureExitMsg, cfg)
 				}
 			}
-			GitCommitAndExit(Success, cfg)
+			finalMsg := SuccessExitMsg
+			if postConfigFail {
+				finalMsg = PostConfigFailureExitMsg
+			}
+			return GitCommitAndExit(ExitCodeSuccess, finalMsg, cfg)
 		}
 	}
 
@@ -183,22 +211,22 @@ func main() {
 		err = trops.ProcessPackages()
 		if err != nil {
 			log.Errorf("Error processing packages: %s\n", err)
-			GitCommitAndExit(PackagingError, cfg)
+			return GitCommitAndExit(ExitCodePackagingError, FailureExitMsg, cfg)
 		}
 
 		// check and make sure packages are enabled for startup
 		err = trops.CheckSystemServices()
 		if err != nil {
 			log.Errorf("Error verifying system services: %s\n", err.Error())
-			GitCommitAndExit(ServicesError, cfg)
+			return GitCommitAndExit(ExitCodeServicesError, FailureExitMsg, cfg)
 		}
 	}
 
 	log.Debugf("Preparing to fetch the config files for %s, files: %s, syncdsUpdate: %s\n", cfg.CacheHostName, cfg.Files, syncdsUpdate)
 	err = trops.GetConfigFileList()
 	if err != nil {
-		log.Errorf("Unable to continue: %s\n", err)
-		GitCommitAndExit(ConfigFilesError, cfg)
+		log.Errorf("Getting config file list: %s\n", err)
+		return GitCommitAndExit(ExitCodeConfigFilesError, FailureExitMsg, cfg)
 	}
 	syncdsUpdate, err = trops.ProcessConfigFiles()
 	if err != nil {
@@ -223,7 +251,7 @@ func main() {
 
 	if err := trops.StartServices(&syncdsUpdate); err != nil {
 		log.Errorln("failed to start services: " + err.Error())
-		GitCommitAndExit(ServicesError, cfg)
+		return GitCommitAndExit(ExitCodeServicesError, PostConfigFailureExitMsg, cfg)
 	}
 
 	// start 'teakd' if installed.
@@ -254,22 +282,33 @@ func main() {
 		log.Errorf("failed to update Traffic Ops: %s\n", err.Error())
 	}
 
-	GitCommitAndExit(Success, cfg)
+	return GitCommitAndExit(ExitCodeSuccess, SuccessExitMsg, cfg)
 }
 
-// TODO change code to always create git commits, if the dir is a repo
-// We only want --use-git to init the repo. If someone init'd the repo, t3c-apply should _always_ commit.
-// We don't want someone doing manual badass's and not having that log
+func LogPanic(f func() int) (exitCode int) {
+	defer func() {
+		if err := recover(); err != nil {
+			log.Errorf("panic: (err: %v) stacktrace:\n%s\n", err, tcutil.Stacktrace())
+			log.Infoln(FailureExitMsg)
+			exitCode = ExitCodeGeneralFailure
+			return
+		}
+	}()
+	return f()
+}
 
-// GitCommitAndExit attempts to git commit all changes, logs any error, and calls os.Exit with the given code.
-func GitCommitAndExit(exitCode int, cfg config.Cfg) {
-	success := exitCode == Success
+// GitCommitAndExit attempts to git commit all changes, and logs any error.
+// It then logs exitMsg at the Info level, and returns exitCode.
+// This is a helper function, to reduce the duplicated commit-log-return into a single line.
+func GitCommitAndExit(exitCode int, exitMsg string, cfg config.Cfg) int {
+	success := exitCode == ExitCodeSuccess
 	if cfg.UseGit == config.UseGitYes || cfg.UseGit == config.UseGitAuto {
 		if err := util.MakeGitCommitAll(cfg, util.GitChangeIsSelf, success); err != nil {
 			log.Errorln("git committing existing changes, dir '" + cfg.TsConfigDir + "': " + err.Error())
 		}
 	}
-	os.Exit(exitCode)
+	log.Infoln(exitMsg)
+	return exitCode
 }
 
 // CheckMaxmindUpdate will (if a url is set) check for a db on disk.
diff --git a/cache-config/t3c-apply/util/util.go b/cache-config/t3c-apply/util/util.go
index 4a10aee..6dc357b 100644
--- a/cache-config/t3c-apply/util/util.go
+++ b/cache-config/t3c-apply/util/util.go
@@ -87,12 +87,11 @@ func (f *FileLock) GetLock(lockFile string) bool {
 	return f.is_locked
 }
 
-// Releases a file lock and exits with the given status code.
-func (f *FileLock) UnlockAndExit(code int) {
+// Releases the file lock, if locked.
+func (f *FileLock) Unlock() {
 	if f.is_locked {
 		f.f_lock.Unlock()
 	}
-	os.Exit(code)
 }
 
 func DirectoryExists(dir string) (bool, os.FileInfo) {
diff --git a/cache-config/testing/ort-tests/t3c-fail-log.go b/cache-config/testing/ort-tests/t3c-fail-log.go
new file mode 100644
index 0000000..7d712fc
--- /dev/null
+++ b/cache-config/testing/ort-tests/t3c-fail-log.go
@@ -0,0 +1,51 @@
+package orttest
+
+/*
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/apache/trafficcontrol/cache-config/testing/ort-tests/tcdata"
+)
+
+func TestT3cApplyFailMsg(t *testing.T) {
+	tcd.WithObjs(t, []tcdata.TCObj{
+		tcdata.CDNs, tcdata.Types, tcdata.Tenants, tcdata.Parameters,
+		tcdata.Profiles, tcdata.ProfileParameters, tcdata.Statuses,
+		tcdata.Divisions, tcdata.Regions, tcdata.PhysLocations,
+		tcdata.CacheGroups, tcdata.Servers, tcdata.Topologies,
+		tcdata.DeliveryServices}, func() {
+		t.Run("test t3c apply logging a run failure message", doTestT3cApplyFailMsg)
+	})
+}
+
+func doTestT3cApplyFailMsg(t *testing.T) {
+	// verifies that when no hostname is passed,
+	// t3c-apply will get the OS hostname,
+	// and use the short name (which is what will be in Traffic Ops),
+	// and not the full FQDN.
+
+	stdOut, exitCode := t3cUpdateUnsetFlag("nonexistent-host-to-cause-failure", "badass")
+	if exitCode == 0 {
+		t.Fatalf("t3c-apply with nonexistent host expected failure, actual code %v output: %s", exitCode, stdOut)
+	}
+
+	outStr := strings.TrimSpace(string(stdOut))
+
+	if !strings.HasSuffix(outStr, "CRITICAL FAILURE, ABORTING") {
+		t.Fatalf("t3c-apply failure expected to end with critical failure message, actual code %v output: %s", exitCode, stdOut)
+	}
+}