You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficcontrol.apache.org by ne...@apache.org on 2017/04/12 21:43:54 UTC

[04/13] incubator-trafficcontrol git commit: Add TM2 tmcheck validator for all monitors

Add TM2 tmcheck validator for all monitors

Adds tmcheck.ValidateAllMonitorsOfflineStates, which efficiently
checks all monitors in Traffic Ops (i.e. only getting the CRConfig
once).


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/83b58d97
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/83b58d97
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/83b58d97

Branch: refs/heads/master
Commit: 83b58d97fb868309db0db48963a66f85a91caadb
Parents: 33fa72c
Author: Robert Butts <ro...@gmail.com>
Authored: Thu Mar 2 11:29:07 2017 -0700
Committer: Dave Neuman <ne...@apache.org>
Committed: Wed Apr 12 15:43:31 2017 -0600

----------------------------------------------------------------------
 .../traffic_monitor/enum/enum.go                |   3 +
 .../traffic_monitor/tmcheck/tmcheck.go          | 142 ++++++++++++++++++-
 .../traffic_monitor/tools/validate-offline.go   |   6 +-
 3 files changed, 143 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/enum/enum.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/enum/enum.go b/traffic_monitor_golang/traffic_monitor/enum/enum.go
index 45546bd..1c06ac8 100644
--- a/traffic_monitor_golang/traffic_monitor/enum/enum.go
+++ b/traffic_monitor_golang/traffic_monitor/enum/enum.go
@@ -32,6 +32,9 @@ import (
 	"strings"
 )
 
+// CDNName is the name of a CDN in Traffic Control. It is used as a map key when grouping monitors and CRConfigs by CDN.
+type CDNName string
+
 // TrafficMonitorName is the hostname of a Traffic Monitor peer.
 type TrafficMonitorName string
 

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
index d6a964e..1178721 100644
--- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
@@ -93,8 +93,12 @@ func ValidateOfflineStates(tmURI string, toClient *to.Session) error {
 	if err != nil {
 		return fmt.Errorf("getting CDN from Traffic Monitor: %v", err)
 	}
+	return ValidateOfflineStatesWithCDN(tmURI, cdn, toClient)
+}
 
-	crConfigBytes, err := toClient.CRConfigRaw(cdn)
+// ValidateOfflineStatesWithCDN validates per ValidateOfflineStates, but saves an additional query if the Traffic Monitor's CDN is known.
+func ValidateOfflineStatesWithCDN(tmURI string, tmCDN string, toClient *to.Session) error {
+	crConfigBytes, err := toClient.CRConfigRaw(tmCDN)
 	if err != nil {
 		return fmt.Errorf("getting CRConfig: %v", err)
 	}
@@ -104,19 +108,24 @@ func ValidateOfflineStates(tmURI string, toClient *to.Session) error {
 		return fmt.Errorf("unmarshalling CRConfig JSON: %v", err)
 	}
 
+	return ValidateOfflineStatesWithCRConfig(tmURI, &crConfig, toClient)
+}
+
+// ValidateOfflineStatesWithCRConfig validates per ValidateOfflineStates, but saves querying the CRConfig if it's already fetched.
+func ValidateOfflineStatesWithCRConfig(tmURI string, crConfig *crconfig.CRConfig, toClient *to.Session) error {
 	crStates, err := GetCRStates(tmURI + TrafficMonitorCRStatesPath)
 	if err != nil {
 		return fmt.Errorf("getting CRStates: %v", err)
 	}
 
-	return ValidateCRStates(crStates, &crConfig)
+	return ValidateCRStates(crStates, crConfig)
 }
 
 // ValidateCRStates validates that no OFFLINE or ADMIN_DOWN caches in the given CRConfig are marked Available in the given CRStates.
 func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) error {
 	for cacheName, cacheInfo := range crconfig.ContentServers {
 		status := enum.CacheStatusFromString(string(*cacheInfo.Status))
-		if status != enum.CacheStatusOffline || status != enum.CacheStatusOffline {
+		if status != enum.CacheStatusAdminDown || status != enum.CacheStatusOffline {
 			continue
 		}
 
@@ -133,8 +142,8 @@ func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) erro
 	return nil
 }
 
-// Validator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
-func Validator(
+// CRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continuously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
+func CRStatesOfflineValidator(
 	tmURI string,
 	toClient *to.Session,
 	interval time.Duration,
@@ -170,3 +179,126 @@ func Validator(
 		time.Sleep(interval)
 	}
 }
+
+// CRConfigOrError contains a CRConfig or an error. Union types? Monads? What are those? Callers in this file check Err first, and only use CRConfig when Err is nil.
+type CRConfigOrError struct {
+	CRConfig *crconfig.CRConfig // the parsed CRConfig; only meaningful when Err is nil
+	Err      error              // the error encountered while obtaining the CRConfig, if any
+}
+
+// ValidateAllMonitorsOfflineStates validates, for every Traffic Monitor server returned by Traffic Ops, that no OFFLINE or ADMIN_DOWN caches in its CDN's CRConfig are marked Available in that monitor's CRStates. The CRConfig is fetched once per CDN rather than once per monitor. If includeOffline is false, monitors whose status is not ONLINE or REPORTED are skipped. It returns a map of monitor hostname to error containing only the monitors which failed, and a non-nil error only if the monitors could not be fetched from Traffic Ops.
+func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+	trafficMonitorType := "RASCAL" // the server type queried to find Traffic Monitors
+	monitorTypeQuery := map[string][]string{"type": []string{trafficMonitorType}}
+	servers, err := toClient.ServersByType(monitorTypeQuery)
+	if err != nil {
+		return nil, fmt.Errorf("getting monitors from Traffic Ops: %v", err)
+	}
+
+	if !includeOffline {
+		servers = FilterOfflines(servers)
+	}
+
+	crConfigs := GetCRConfigs(GetCDNs(servers), toClient) // one fetch per distinct CDN, shared by all of its monitors
+
+	errs := map[enum.TrafficMonitorName]error{} // only monitors which fail validation get an entry
+	for _, server := range servers {
+		crConfig := crConfigs[enum.CDNName(server.CDNName)]
+		if err := crConfig.Err; err != nil {
+			errs[enum.TrafficMonitorName(server.HostName)] = fmt.Errorf("getting CRConfig: %v", err)
+			continue
+		}
+
+		fqdn := fmt.Sprintf("%s.%s", server.HostName, server.DomainName) // NOTE(review): passed as the tmURI argument with no scheme prepended here; confirm GetCRStates accepts that
+		if err := ValidateOfflineStatesWithCRConfig(fqdn, crConfig.CRConfig, toClient); err != nil {
+			errs[enum.TrafficMonitorName(server.HostName)] = err
+		}
+	}
+	return errs, nil
+}
+
+// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continuously validates every `interval`, and calls `onErr` once a monitor has been invalid for longer than `grace`, `onResumeSuccess` when a failure ceases, and `onCheck` for every monitor which failed the poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty.
+func AllMonitorsCRStatesOfflineValidator(
+	toClient *to.Session,
+	interval time.Duration,
+	includeOffline bool,
+	grace time.Duration,
+	onErr func(enum.TrafficMonitorName, error),
+	onResumeSuccess func(enum.TrafficMonitorName),
+	onCheck func(enum.TrafficMonitorName, error),
+) {
+	invalid := map[enum.TrafficMonitorName]bool{}
+	invalidStart := map[enum.TrafficMonitorName]time.Time{}
+	for {
+		tmErrs, err := ValidateAllMonitorsOfflineStates(toClient, includeOffline)
+		if err != nil {
+			// Nothing was validated, so skip the per-monitor bookkeeping: with a nil tmErrs every invalid monitor would be falsely reported as recovered.
+			onErr("", fmt.Errorf("Error validating monitors: %v", err))
+			time.Sleep(interval)
+			continue
+		}
+
+		for name, err := range tmErrs {
+			if err != nil {
+				if !invalid[name] {
+					invalid[name] = true
+					invalidStart[name] = time.Now()
+				}
+				// Only report once the monitor has been invalid for longer than the grace period.
+				if invalidSpan := time.Since(invalidStart[name]); invalidSpan > grace {
+					onErr(name, fmt.Errorf("invalid state for %v: %v\n", invalidSpan, err))
+				}
+			}
+			onCheck(name, err)
+		}
+
+		// A previously invalid monitor which is absent from tmErrs passed this poll, so it has recovered.
+		for tm, tmInvalid := range invalid {
+			if _, ok := tmErrs[tm]; tmInvalid && !ok {
+				onResumeSuccess(tm)
+				invalid[tm] = false
+			}
+		}
+		time.Sleep(interval)
+	}
+}
+
+// FilterOfflines returns a new slice holding only the servers whose status is ONLINE or REPORTED, in their original order.
+func FilterOfflines(servers []to.Server) []to.Server {
+	kept := []to.Server{}
+	for _, srv := range servers {
+		switch enum.CacheStatusFromString(srv.Status) {
+		case enum.CacheStatusOnline, enum.CacheStatusReported:
+			// Any other status is dropped.
+			kept = append(kept, srv)
+		}
+	}
+	return kept
+}
+
+// GetCDNs returns the set of distinct CDN names of the given servers.
+func GetCDNs(servers []to.Server) map[enum.CDNName]struct{} {
+	cdnSet := make(map[enum.CDNName]struct{})
+	for i := range servers {
+		cdnSet[enum.CDNName(servers[i].CDNName)] = struct{}{}
+	}
+	return cdnSet
+}
+
+// GetCRConfigs fetches and parses the CRConfig of each given CDN from Traffic Ops. Every CDN in cdns gets an entry in the returned map, holding either the parsed CRConfig or the error which prevented fetching or unmarshalling it.
+func GetCRConfigs(cdns map[enum.CDNName]struct{}, toClient *to.Session) map[enum.CDNName]CRConfigOrError {
+	crConfigs := make(map[enum.CDNName]CRConfigOrError, len(cdns))
+	for cdn := range cdns {
+		crConfigBytes, err := toClient.CRConfigRaw(string(cdn))
+		if err != nil {
+			crConfigs[cdn] = CRConfigOrError{Err: fmt.Errorf("getting CRConfig: %v", err)}
+			continue
+		}
+		crConfig := crconfig.CRConfig{}
+		if err := json.Unmarshal(crConfigBytes, &crConfig); err != nil {
+			crConfigs[cdn] = CRConfigOrError{Err: fmt.Errorf("unmarshalling CRConfig JSON: %v", err)}
+			continue // keep the error; falling through would overwrite it with a partially decoded CRConfig
+		}
+		crConfigs[cdn] = CRConfigOrError{CRConfig: &crConfig}
+	}
+	return crConfigs
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/83b58d97/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go b/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
index 92b17c7..07eee78 100644
--- a/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
+++ b/traffic_monitor_golang/traffic_monitor/tools/validate-offline.go
@@ -117,7 +117,7 @@ func main() {
 		}
 	}
 
-	go tmcheck.Validator(*tmURI, toClient, *interval, *grace, onErr, onResumeSuccess, onCheck)
+	go tmcheck.CRStatesOfflineValidator(*tmURI, toClient, *interval, *grace, onErr, onResumeSuccess, onCheck)
 
 	if err := serve(log, *toURI, *tmURI); err != nil {
 		fmt.Printf("Serve error: %v\n", err)
@@ -145,8 +145,8 @@ func serve(log Log, toURI string, tmURI string) error {
 
 		fmt.Fprintf(w, `<pre>`)
 		logCopy := log.Get()
-		for i := len(logCopy) - 1; i >= 0; i-- {
-			fmt.Fprintf(w, "%s\n", logCopy[i])
+		for _, msg := range logCopy {
+			fmt.Fprintf(w, "%s\n", msg)
 		}
 		fmt.Fprintf(w, `</pre>`)