You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficcontrol.apache.org by ne...@apache.org on 2017/04/12 21:44:01 UTC
[11/13] incubator-trafficcontrol git commit: Add TM2 validator for
peer pollers
Add TM2 validator for peer pollers
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/c9196a12
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/c9196a12
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/c9196a12
Branch: refs/heads/master
Commit: c9196a12e57c381b8ba6de29e2aa4009a27a1f14
Parents: 4658c51
Author: Robert Butts <ro...@gmail.com>
Authored: Fri Mar 3 15:39:10 2017 -0700
Committer: Dave Neuman <ne...@apache.org>
Committed: Wed Apr 12 15:43:31 2017 -0600
----------------------------------------------------------------------
.../traffic_monitor/tmcheck/peerpoller.go | 112 ++++++++++
.../traffic_monitor/tmcheck/tmcheck.go | 69 +++++-
.../tools/nagios-validate-peerpoller.go | 36 +++
.../tools/service-validate-offline.go | 204 -----------------
.../traffic_monitor/tools/validator-service.go | 222 +++++++++++++++++++
5 files changed, 432 insertions(+), 211 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
new file mode 100644
index 0000000..cdeaf36
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package tmcheck
+
+import (
+ "encoding/json"
+ "fmt"
+ "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
+ to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
+ "io/ioutil"
+ "time"
+)
+
+// PeerPollMax is the maximum age of the oldest peer poll that ValidatePeerPoller accepts; anything older is reported as a dead peer poller.
+const PeerPollMax = time.Duration(10) * time.Second
+
+// TrafficMonitorStatsPath is the path of the Stats endpoint, which is appended to a Traffic Monitor URI.
+const TrafficMonitorStatsPath = "/publish/Stats"
+
+// TrafficMonitorStatsJSON represents the top-level JSON document returned by Traffic Monitor's Stats endpoint. Only its "stats" object is decoded, and of that only the Oldest Polled Peer Time member, as needed by this library.
+type TrafficMonitorStatsJSON struct {
+ Stats TrafficMonitorStats `json:"stats"`
+}
+
+// TrafficMonitorStats represents the inner "stats" JSON object returned by Traffic Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer Time member, in milliseconds, as needed by this library.
+type TrafficMonitorStats struct {
+ OldestPolledPeerTime int `json:"Oldest Polled Peer Time (ms)"`
+}
+
+// GetOldestPolledPeerTime requests the Stats endpoint (TrafficMonitorStatsPath) of the Traffic Monitor at uri, and returns the "Oldest Polled Peer Time" it reports, converted from milliseconds to a time.Duration.
+// An error is returned if the request fails, the reply status is not 2xx, the body cannot be read, or the body is not valid JSON.
+func GetOldestPolledPeerTime(uri string) (time.Duration, error) {
+	statsURI := uri + TrafficMonitorStatsPath
+	// NOTE(review): assumes getClient returns a *http.Client (or equivalent), so resp is a *http.Response - confirm.
+	resp, err := getClient().Get(statsURI)
+	if err != nil {
+		return 0, fmt.Errorf("requesting %v: %v", statsURI, err)
+	}
+	// The body must be closed on every path, or the underlying connection is leaked.
+	defer resp.Body.Close()
+
+	// An error page would otherwise either fail with a confusing JSON error, or decode to a zero (healthy-looking) time.
+	if resp.StatusCode < 200 || resp.StatusCode > 299 {
+		return 0, fmt.Errorf("requesting %v: unexpected status code %v", statsURI, resp.StatusCode)
+	}
+
+	respBytes, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return 0, fmt.Errorf("reading reply from %v: %v", statsURI, err)
+	}
+
+	stats := TrafficMonitorStatsJSON{}
+	if err := json.Unmarshal(respBytes, &stats); err != nil {
+		return 0, fmt.Errorf("unmarshalling reply from %v: %v", statsURI, err)
+	}
+
+	return time.Duration(stats.Stats.OldestPolledPeerTime) * time.Millisecond, nil
+}
+
+// ValidatePeerPoller fetches the oldest polled peer time of the Traffic Monitor at uri, and returns an error if it could not be fetched or if it exceeds PeerPollMax. It returns nil when the peer poller is considered alive.
+func ValidatePeerPoller(uri string) error {
+	oldest, err := GetOldestPolledPeerTime(uri)
+	switch {
+	case err != nil:
+		return fmt.Errorf("failed to get oldest peer time: %v", err)
+	case oldest > PeerPollMax:
+		return fmt.Errorf("Peer poller is dead, last poll was %v ago", oldest)
+	default:
+		return nil
+	}
+}
+
+// ValidateAllPeerPollers runs ValidatePeerPoller against every monitor returned by GetMonitors, and returns the result per monitor, keyed by host name. A nil map value means that monitor validated successfully.
+// The returned error is non-nil only if the monitors could not be fetched from Traffic Ops.
+func ValidateAllPeerPollers(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+	monitors, err := GetMonitors(toClient, includeOffline)
+	if err != nil {
+		return nil, err
+	}
+
+	results := make(map[enum.TrafficMonitorName]error, len(monitors))
+	for _, monitor := range monitors {
+		name := enum.TrafficMonitorName(monitor.HostName)
+		monitorURI := fmt.Sprintf("http://%s.%s", monitor.HostName, monitor.DomainName)
+		results[name] = ValidatePeerPoller(monitorURI)
+	}
+	return results, nil
+}
+
+// PeerPollersValidator runs Validator with ValidatePeerPoller as the check, against the single Traffic Monitor at tmURI. All other arguments are passed through to Validator unchanged.
+func PeerPollersValidator(
+	tmURI string,
+	toClient *to.Session,
+	interval time.Duration,
+	grace time.Duration,
+	onErr func(error),
+	onResumeSuccess func(),
+	onCheck func(error),
+) {
+	// Validator expects a check which also takes a Traffic Ops session; the peer poller check has no use for one, so it is dropped here.
+	check := func(uri string, _ *to.Session) error {
+		return ValidatePeerPoller(uri)
+	}
+	Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, onCheck, check)
+}
+
+// PeerPollersAllValidator runs AllValidator with ValidateAllPeerPollers as the check, i.e. it validates the peer pollers of the monitors fetched from Traffic Ops. All other arguments are passed through to AllValidator unchanged; the callbacks receive the name of the monitor they apply to.
+func PeerPollersAllValidator(
+ toClient *to.Session,
+ interval time.Duration,
+ includeOffline bool,
+ grace time.Duration,
+ onErr func(enum.TrafficMonitorName, error),
+ onResumeSuccess func(enum.TrafficMonitorName),
+ onCheck func(enum.TrafficMonitorName, error),
+) {
+ AllValidator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck, ValidateAllPeerPollers)
+}
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
index 819c1ab..a6e12ba 100644
--- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
@@ -142,8 +142,28 @@ func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) erro
return nil
}
+// ValidatorFunc is the signature shared by the validators of a single Traffic Monitor, such as CRStatesOfflineValidator.
+type ValidatorFunc func(
+ tmURI string,
+ toClient *to.Session,
+ interval time.Duration,
+ grace time.Duration,
+ onErr func(error),
+ onResumeSuccess func(),
+ onCheck func(error),
+)
+
+// AllValidatorFunc is the signature shared by the validators of all Traffic Monitors, such as AllMonitorsCRStatesOfflineValidator. Its callbacks additionally receive the name of the monitor they apply to.
+type AllValidatorFunc func(
+ toClient *to.Session,
+ interval time.Duration,
+ includeOffline bool,
+ grace time.Duration,
+ onErr func(enum.TrafficMonitorName, error),
+ onResumeSuccess func(enum.TrafficMonitorName),
+ onCheck func(enum.TrafficMonitorName, error),
+)
+
// Validator is designed to be run as a goroutine, and does not return. It continuously runs the given `validator` every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
-func CRStatesOfflineValidator(
+func Validator(
tmURI string,
toClient *to.Session,
interval time.Duration,
@@ -151,11 +171,12 @@ func CRStatesOfflineValidator(
onErr func(error),
onResumeSuccess func(),
onCheck func(error),
+ validator func(tmURI string, toClient *to.Session) error,
) {
invalid := false
invalidStart := time.Time{}
for {
- err := ValidateOfflineStates(tmURI, toClient)
+ err := validator(tmURI, toClient)
if err != nil && !invalid {
invalid = true
@@ -180,14 +201,26 @@ func CRStatesOfflineValidator(
}
}
+// CRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continuously runs ValidateOfflineStates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
+func CRStatesOfflineValidator(
+ tmURI string,
+ toClient *to.Session,
+ interval time.Duration,
+ grace time.Duration,
+ onErr func(error),
+ onResumeSuccess func(),
+ onCheck func(error),
+) {
+ Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, onCheck, ValidateOfflineStates)
+}
+
// CRConfigOrError contains a CRConfig or an error, standing in for a union (result) type, which Go does not have.
type CRConfigOrError struct {
CRConfig *crconfig.CRConfig
Err error
}
-// ValidateOfflineStates validates that no OFFLINE or ADMIN_DOWN caches in the given Traffic Ops' CRConfig are marked Available in the given Traffic Monitor's CRStates.
-func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+func GetMonitors(toClient *to.Session, includeOffline bool) ([]to.Server, error) {
trafficMonitorType := "RASCAL"
monitorTypeQuery := map[string][]string{"type": []string{trafficMonitorType}}
servers, err := toClient.ServersByType(monitorTypeQuery)
@@ -198,6 +231,15 @@ func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool)
if !includeOffline {
servers = FilterOfflines(servers)
}
+ return servers, nil
+}
+
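+// ValidateAllMonitorsOfflineStates validates, for each Traffic Monitor fetched from the given Traffic Ops, that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops' CRConfig are marked Available in that monitor's CRStates. It returns the validation result per monitor.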
+func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+ servers, err := GetMonitors(toClient, includeOffline)
+ if err != nil {
+ return nil, err
+ }
crConfigs := GetCRConfigs(GetCDNs(servers), toClient)
@@ -215,8 +257,7 @@ func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool)
return errs, nil
}
-// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty.
-func AllMonitorsCRStatesOfflineValidator(
+func AllValidator(
toClient *to.Session,
interval time.Duration,
includeOffline bool,
@@ -224,12 +265,13 @@ func AllMonitorsCRStatesOfflineValidator(
onErr func(enum.TrafficMonitorName, error),
onResumeSuccess func(enum.TrafficMonitorName),
onCheck func(enum.TrafficMonitorName, error),
+ validator func(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error),
) {
invalid := map[enum.TrafficMonitorName]bool{}
invalidStart := map[enum.TrafficMonitorName]time.Time{}
metaFail := false
for {
- tmErrs, err := ValidateAllMonitorsOfflineStates(toClient, includeOffline)
+ tmErrs, err := validator(toClient, includeOffline)
if err != nil {
onErr("", fmt.Errorf("Error validating monitors: %v", err))
time.Sleep(interval)
@@ -266,6 +308,19 @@ func AllMonitorsCRStatesOfflineValidator(
}
}
+// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continuously runs ValidateAllMonitorsOfflineStates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty.
+func AllMonitorsCRStatesOfflineValidator(
+ toClient *to.Session,
+ interval time.Duration,
+ includeOffline bool,
+ grace time.Duration,
+ onErr func(enum.TrafficMonitorName, error),
+ onResumeSuccess func(enum.TrafficMonitorName),
+ onCheck func(enum.TrafficMonitorName, error),
+) {
+ AllValidator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck, ValidateAllMonitorsOfflineStates)
+}
+
// FilterOfflines returns only servers which are REPORTED or ONLINE
func FilterOfflines(servers []to.Server) []to.Server {
onlineServers := []to.Server{}
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
new file mode 100644
index 0000000..5e9c9fd
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
@@ -0,0 +1,36 @@
+package main
+
+import (
+ "flag"
+ "fmt"
+ "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/common/nagios"
+ "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
+)
+
+// UserAgent is the HTTP user agent of this tool. NOTE(review): within this file it is only referenced by the commented-out Traffic Ops login in main.
+const UserAgent = "tm-peerpoller-validator/0.1"
+
+// main validates the peer poller of the Traffic Monitor given by the -tm flag, and reports the result through nagios.Exit: Critical with the validation error on failure, Ok otherwise.
+func main() {
+	tmURI := flag.String("tm", "", "The Traffic Monitor URI, whose Peer Poller to validate")
+	// toUser := flag.String("touser", "", "The Traffic Ops user")
+	// toPass := flag.String("topass", "", "The Traffic Ops password")
+	// includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors")
+	help := flag.Bool("help", false, "Usage info")
+	helpBrief := flag.Bool("h", false, "Usage info")
+	flag.Parse()
+	if *help || *helpBrief {
+		// -tm is the only flag defined above, so it is the only one the usage may advertise.
+		fmt.Printf("Usage: ./nagios-validate-peerpoller -tm http://traffic-monitor.example.net\n")
+		return
+	}
+
+	// toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout)
+	// if err != nil {
+	// 	fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
+	// 	return
+	// }
+
+	err := tmcheck.ValidatePeerPoller(*tmURI)
+	if err != nil {
+		// NOTE(review): presumably nagios.Exit terminates the process, so the Ok below is not reached after a Critical - confirm.
+		nagios.Exit(nagios.Critical, fmt.Sprintf("Error validating monitor peer poller: %v", err))
+	}
+	nagios.Exit(nagios.Ok, "")
+}
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go b/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
deleted file mode 100644
index 41e0eb4..0000000
--- a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-// validate-offline is a utility HTTP service which polls the given Traffic Monitor and validates that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops CRConfig are marked Available in Traffic Monitor's CRstates endpoint.
-
-package main
-
-import (
- "flag"
- "fmt"
- "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
- "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
- to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
- "net/http"
- "sort"
- "sync"
- "time"
-)
-
-const UserAgent = "tm-offline-validator/0.1"
-
-const LogLimit = 10
-
-type Log struct {
- log *[]string
- limit int
- errored *bool
- lastCheck *time.Time
- m *sync.RWMutex
-}
-
-func (l *Log) Add(msg string) {
- l.m.Lock()
- defer l.m.Unlock()
- *l.log = append([]string{msg}, *l.log...)
- if len(*l.log) > l.limit {
- *l.log = (*l.log)[:l.limit]
- }
-}
-
-func (l *Log) Get() []string {
- l.m.RLock()
- defer l.m.RUnlock()
- return *l.log
-}
-
-func (l *Log) GetErrored() (bool, time.Time) {
- l.m.RLock()
- defer l.m.RUnlock()
- return *l.errored, *l.lastCheck
-}
-
-func (l *Log) SetErrored(e bool) {
- l.m.Lock()
- defer l.m.Unlock()
- *l.errored = e
- *l.lastCheck = time.Now()
-}
-
-func NewLog() Log {
- log := make([]string, 0, LogLimit+1)
- errored := false
- limit := LogLimit
- lastCheck := time.Time{}
- return Log{log: &log, errored: &errored, m: &sync.RWMutex{}, limit: limit, lastCheck: &lastCheck}
-}
-
-type Logs struct {
- logs map[enum.TrafficMonitorName]Log
- m *sync.RWMutex
-}
-
-func NewLogs() Logs {
- return Logs{logs: map[enum.TrafficMonitorName]Log{}, m: &sync.RWMutex{}}
-}
-
-func (l Logs) Get(name enum.TrafficMonitorName) Log {
- l.m.Lock()
- defer l.m.Unlock()
- if _, ok := l.logs[name]; !ok {
- l.logs[name] = NewLog()
- }
- return l.logs[name]
-}
-
-func (l Logs) GetMonitors() []string {
- l.m.RLock()
- defer l.m.RUnlock()
- monitors := []string{}
- for name, _ := range l.logs {
- monitors = append(monitors, string(name))
- }
- return monitors
-}
-
-func main() {
- toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to validate")
- toUser := flag.String("touser", "", "The Traffic Ops user")
- toPass := flag.String("topass", "", "The Traffic Ops password")
- interval := flag.Duration("interval", time.Second*time.Duration(5), "The interval to validate")
- grace := flag.Duration("grace", time.Second*time.Duration(30), "The grace period before invalid states are reported")
- includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors")
- help := flag.Bool("help", false, "Usage info")
- helpBrief := flag.Bool("h", false, "Usage info")
- flag.Parse()
- if *help || *helpBrief {
- fmt.Printf("Usage: go run validate-offline -to https://traffic-ops.example.net -touser bill -topass thelizard -tm http://traffic-monitor.example.net -interval 5s -grace 30s -includeOffline true\n")
- return
- }
-
- toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout)
- if err != nil {
- fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
- return
- }
-
- logs := NewLogs()
-
- onErr := func(name enum.TrafficMonitorName, err error) {
- log := logs.Get(name)
- log.Add(fmt.Sprintf("%v ERROR %v\n", time.Now(), err))
- log.SetErrored(true)
- }
-
- onResumeSuccess := func(name enum.TrafficMonitorName) {
- log := logs.Get(name)
- log.Add(fmt.Sprintf("%v INFO State Valid\n", time.Now()))
- log.SetErrored(false)
- }
-
- onCheck := func(name enum.TrafficMonitorName, err error) {
- log := logs.Get(name)
- log.SetErrored(err != nil)
- }
-
- go tmcheck.AllMonitorsCRStatesOfflineValidator(toClient, *interval, *includeOffline, *grace, onErr, onResumeSuccess, onCheck)
-
- if err := serve(logs, *toURI); err != nil {
- fmt.Printf("Serve error: %v\n", err)
- }
-}
-
-func serve(logs Logs, toURI string) error {
- http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
- w.Header().Set("Access-Control-Allow-Origin", "*")
- w.Header().Set("Content-Type", "text/html")
- fmt.Fprintf(w, `<!DOCTYPE html>
-<meta http-equiv="refresh" content="5">
-<meta charset="utf-8">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<title>Traffic Monitor Offline Validator</title>
-<style type="text/css">body{margin:40px auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`)
-
- fmt.Fprintf(w, `<p>%s`, toURI)
-
- fmt.Fprintf(w, `<table style="width:100%%">`)
-
- monitors := logs.GetMonitors()
- sort.Strings(monitors) // sort, so they're always in the same order in the webpage
- for _, monitor := range monitors {
- fmt.Fprintf(w, `</tr>`)
-
- log := logs.Get(enum.TrafficMonitorName(monitor))
-
- fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor)
- errored, lastCheck := log.GetErrored()
- if errored {
- fmt.Fprintf(w, `<td><span style="color:red">Invalid</span></td>`)
- } else {
- fmt.Fprintf(w, `<td><span style="color:limegreen">Valid</span></td>`)
- }
- fmt.Fprintf(w, `<td><span>as of %v</span></td>`, lastCheck)
-
- fmt.Fprintf(w, `<td><span style="font-family:monospace">`)
- logCopy := log.Get()
- firstMsg := ""
- if len(logCopy) > 0 {
- firstMsg = logCopy[0]
- }
- fmt.Fprintf(w, "%s\n", firstMsg)
- fmt.Fprintf(w, `</span></td>`)
-
- fmt.Fprintf(w, `</tr>`)
- }
- fmt.Fprintf(w, `</table>`)
- })
- return http.ListenAndServe(":80", nil)
-}
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/validator-service.go b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
new file mode 100644
index 0000000..0b551c8
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
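+// validator-service is a utility HTTP service which continuously validates the Traffic Monitors known to Traffic Ops, and serves the results as a web page. It validates that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops CRConfig are marked Available in each Traffic Monitor's CRStates endpoint, and that each Traffic Monitor's peer poller is alive.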
+
+package main
+
+import (
+ "flag"
+ "fmt"
+ "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
+ "github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
+ to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
+ "io"
+ "net/http"
+ "sort"
+ "sync"
+ "time"
+)
+
+// UserAgent is the HTTP user agent used when logging in to Traffic Ops.
+const UserAgent = "tm-offline-validator/0.1"
+
+// LogLimit is the maximum number of messages kept per Log created by NewLog.
+const LogLimit = 10
+
+// Log is the bounded message log and latest validation state of a single monitor. All state is held behind pointers, so copies of a Log share the same underlying data and mutex.
+type Log struct {
+ log *[]string // messages, most recent first (see Add)
+ limit int // maximum number of messages kept by Add
+ errored *bool // value passed to the most recent SetErrored
+ lastCheck *time.Time // time of the most recent SetErrored
+ m *sync.RWMutex // guards log, errored and lastCheck
+}
+
+// Add puts msg at the front of the log, and drops the oldest messages beyond the log's limit.
+func (l *Log) Add(msg string) {
+	l.m.Lock()
+	defer l.m.Unlock()
+
+	// Prepending onto a fresh slice means slices previously returned by Get are never written to.
+	entries := append([]string{msg}, *l.log...)
+	if len(entries) > l.limit {
+		entries = entries[:l.limit]
+	}
+	*l.log = entries
+}
+
+// Get returns the current messages, most recent first. The returned slice is the log's current slice, not a copy.
+func (l *Log) Get() []string {
+	l.m.RLock()
+	defer l.m.RUnlock()
+	entries := *l.log
+	return entries
+}
+
+// GetErrored returns the value passed to the most recent SetErrored, and the time of that call. For a new Log these are false and the zero time.
+func (l *Log) GetErrored() (bool, time.Time) {
+	l.m.RLock()
+	defer l.m.RUnlock()
+	errored, checkedAt := *l.errored, *l.lastCheck
+	return errored, checkedAt
+}
+
+// SetErrored records e as the current validation state, and the current time as the time of the last check.
+func (l *Log) SetErrored(e bool) {
+	l.m.Lock()
+	defer l.m.Unlock()
+	*l.errored, *l.lastCheck = e, time.Now()
+}
+
+func NewLog() Log {
+ log := make([]string, 0, LogLimit+1)
+ errored := false
+ limit := LogLimit
+ lastCheck := time.Time{}
+ return Log{log: &log, errored: &errored, m: &sync.RWMutex{}, limit: limit, lastCheck: &lastCheck}
+}
+
+// Logs is a collection of Log values, keyed by monitor name. The map and the mutex pointer are shared by copies of a Logs, which is why its methods can use value receivers.
+type Logs struct {
+ logs map[enum.TrafficMonitorName]Log // one Log per monitor, created on first Get
+ m *sync.RWMutex // guards logs
+}
+
+// NewLogs returns an empty Logs, ready for use.
+func NewLogs() Logs {
+	return Logs{
+		logs: make(map[enum.TrafficMonitorName]Log),
+		m:    new(sync.RWMutex),
+	}
+}
+
+// Get returns the Log of the monitor with the given name, creating and storing a new one if none exists yet.
+func (l Logs) Get(name enum.TrafficMonitorName) Log {
+	// A write lock is needed even for lookups, because a missing entry is inserted.
+	l.m.Lock()
+	defer l.m.Unlock()
+
+	entry, found := l.logs[name]
+	if !found {
+		entry = NewLog()
+		l.logs[name] = entry
+	}
+	return entry
+}
+
+// GetMonitors returns the names of all monitors which currently have a Log, in no particular order. The result is never nil.
+func (l Logs) GetMonitors() []string {
+	l.m.RLock()
+	defer l.m.RUnlock()
+
+	names := make([]string, 0, len(l.logs))
+	for name := range l.logs {
+		names = append(names, string(name))
+	}
+	return names
+}
+
+// startValidator starts validator in a new goroutine, with callbacks which record its results, and returns the Logs those results are recorded in.
+// Errors and recoveries are added to the log of the monitor they apply to as timestamped messages; every check updates that monitor's errored state.
+func startValidator(validator tmcheck.AllValidatorFunc, toClient *to.Session, interval time.Duration, includeOffline bool, grace time.Duration) Logs {
+	results := NewLogs()
+
+	recordErr := func(name enum.TrafficMonitorName, err error) {
+		monitorLog := results.Get(name)
+		line := fmt.Sprintf("%v ERROR %v\n", time.Now(), err)
+		monitorLog.Add(line)
+		monitorLog.SetErrored(true)
+	}
+
+	recordResume := func(name enum.TrafficMonitorName) {
+		monitorLog := results.Get(name)
+		line := fmt.Sprintf("%v INFO State Valid\n", time.Now())
+		monitorLog.Add(line)
+		monitorLog.SetErrored(false)
+	}
+
+	recordCheck := func(name enum.TrafficMonitorName, err error) {
+		failed := err != nil
+		monitorLog := results.Get(name)
+		monitorLog.SetErrored(failed)
+	}
+
+	go validator(toClient, interval, includeOffline, grace, recordErr, recordResume, recordCheck)
+	return results
+}
+
+// main logs in to Traffic Ops, starts the CRStates offline validator and the peer poller validator for all monitors, and serves their results over HTTP until the server fails.
+func main() {
+	toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to validate")
+	toUser := flag.String("touser", "", "The Traffic Ops user")
+	toPass := flag.String("topass", "", "The Traffic Ops password")
+	interval := flag.Duration("interval", time.Second*time.Duration(5), "The interval to validate")
+	grace := flag.Duration("grace", time.Second*time.Duration(30), "The grace period before invalid states are reported")
+	includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors")
+	help := flag.Bool("help", false, "Usage info")
+	helpBrief := flag.Bool("h", false, "Usage info")
+	flag.Parse()
+	if *help || *helpBrief {
+		// The usage only lists flags defined above (there is no -tm flag), and uses the -flag=value form, which is the only form the flag package accepts for setting a boolean value.
+		fmt.Printf("Usage: go run validator-service.go -to https://traffic-ops.example.net -touser bill -topass thelizard -interval 5s -grace 30s -includeOffline=true\n")
+		return
+	}
+
+	toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout)
+	if err != nil {
+		fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
+		return
+	}
+
+	// Each validator runs in its own goroutine, and records into its own Logs.
+	crStatesOfflineLogs := startValidator(tmcheck.AllMonitorsCRStatesOfflineValidator, toClient, *interval, *includeOffline, *grace)
+	peerPollerLogs := startValidator(tmcheck.PeerPollersAllValidator, toClient, *interval, *includeOffline, *grace)
+
+	if err := serve(*toURI, crStatesOfflineLogs, peerPollerLogs); err != nil {
+		fmt.Printf("Serve error: %v\n", err)
+	}
+}
+
+// printLogs writes an HTML table to w, with one row per monitor in logs, sorted by monitor name. Each row has the monitor name, whether its last check was valid, the time of that check, and its most recent log message (empty if it has none).
+// NOTE(review): monitor names and log messages are written without HTML escaping.
+func printLogs(logs Logs, w io.Writer) {
+	fmt.Fprintf(w, `<table style="width:100%%">`)
+
+	monitors := logs.GetMonitors()
+	sort.Strings(monitors) // sort, so they're always in the same order in the webpage
+	for _, monitor := range monitors {
+		// Open the row. This previously wrote a closing tag, which left the cells outside of any row.
+		fmt.Fprintf(w, `<tr>`)
+
+		log := logs.Get(enum.TrafficMonitorName(monitor))
+
+		fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor)
+		errored, lastCheck := log.GetErrored()
+		if errored {
+			fmt.Fprintf(w, `<td><span style="color:red">Invalid</span></td>`)
+		} else {
+			fmt.Fprintf(w, `<td><span style="color:limegreen">Valid</span></td>`)
+		}
+		fmt.Fprintf(w, `<td><span>as of %v</span></td>`, lastCheck)
+
+		fmt.Fprintf(w, `<td><span style="font-family:monospace">`)
+		// Messages are stored most recent first, so the first one is the latest.
+		messages := log.Get()
+		firstMsg := ""
+		if len(messages) > 0 {
+			firstMsg = messages[0]
+		}
+		fmt.Fprintf(w, "%s\n", firstMsg)
+		fmt.Fprintf(w, `</span></td>`)
+
+		fmt.Fprintf(w, `</tr>`)
+	}
+	fmt.Fprintf(w, `</table>`)
+}
+
+// serve registers a handler for "/" on the default HTTP mux, which renders the Traffic Ops URI and both validators' logs as a self-refreshing HTML page, and then serves on port 80. It only returns if the server fails, with that error.
+func serve(toURI string, crStatesOfflineLogs Logs, peerPollerLogs Logs) error {
+	page := func(w http.ResponseWriter, r *http.Request) {
+		header := w.Header()
+		header.Set("Access-Control-Allow-Origin", "*")
+		header.Set("Content-Type", "text/html")
+
+		fmt.Fprint(w, `<!DOCTYPE html>
+<meta http-equiv="refresh" content="5">
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Traffic Monitor Offline Validator</title>
+<style type="text/css">body{margin:40px auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`)
+
+		fmt.Fprint(w, `<h1>Traffic Monitor Validator</h1>`)
+		fmt.Fprintf(w, `<p>%s`, toURI)
+
+		// One titled table per validator.
+		fmt.Fprint(w, `<h2>CRStates Offline</h2>`)
+		printLogs(crStatesOfflineLogs, w)
+
+		fmt.Fprint(w, `<h2>Peer Poller</h2>`)
+		printLogs(peerPollerLogs, w)
+	}
+
+	http.HandleFunc("/", page)
+	return http.ListenAndServe(":80", nil)
+}