You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficcontrol.apache.org by ne...@apache.org on 2017/04/12 21:44:01 UTC

[11/13] incubator-trafficcontrol git commit: Add TM2 validator for peer pollers

Add TM2 validator for peer pollers


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/c9196a12
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/c9196a12
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/c9196a12

Branch: refs/heads/master
Commit: c9196a12e57c381b8ba6de29e2aa4009a27a1f14
Parents: 4658c51
Author: Robert Butts <ro...@gmail.com>
Authored: Fri Mar 3 15:39:10 2017 -0700
Committer: Dave Neuman <ne...@apache.org>
Committed: Wed Apr 12 15:43:31 2017 -0600

----------------------------------------------------------------------
 .../traffic_monitor/tmcheck/peerpoller.go       | 112 ++++++++++
 .../traffic_monitor/tmcheck/tmcheck.go          |  69 +++++-
 .../tools/nagios-validate-peerpoller.go         |  36 +++
 .../tools/service-validate-offline.go           | 204 -----------------
 .../traffic_monitor/tools/validator-service.go  | 222 +++++++++++++++++++
 5 files changed, 432 insertions(+), 211 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
new file mode 100644
index 0000000..cdeaf36
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/peerpoller.go
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package tmcheck
+
+import (
+	"encoding/json"
+	"fmt"
+	"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
+	to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
+	"io/ioutil"
+	"time"
+)
+
+// PeerPollMax is the largest oldest-polled-peer time considered healthy. ValidatePeerPoller returns an error when a monitor reports a larger value.
+const PeerPollMax = time.Duration(10) * time.Second
+
+// TrafficMonitorStatsPath is the path appended to a Traffic Monitor URI to reach its Stats endpoint.
+const TrafficMonitorStatsPath = "/publish/Stats"
+
+// TrafficMonitorStatsJSON represents the JSON returned by Traffic Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer Time member, as needed by this library.
+type TrafficMonitorStatsJSON struct {
+	Stats TrafficMonitorStats `json:"stats"`
+}
+
+// TrafficMonitorStats represents the internal JSON object returned by Traffic Monitor's Stats endpoint. This currently only contains the Oldest Polled Peer Time member, as needed by this library.
+type TrafficMonitorStats struct {
+	// OldestPolledPeerTime is decoded from the "Oldest Polled Peer Time (ms)" key; GetOldestPolledPeerTime interprets it as a number of milliseconds.
+	OldestPolledPeerTime int `json:"Oldest Polled Peer Time (ms)"`
+}
+
+// GetOldestPolledPeerTime requests the Stats endpoint (uri + TrafficMonitorStatsPath) of the Traffic Monitor at uri, and returns its "Oldest Polled Peer Time (ms)" value as a time.Duration.
+// An error is returned if the request fails, the response body cannot be read, or the body cannot be unmarshalled as JSON.
+func GetOldestPolledPeerTime(uri string) (time.Duration, error) {
+	resp, err := getClient().Get(uri + TrafficMonitorStatsPath)
+	if err != nil {
+		return time.Duration(0), fmt.Errorf("requesting stats from %v: %v", uri, err)
+	}
+	// The body must be closed on every path, or the underlying connection is leaked on each poll.
+	defer resp.Body.Close()
+
+	respBytes, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return time.Duration(0), fmt.Errorf("reading reply from %v: %v", uri, err)
+	}
+
+	stats := TrafficMonitorStatsJSON{}
+	if err := json.Unmarshal(respBytes, &stats); err != nil {
+		return time.Duration(0), fmt.Errorf("unmarshalling: %v", err)
+	}
+
+	// The endpoint reports the value in milliseconds.
+	return time.Duration(stats.Stats.OldestPolledPeerTime) * time.Millisecond, nil
+}
+
+// ValidatePeerPoller checks the peer poller of the Traffic Monitor at uri.
+// It returns an error if the oldest polled peer time cannot be fetched, or if that time exceeds PeerPollMax; otherwise it returns nil.
+func ValidatePeerPoller(uri string) error {
+	oldest, err := GetOldestPolledPeerTime(uri)
+	switch {
+	case err != nil:
+		return fmt.Errorf("failed to get oldest peer time: %v", err)
+	case oldest > PeerPollMax:
+		return fmt.Errorf("Peer poller is dead, last poll was %v ago", oldest)
+	}
+	return nil
+}
+
+// ValidateAllPeerPollers runs ValidatePeerPoller against every monitor returned by GetMonitors.
+// The returned map has one entry per monitor host name; the entry is nil when that monitor's peer poller validated successfully.
+// The second return value is non-nil only when the list of monitors could not be fetched.
+func ValidateAllPeerPollers(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+	monitors, err := GetMonitors(toClient, includeOffline)
+	if err != nil {
+		return nil, err
+	}
+
+	results := make(map[enum.TrafficMonitorName]error, len(monitors))
+	for _, monitor := range monitors {
+		name := enum.TrafficMonitorName(monitor.HostName)
+		monitorURI := fmt.Sprintf("http://%s.%s", monitor.HostName, monitor.DomainName)
+		results[name] = ValidatePeerPoller(monitorURI)
+	}
+	return results, nil
+}
+
+// PeerPollersValidator runs Validator with ValidatePeerPoller as the check for the single Traffic Monitor at tmURI.
+// Validator's check signature also takes a Traffic Ops session, which ValidatePeerPoller does not need, so it is adapted with a small closure.
+// All other arguments are passed through to Validator unchanged.
+func PeerPollersValidator(
+	tmURI string,
+	toClient *to.Session,
+	interval time.Duration,
+	grace time.Duration,
+	onErr func(error),
+	onResumeSuccess func(),
+	onCheck func(error),
+) {
+	Validator(
+		tmURI, toClient, interval, grace, onErr, onResumeSuccess, onCheck,
+		func(uri string, _ *to.Session) error { return ValidatePeerPoller(uri) },
+	)
+}
+
+// PeerPollersAllValidator runs AllValidator with ValidateAllPeerPollers as the check, so the peer poller of every monitor returned by Traffic Ops is validated.
+// All arguments are passed through to AllValidator unchanged.
+func PeerPollersAllValidator(
+	toClient *to.Session,
+	interval time.Duration,
+	includeOffline bool,
+	grace time.Duration,
+	onErr func(enum.TrafficMonitorName, error),
+	onResumeSuccess func(enum.TrafficMonitorName),
+	onCheck func(enum.TrafficMonitorName, error),
+) {
+	AllValidator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck, ValidateAllPeerPollers)
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
index 819c1ab..a6e12ba 100644
--- a/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
+++ b/traffic_monitor_golang/traffic_monitor/tmcheck/tmcheck.go
@@ -142,8 +142,28 @@ func ValidateCRStates(crstates *peer.Crstates, crconfig *crconfig.CRConfig) erro
 	return nil
 }
 
+// ValidatorFunc is the signature shared by the single-monitor validator entry points in this package, such as CRStatesOfflineValidator and PeerPollersValidator.
+type ValidatorFunc func(
+	tmURI string,
+	toClient *to.Session,
+	interval time.Duration,
+	grace time.Duration,
+	onErr func(error),
+	onResumeSuccess func(),
+	onCheck func(error),
+)
+
+// AllValidatorFunc is the signature shared by the all-monitors validator entry points in this package, such as AllMonitorsCRStatesOfflineValidator and PeerPollersAllValidator.
+// The callbacks receive the name of the Traffic Monitor the result applies to.
+type AllValidatorFunc func(
+	toClient *to.Session,
+	interval time.Duration,
+	includeOffline bool,
+	grace time.Duration,
+	onErr func(enum.TrafficMonitorName, error),
+	onResumeSuccess func(enum.TrafficMonitorName),
+	onCheck func(enum.TrafficMonitorName, error),
+)
+
 // Validator is designed to be run as a goroutine, and does not return. It continuously runs the given `validator` every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
-func CRStatesOfflineValidator(
+func Validator(
 	tmURI string,
 	toClient *to.Session,
 	interval time.Duration,
@@ -151,11 +171,12 @@ func CRStatesOfflineValidator(
 	onErr func(error),
 	onResumeSuccess func(),
 	onCheck func(error),
+	validator func(tmURI string, toClient *to.Session) error,
 ) {
 	invalid := false
 	invalidStart := time.Time{}
 	for {
-		err := ValidateOfflineStates(tmURI, toClient)
+		err := validator(tmURI, toClient)
 
 		if err != nil && !invalid {
 			invalid = true
@@ -180,14 +201,26 @@ func CRStatesOfflineValidator(
 	}
 }
 
+// CRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continuously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll.
+// It delegates to Validator, using ValidateOfflineStates as the check.
+func CRStatesOfflineValidator(
+	tmURI string,
+	toClient *to.Session,
+	interval time.Duration,
+	grace time.Duration,
+	onErr func(error),
+	onResumeSuccess func(),
+	onCheck func(error),
+) {
+	Validator(tmURI, toClient, interval, grace, onErr, onResumeSuccess, onCheck, ValidateOfflineStates)
+}
+
 // CRConfigOrError contains a CRConfig or an error. Union types? Monads? What are those?
 type CRConfigOrError struct {
 	CRConfig *crconfig.CRConfig
 	Err      error
 }
 
-// ValidateOfflineStates validates that no OFFLINE or ADMIN_DOWN caches in the given Traffic Ops' CRConfig are marked Available in the given Traffic Monitor's CRStates.
-func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+func GetMonitors(toClient *to.Session, includeOffline bool) ([]to.Server, error) {
 	trafficMonitorType := "RASCAL"
 	monitorTypeQuery := map[string][]string{"type": []string{trafficMonitorType}}
 	servers, err := toClient.ServersByType(monitorTypeQuery)
@@ -198,6 +231,15 @@ func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool)
 	if !includeOffline {
 		servers = FilterOfflines(servers)
 	}
+	return servers, nil
+}
+
+// ValidateAllMonitorsOfflineStates validates, for each Traffic Monitor returned by GetMonitors, that no OFFLINE or ADMIN_DOWN caches in the given Traffic Ops' CRConfig are marked Available in that Traffic Monitor's CRStates.
+func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error) {
+	servers, err := GetMonitors(toClient, includeOffline)
+	if err != nil {
+		return nil, err
+	}
 
 	crConfigs := GetCRConfigs(GetCDNs(servers), toClient)
 
@@ -215,8 +257,7 @@ func ValidateAllMonitorsOfflineStates(toClient *to.Session, includeOffline bool)
 	return errs, nil
 }
 
-// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty.
-func AllMonitorsCRStatesOfflineValidator(
+func AllValidator(
 	toClient *to.Session,
 	interval time.Duration,
 	includeOffline bool,
@@ -224,12 +265,13 @@ func AllMonitorsCRStatesOfflineValidator(
 	onErr func(enum.TrafficMonitorName, error),
 	onResumeSuccess func(enum.TrafficMonitorName),
 	onCheck func(enum.TrafficMonitorName, error),
+	validator func(toClient *to.Session, includeOffline bool) (map[enum.TrafficMonitorName]error, error),
 ) {
 	invalid := map[enum.TrafficMonitorName]bool{}
 	invalidStart := map[enum.TrafficMonitorName]time.Time{}
 	metaFail := false
 	for {
-		tmErrs, err := ValidateAllMonitorsOfflineStates(toClient, includeOffline)
+		tmErrs, err := validator(toClient, includeOffline)
 		if err != nil {
 			onErr("", fmt.Errorf("Error validating monitors: %v", err))
 			time.Sleep(interval)
@@ -266,6 +308,19 @@ func AllMonitorsCRStatesOfflineValidator(
 	}
 }
 
+// AllMonitorsCRStatesOfflineValidator is designed to be run as a goroutine, and does not return. It continuously validates every `interval`, and calls `onErr` on failure, `onResumeSuccess` when a failure ceases, and `onCheck` on every poll. Note the error passed to `onErr` may be a general validation error not associated with any monitor, in which case the passed `enum.TrafficMonitorName` will be empty.
+// It delegates to AllValidator, using ValidateAllMonitorsOfflineStates as the check.
+func AllMonitorsCRStatesOfflineValidator(
+	toClient *to.Session,
+	interval time.Duration,
+	includeOffline bool,
+	grace time.Duration,
+	onErr func(enum.TrafficMonitorName, error),
+	onResumeSuccess func(enum.TrafficMonitorName),
+	onCheck func(enum.TrafficMonitorName, error),
+) {
+	AllValidator(toClient, interval, includeOffline, grace, onErr, onResumeSuccess, onCheck, ValidateAllMonitorsOfflineStates)
+}
+
 // FilterOfflines returns only servers which are REPORTED or ONLINE
 func FilterOfflines(servers []to.Server) []to.Server {
 	onlineServers := []to.Server{}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
new file mode 100644
index 0000000..5e9c9fd
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tools/nagios-validate-peerpoller.go
@@ -0,0 +1,36 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/common/nagios"
+	"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
+)
+
+// UserAgent is the HTTP user agent string for this tool. In this file it is referenced only by the commented-out Traffic Ops login in main.
+const UserAgent = "tm-peerpoller-validator/0.1"
+
+// main validates the peer poller of the Traffic Monitor given by the -tm flag, and reports the result through nagios.Exit: Critical with the validation error on failure, Ok otherwise.
+func main() {
+	tmURI := flag.String("tm", "", "The Traffic Monitor URI, whose Peer Poller to validate")
+	// toUser := flag.String("touser", "", "The Traffic Ops user")
+	// toPass := flag.String("topass", "", "The Traffic Ops password")
+	// includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors")
+	help := flag.Bool("help", false, "Usage info")
+	helpBrief := flag.Bool("h", false, "Usage info")
+	flag.Parse()
+	if *help || *helpBrief {
+		// Only -tm is a registered flag; the Traffic Ops flags above are commented out, so the usage text must not advertise them.
+		fmt.Printf("Usage: ./nagios-validate-peerpoller -tm http://traffic-monitor.example.net\n")
+		return
+	}
+
+	// toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout)
+	// if err != nil {
+	// 	fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
+	// 	return
+	// }
+
+	err := tmcheck.ValidatePeerPoller(*tmURI)
+	if err != nil {
+		nagios.Exit(nagios.Critical, fmt.Sprintf("Error validating monitor peer poller: %v", err))
+	}
+	nagios.Exit(nagios.Ok, "")
+}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go b/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
deleted file mode 100644
index 41e0eb4..0000000
--- a/traffic_monitor_golang/traffic_monitor/tools/service-validate-offline.go
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-// validate-offline is a utility HTTP service which polls the given Traffic Monitor and validates that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops CRConfig are marked Available in Traffic Monitor's CRstates endpoint.
-
-package main
-
-import (
-	"flag"
-	"fmt"
-	"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
-	"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
-	to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
-	"net/http"
-	"sort"
-	"sync"
-	"time"
-)
-
-const UserAgent = "tm-offline-validator/0.1"
-
-const LogLimit = 10
-
-type Log struct {
-	log       *[]string
-	limit     int
-	errored   *bool
-	lastCheck *time.Time
-	m         *sync.RWMutex
-}
-
-func (l *Log) Add(msg string) {
-	l.m.Lock()
-	defer l.m.Unlock()
-	*l.log = append([]string{msg}, *l.log...)
-	if len(*l.log) > l.limit {
-		*l.log = (*l.log)[:l.limit]
-	}
-}
-
-func (l *Log) Get() []string {
-	l.m.RLock()
-	defer l.m.RUnlock()
-	return *l.log
-}
-
-func (l *Log) GetErrored() (bool, time.Time) {
-	l.m.RLock()
-	defer l.m.RUnlock()
-	return *l.errored, *l.lastCheck
-}
-
-func (l *Log) SetErrored(e bool) {
-	l.m.Lock()
-	defer l.m.Unlock()
-	*l.errored = e
-	*l.lastCheck = time.Now()
-}
-
-func NewLog() Log {
-	log := make([]string, 0, LogLimit+1)
-	errored := false
-	limit := LogLimit
-	lastCheck := time.Time{}
-	return Log{log: &log, errored: &errored, m: &sync.RWMutex{}, limit: limit, lastCheck: &lastCheck}
-}
-
-type Logs struct {
-	logs map[enum.TrafficMonitorName]Log
-	m    *sync.RWMutex
-}
-
-func NewLogs() Logs {
-	return Logs{logs: map[enum.TrafficMonitorName]Log{}, m: &sync.RWMutex{}}
-}
-
-func (l Logs) Get(name enum.TrafficMonitorName) Log {
-	l.m.Lock()
-	defer l.m.Unlock()
-	if _, ok := l.logs[name]; !ok {
-		l.logs[name] = NewLog()
-	}
-	return l.logs[name]
-}
-
-func (l Logs) GetMonitors() []string {
-	l.m.RLock()
-	defer l.m.RUnlock()
-	monitors := []string{}
-	for name, _ := range l.logs {
-		monitors = append(monitors, string(name))
-	}
-	return monitors
-}
-
-func main() {
-	toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to validate")
-	toUser := flag.String("touser", "", "The Traffic Ops user")
-	toPass := flag.String("topass", "", "The Traffic Ops password")
-	interval := flag.Duration("interval", time.Second*time.Duration(5), "The interval to validate")
-	grace := flag.Duration("grace", time.Second*time.Duration(30), "The grace period before invalid states are reported")
-	includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors")
-	help := flag.Bool("help", false, "Usage info")
-	helpBrief := flag.Bool("h", false, "Usage info")
-	flag.Parse()
-	if *help || *helpBrief {
-		fmt.Printf("Usage: go run validate-offline -to https://traffic-ops.example.net -touser bill -topass thelizard -tm http://traffic-monitor.example.net -interval 5s -grace 30s -includeOffline true\n")
-		return
-	}
-
-	toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout)
-	if err != nil {
-		fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
-		return
-	}
-
-	logs := NewLogs()
-
-	onErr := func(name enum.TrafficMonitorName, err error) {
-		log := logs.Get(name)
-		log.Add(fmt.Sprintf("%v ERROR %v\n", time.Now(), err))
-		log.SetErrored(true)
-	}
-
-	onResumeSuccess := func(name enum.TrafficMonitorName) {
-		log := logs.Get(name)
-		log.Add(fmt.Sprintf("%v INFO State Valid\n", time.Now()))
-		log.SetErrored(false)
-	}
-
-	onCheck := func(name enum.TrafficMonitorName, err error) {
-		log := logs.Get(name)
-		log.SetErrored(err != nil)
-	}
-
-	go tmcheck.AllMonitorsCRStatesOfflineValidator(toClient, *interval, *includeOffline, *grace, onErr, onResumeSuccess, onCheck)
-
-	if err := serve(logs, *toURI); err != nil {
-		fmt.Printf("Serve error: %v\n", err)
-	}
-}
-
-func serve(logs Logs, toURI string) error {
-	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("Access-Control-Allow-Origin", "*")
-		w.Header().Set("Content-Type", "text/html")
-		fmt.Fprintf(w, `<!DOCTYPE html>
-<meta http-equiv="refresh" content="5">
-<meta charset="utf-8">
-<meta name="viewport" content="width=device-width, initial-scale=1">
-<title>Traffic Monitor Offline Validator</title>
-<style type="text/css">body{margin:40px auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`)
-
-		fmt.Fprintf(w, `<p>%s`, toURI)
-
-		fmt.Fprintf(w, `<table style="width:100%%">`)
-
-		monitors := logs.GetMonitors()
-		sort.Strings(monitors) // sort, so they're always in the same order in the webpage
-		for _, monitor := range monitors {
-			fmt.Fprintf(w, `</tr>`)
-
-			log := logs.Get(enum.TrafficMonitorName(monitor))
-
-			fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor)
-			errored, lastCheck := log.GetErrored()
-			if errored {
-				fmt.Fprintf(w, `<td><span style="color:red">Invalid</span></td>`)
-			} else {
-				fmt.Fprintf(w, `<td><span style="color:limegreen">Valid</span></td>`)
-			}
-			fmt.Fprintf(w, `<td><span>as of %v</span></td>`, lastCheck)
-
-			fmt.Fprintf(w, `<td><span style="font-family:monospace">`)
-			logCopy := log.Get()
-			firstMsg := ""
-			if len(logCopy) > 0 {
-				firstMsg = logCopy[0]
-			}
-			fmt.Fprintf(w, "%s\n", firstMsg)
-			fmt.Fprintf(w, `</span></td>`)
-
-			fmt.Fprintf(w, `</tr>`)
-		}
-		fmt.Fprintf(w, `</table>`)
-	})
-	return http.ListenAndServe(":80", nil)
-}

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/c9196a12/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
----------------------------------------------------------------------
diff --git a/traffic_monitor_golang/traffic_monitor/tools/validator-service.go b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
new file mode 100644
index 0000000..0b551c8
--- /dev/null
+++ b/traffic_monitor_golang/traffic_monitor/tools/validator-service.go
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// validator-service is a utility HTTP service which polls the Traffic Monitors known to the given Traffic Ops and validates (a) that no OFFLINE or ADMIN_DOWN caches in the Traffic Ops CRConfig are marked Available in a Traffic Monitor's CRstates endpoint, and (b) that each Traffic Monitor's peer poller is alive.
+
+package main
+
+import (
+	"flag"
+	"fmt"
+	"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/enum"
+	"github.com/apache/incubator-trafficcontrol/traffic_monitor_golang/traffic_monitor/tmcheck"
+	to "github.com/apache/incubator-trafficcontrol/traffic_ops/client"
+	"io"
+	"net/http"
+	"sort"
+	"sync"
+	"time"
+)
+
+// UserAgent is the user agent string passed to to.LoginWithAgent when logging in to Traffic Ops.
+const UserAgent = "tm-offline-validator/0.1"
+
+// LogLimit is the maximum number of messages a Log created by NewLog retains.
+const LogLimit = 10
+
+// Log is a bounded, newest-first list of messages plus an errored flag and the time that flag was last set.
+// All state is held behind pointers, so copies of a Log value share the same underlying state and mutex.
+type Log struct {
+	log       *[]string
+	limit     int
+	errored   *bool
+	lastCheck *time.Time
+	m         *sync.RWMutex
+}
+
+// Add puts msg at the front of the log, dropping the oldest messages beyond the limit.
+func (lg *Log) Add(msg string) {
+	lg.m.Lock()
+	defer lg.m.Unlock()
+
+	// Build a fresh slice each time, so slices previously handed out by Get are never modified.
+	entries := make([]string, 0, len(*lg.log)+1)
+	entries = append(entries, msg)
+	entries = append(entries, *lg.log...)
+	if len(entries) > lg.limit {
+		entries = entries[:lg.limit]
+	}
+	*lg.log = entries
+}
+
+// Get returns the current messages, newest first.
+func (lg *Log) Get() []string {
+	lg.m.RLock()
+	entries := *lg.log
+	lg.m.RUnlock()
+	return entries
+}
+
+// GetErrored returns the errored flag and the time it was last set by SetErrored.
+func (lg *Log) GetErrored() (bool, time.Time) {
+	lg.m.RLock()
+	errored, checked := *lg.errored, *lg.lastCheck
+	lg.m.RUnlock()
+	return errored, checked
+}
+
+// SetErrored stores the errored flag and records the current time as the last check.
+func (lg *Log) SetErrored(e bool) {
+	lg.m.Lock()
+	*lg.errored = e
+	*lg.lastCheck = time.Now()
+	lg.m.Unlock()
+}
+
+// NewLog returns an empty, non-errored Log limited to LogLimit messages, with a zero last-check time.
+func NewLog() Log {
+	var (
+		entries   = make([]string, 0, LogLimit+1)
+		errored   bool
+		lastCheck time.Time
+	)
+	return Log{
+		log:       &entries,
+		limit:     LogLimit,
+		errored:   &errored,
+		lastCheck: &lastCheck,
+		m:         &sync.RWMutex{},
+	}
+}
+
+// Logs is a mutex-guarded collection of Log values keyed by Traffic Monitor name.
+type Logs struct {
+	logs map[enum.TrafficMonitorName]Log
+	m    *sync.RWMutex
+}
+
+// NewLogs returns an empty Logs collection.
+func NewLogs() Logs {
+	return Logs{
+		logs: map[enum.TrafficMonitorName]Log{},
+		m:    &sync.RWMutex{},
+	}
+}
+
+// Get returns the Log for name, creating and storing a new one if none exists yet.
+// The write lock is taken because the lookup may insert.
+func (l Logs) Get(name enum.TrafficMonitorName) Log {
+	l.m.Lock()
+	defer l.m.Unlock()
+
+	entry, found := l.logs[name]
+	if !found {
+		entry = NewLog()
+		l.logs[name] = entry
+	}
+	return entry
+}
+
+// GetMonitors returns the names of all monitors which have a Log, in no particular order.
+func (l Logs) GetMonitors() []string {
+	l.m.RLock()
+	defer l.m.RUnlock()
+
+	names := make([]string, 0, len(l.logs))
+	for name := range l.logs {
+		names = append(names, string(name))
+	}
+	return names
+}
+
+// startValidator launches validator in a new goroutine and returns the Logs which its callbacks fill in.
+// On error, a timestamped ERROR message is added to the monitor's log and it is marked errored; on resumed success, an INFO message is added and the mark is cleared; on every check, the mark is set to whether the check failed.
+func startValidator(validator tmcheck.AllValidatorFunc, toClient *to.Session, interval time.Duration, includeOffline bool, grace time.Duration) Logs {
+	results := NewLogs()
+
+	go validator(
+		toClient, interval, includeOffline, grace,
+		// onErr
+		func(name enum.TrafficMonitorName, err error) {
+			entry := results.Get(name)
+			entry.Add(fmt.Sprintf("%v ERROR %v\n", time.Now(), err))
+			entry.SetErrored(true)
+		},
+		// onResumeSuccess
+		func(name enum.TrafficMonitorName) {
+			entry := results.Get(name)
+			entry.Add(fmt.Sprintf("%v INFO State Valid\n", time.Now()))
+			entry.SetErrored(false)
+		},
+		// onCheck
+		func(name enum.TrafficMonitorName, err error) {
+			entry := results.Get(name)
+			entry.SetErrored(err != nil)
+		},
+	)
+
+	return results
+}
+
+// main parses the command-line flags, logs in to Traffic Ops, starts the CRStates-offline and peer-poller validators for all monitors, and serves their results over HTTP.
+// Login and serve errors are printed to standard output.
+func main() {
+	toURI := flag.String("to", "", "The Traffic Ops URI, whose CRConfig to validate")
+	toUser := flag.String("touser", "", "The Traffic Ops user")
+	toPass := flag.String("topass", "", "The Traffic Ops password")
+	interval := flag.Duration("interval", 5*time.Second, "The interval to validate")
+	grace := flag.Duration("grace", 30*time.Second, "The grace period before invalid states are reported")
+	includeOffline := flag.Bool("includeOffline", false, "Whether to include Offline Monitors")
+	help := flag.Bool("help", false, "Usage info")
+	helpBrief := flag.Bool("h", false, "Usage info")
+	flag.Parse()
+	if *help || *helpBrief {
+		// The usage text lists only flags registered above; there is no -tm flag, since monitors are discovered through Traffic Ops.
+		fmt.Printf("Usage: go run validator-service.go -to https://traffic-ops.example.net -touser bill -topass thelizard -interval 5s -grace 30s -includeOffline true\n")
+		return
+	}
+
+	toClient, err := to.LoginWithAgent(*toURI, *toUser, *toPass, true, UserAgent, false, tmcheck.RequestTimeout)
+	if err != nil {
+		fmt.Printf("Error logging in to Traffic Ops: %v\n", err)
+		return
+	}
+
+	// Each validator runs in its own goroutine and reports into its own Logs.
+	crStatesOfflineLogs := startValidator(tmcheck.AllMonitorsCRStatesOfflineValidator, toClient, *interval, *includeOffline, *grace)
+	peerPollerLogs := startValidator(tmcheck.PeerPollersAllValidator, toClient, *interval, *includeOffline, *grace)
+
+	if err := serve(*toURI, crStatesOfflineLogs, peerPollerLogs); err != nil {
+		fmt.Printf("Serve error: %v\n", err)
+	}
+}
+
+// printLogs writes logs to w as an HTML table with one row per monitor, sorted by monitor name.
+// Each row holds the monitor name, whether it is currently Valid or Invalid, the time of the last check, and the most recent log message (if any).
+// NOTE(review): the monitor name and log message are written without HTML escaping — confirm they cannot contain markup.
+func printLogs(logs Logs, w io.Writer) {
+	fmt.Fprint(w, `<table style="width:100%">`)
+
+	monitors := logs.GetMonitors()
+	sort.Strings(monitors) // sort, so they're always in the same order in the webpage
+	for _, monitor := range monitors {
+		// Open the row; this previously wrote a closing </tr> here, producing malformed HTML.
+		fmt.Fprint(w, `<tr>`)
+
+		log := logs.Get(enum.TrafficMonitorName(monitor))
+
+		fmt.Fprintf(w, `<td><span>%s</span></td>`, monitor)
+		errored, lastCheck := log.GetErrored()
+		if errored {
+			fmt.Fprint(w, `<td><span style="color:red">Invalid</span></td>`)
+		} else {
+			fmt.Fprint(w, `<td><span style="color:limegreen">Valid</span></td>`)
+		}
+		fmt.Fprintf(w, `<td><span>as of %v</span></td>`, lastCheck)
+
+		fmt.Fprint(w, `<td><span style="font-family:monospace">`)
+		// Only the newest message is shown; the log is stored newest-first.
+		firstMsg := ""
+		if logCopy := log.Get(); len(logCopy) > 0 {
+			firstMsg = logCopy[0]
+		}
+		fmt.Fprintf(w, "%s\n", firstMsg)
+		fmt.Fprint(w, `</span></td>`)
+
+		fmt.Fprint(w, `</tr>`)
+	}
+	fmt.Fprint(w, `</table>`)
+}
+
+// serve registers a handler on "/" which renders an auto-refreshing HTML page showing toURI followed by the CRStates Offline and Peer Poller validation tables, then listens on port 80.
+// It returns the error from http.ListenAndServe.
+func serve(toURI string, crStatesOfflineLogs Logs, peerPollerLogs Logs) error {
+	renderIndex := func(w http.ResponseWriter, _ *http.Request) {
+		headers := w.Header()
+		headers.Set("Access-Control-Allow-Origin", "*")
+		headers.Set("Content-Type", "text/html")
+
+		// Page head: the meta refresh makes the browser reload every 5 seconds.
+		fmt.Fprintf(w, `<!DOCTYPE html>
+<meta http-equiv="refresh" content="5">
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>Traffic Monitor Offline Validator</title>
+<style type="text/css">body{margin:40px auto;line-height:1.6;font-size:18px;color:#444;padding:0 8px 0 8px}h1,h2,h3{line-height:1.2}span{padding:0px 4px 0px 4px;}</style>`)
+
+		fmt.Fprintf(w, `<h1>Traffic Monitor Validator</h1>`)
+		fmt.Fprintf(w, `<p>%s`, toURI)
+
+		// One table per validator.
+		fmt.Fprintf(w, `<h2>CRStates Offline</h2>`)
+		printLogs(crStatesOfflineLogs, w)
+
+		fmt.Fprintf(w, `<h2>Peer Poller</h2>`)
+		printLogs(peerPollerLogs, w)
+	}
+
+	http.HandleFunc("/", renderIndex)
+	return http.ListenAndServe(":80", nil)
+}