You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficcontrol.apache.org by ne...@apache.org on 2017/01/25 17:30:18 UTC
[07/20] incubator-trafficcontrol git commit: Fix TM2 duplicate availability logic
Fix TM2 duplicate availability logic
Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/40d5bcba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/40d5bcba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/40d5bcba
Branch: refs/heads/master
Commit: 40d5bcbaa78a1459c57d8a05fb945817d58323ec
Parents: 8a7bed4
Author: Robert Butts <ro...@gmail.com>
Authored: Fri Jan 20 15:30:35 2017 -0700
Committer: Dave Neuman <ne...@apache.org>
Committed: Wed Jan 25 10:29:46 2017 -0700
----------------------------------------------------------------------
.../traffic_monitor/manager/healthresult.go | 30 ++--------
.../traffic_monitor/manager/stathistory.go | 59 +++++++++++---------
2 files changed, 38 insertions(+), 51 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/40d5bcba/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
----------------------------------------------------------------------
diff --git a/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go b/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
index f99155d..110f083 100644
--- a/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
+++ b/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
@@ -204,10 +204,9 @@ func processHealthResult(
}()
toDataCopy := toData.Get() // create a copy, so the same data used for all processing of this cache health result
- localCacheStatus := localCacheStatusThreadsafe.Get().Copy()
monitorConfigCopy := monitorConfig.Get()
healthHistoryCopy := healthHistory.Get().Copy()
- for _, healthResult := range results {
+ for i, healthResult := range results {
log.Debugf("poll %v %v healthresultman start\n", healthResult.PollID, time.Now())
fetchCount.Inc()
var prevResult cache.Result
@@ -218,6 +217,7 @@ func processHealthResult(
if healthResult.Error == nil {
health.GetVitals(&healthResult, &prevResult, &monitorConfigCopy)
+ results[i] = healthResult
}
maxHistory := uint64(monitorConfigCopy.Profile[monitorConfigCopy.TrafficServer[string(healthResult.ID)].Profile].Parameters.HistoryCount)
@@ -227,33 +227,11 @@ func processHealthResult(
}
healthHistoryCopy[healthResult.ID] = pruneHistory(append([]cache.Result{healthResult}, healthHistoryCopy[healthResult.ID]...), maxHistory)
+ }
- isAvailable, whyAvailable, unavailableStat := health.EvalCache(cache.ToInfo(healthResult), nil, &monitorConfigCopy)
- whyAvailable += "(healthpoll)" // debug
- if available, ok := localStates.GetCache(healthResult.ID); !ok || available.IsAvailable != isAvailable {
- log.Infof("Changing state for %s was: %t now: %t because %s error: %v", healthResult.ID, available.IsAvailable, isAvailable, whyAvailable, healthResult.Error)
- events.Add(health.Event{Time: time.Now(), Description: whyAvailable, Name: string(healthResult.ID), Hostname: string(healthResult.ID), Type: toDataCopy.ServerTypes[healthResult.ID].String(), Available: isAvailable})
- }
+ calcAvailability(results, "health", nil, monitorConfigCopy, toDataCopy, localCacheStatusThreadsafe, localStates, events)
- // if the cache is now Available, and was previously unavailable due to a threshold, make sure this poller contains the stat which exceeded the threshold.
- if previousStatus, hasPreviousStatus := localCacheStatus[healthResult.ID]; isAvailable && hasPreviousStatus && !previousStatus.Available && previousStatus.UnavailableStat != "" {
- if !resultHasStat(previousStatus.UnavailableStat, healthResult) {
- // TODO determine if it's ok to add the result data (but not availability). Or will making them not align cause issues?
- continue
- }
- }
-
- localCacheStatus[healthResult.ID] = cache.AvailableStatus{
- Available: isAvailable,
- Status: monitorConfigCopy.TrafficServer[string(healthResult.ID)].Status,
- Why: whyAvailable,
- UnavailableStat: unavailableStat,
- } // TODO move within localStates?
- localStates.SetCache(healthResult.ID, peer.IsAvailable{IsAvailable: isAvailable})
- }
- CalculateDeliveryServiceState(toDataCopy.DeliveryServiceServers, localStates)
healthHistory.Set(healthHistoryCopy)
- localCacheStatusThreadsafe.Set(localCacheStatus)
// TODO determine if we should combineCrStates() here
lastHealthDurations := lastHealthDurationsThreadsafe.Get().Copy()
http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/40d5bcba/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
----------------------------------------------------------------------
diff --git a/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go b/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
index bf2d1dc..b44f357 100644
--- a/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
+++ b/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
@@ -158,7 +158,6 @@ func processStatResults(
statInfoHistory := statInfoHistoryThreadsafe.Get().Copy()
statResultHistory := statResultHistoryThreadsafe.Get().Copy()
statMaxKbpses := statMaxKbpsesThreadsafe.Get().Copy()
- localCacheStatus := localCacheStatusThreadsafe.Get().Copy()
for i, result := range results {
maxStats := uint64(mc.Profile[mc.TrafficServer[string(result.ID)].Profile].Parameters.HistoryCount)
@@ -213,44 +212,54 @@ func processStatResults(
lastStats.Set(newLastStats)
}
- // TODO test
- // TODO abstract setting availability logic (duplicated in healthresult.go)
+ calcAvailability(results, "stat", statResultHistory, mc, toData, localCacheStatusThreadsafe, localStates, events)
+
+ endTime := time.Now()
+ lastStatDurations := lastStatDurationsThreadsafe.Get().Copy()
for _, result := range results {
- isAvailable, whyAvailable, unavailableStat := health.EvalCache(cache.ToInfo(result), statResultHistory[result.ID], &mc)
- whyAvailable += "(statpoll)" // debug
+ if lastStatStart, ok := lastStatEndTimes[result.ID]; ok {
+ d := time.Since(lastStatStart)
+ lastStatDurations[result.ID] = d
+ }
+ lastStatEndTimes[result.ID] = endTime
+ }
+ lastStatDurationsThreadsafe.Set(lastStatDurations)
+ unpolledCaches.SetPolled(results, lastStats.Get())
+}
- if available, ok := localStates.GetCache(result.ID); !ok || available.IsAvailable != isAvailable {
- log.Infof("Changing state for %s was: %t now: %t because %s error: %v", result.ID, available.IsAvailable, isAvailable, whyAvailable, result.Error)
- events.Add(health.Event{Time: time.Now(), Description: whyAvailable, Name: string(result.ID), Hostname: string(result.ID), Type: toData.ServerTypes[result.ID].String(), Available: isAvailable})
+// calcAvailability calculates the availability of each cache in the given results. Availability is stored in `localCacheStatusThreadsafe` and `localStates`, and if a cache's availability changed, an event is added to `events`. statResultHistory may be nil, for pollers which don't poll stats.
+// TODO add enum for poller names?
+func calcAvailability(results []cache.Result, pollerName string, statResultHistory cache.ResultStatHistory, mc to.TrafficMonitorConfigMap, toData todata.TOData, localCacheStatusThreadsafe threadsafe.CacheAvailableStatus, localStates peer.CRStatesThreadsafe, events threadsafe.Events) {
+ localCacheStatuses := localCacheStatusThreadsafe.Get().Copy()
+ for _, result := range results {
+ statResults := cache.ResultStatValHistory(nil)
+ if statResultHistory != nil {
+ statResults = statResultHistory[result.ID]
}
+ isAvailable, whyAvailable, unavailableStat := health.EvalCache(cache.ToInfo(result), statResults, &mc)
+ whyAvailable += " (" + pollerName + ")" // TODO move to field in AvailableStatus
+
// if the cache is now Available, and was previously unavailable due to a threshold, make sure this poller contains the stat which exceeded the threshold.
- if previousStatus, hasPreviousStatus := localCacheStatus[result.ID]; isAvailable && hasPreviousStatus && !previousStatus.Available && previousStatus.UnavailableStat != "" {
+ if previousStatus, hasPreviousStatus := localCacheStatuses[result.ID]; isAvailable && hasPreviousStatus && !previousStatus.Available && previousStatus.UnavailableStat != "" {
if !resultHasStat(previousStatus.UnavailableStat, result) {
- // TODO determine if it's ok to add the result data (but not availability). Or will making them not align cause issues?
- continue
+ return
}
}
- localCacheStatus[result.ID] = cache.AvailableStatus{
+ localCacheStatuses[result.ID] = cache.AvailableStatus{
Available: isAvailable,
Status: mc.TrafficServer[string(result.ID)].Status,
Why: whyAvailable,
UnavailableStat: unavailableStat,
} // TODO move within localStates?
- localStates.SetCache(result.ID, peer.IsAvailable{IsAvailable: isAvailable})
- CalculateDeliveryServiceState(toData.DeliveryServiceServers, localStates)
- localCacheStatusThreadsafe.Set(localCacheStatus)
- }
- endTime := time.Now()
- lastStatDurations := lastStatDurationsThreadsafe.Get().Copy()
- for _, result := range results {
- if lastStatStart, ok := lastStatEndTimes[result.ID]; ok {
- d := time.Since(lastStatStart)
- lastStatDurations[result.ID] = d
+ if available, ok := localStates.GetCache(result.ID); !ok || available.IsAvailable != isAvailable {
+ log.Infof("Changing state for %s was: %t now: %t because %s error: %v", result.ID, available.IsAvailable, isAvailable, whyAvailable, result.Error)
+ events.Add(cache.Event{Time: time.Now().Unix(), Description: whyAvailable, Name: result.ID, Hostname: result.ID, Type: toData.ServerTypes[result.ID].String(), Available: isAvailable})
}
- lastStatEndTimes[result.ID] = endTime
+
+ localStates.SetCache(result.ID, peer.IsAvailable{IsAvailable: isAvailable})
}
- lastStatDurationsThreadsafe.Set(lastStatDurations)
- unpolledCaches.SetPolled(results, lastStats.Get())
+ CalculateDeliveryServiceState(toData.DeliveryServiceServers, localStates)
+ localCacheStatusThreadsafe.Set(localCacheStatuses)
}