You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficcontrol.apache.org by ne...@apache.org on 2017/01/25 17:30:18 UTC

[07/20] incubator-trafficcontrol git commit: Fix TM2 duplicate availability logic

Fix TM2 duplicate availability logic


Project: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/commit/40d5bcba
Tree: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/tree/40d5bcba
Diff: http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/diff/40d5bcba

Branch: refs/heads/master
Commit: 40d5bcbaa78a1459c57d8a05fb945817d58323ec
Parents: 8a7bed4
Author: Robert Butts <ro...@gmail.com>
Authored: Fri Jan 20 15:30:35 2017 -0700
Committer: Dave Neuman <ne...@apache.org>
Committed: Wed Jan 25 10:29:46 2017 -0700

----------------------------------------------------------------------
 .../traffic_monitor/manager/healthresult.go     | 30 ++--------
 .../traffic_monitor/manager/stathistory.go      | 59 +++++++++++---------
 2 files changed, 38 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/40d5bcba/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
----------------------------------------------------------------------
diff --git a/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go b/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
index f99155d..110f083 100644
--- a/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
+++ b/traffic_monitor/experimental/traffic_monitor/manager/healthresult.go
@@ -204,10 +204,9 @@ func processHealthResult(
 	}()
 
 	toDataCopy := toData.Get() // create a copy, so the same data used for all processing of this cache health result
-	localCacheStatus := localCacheStatusThreadsafe.Get().Copy()
 	monitorConfigCopy := monitorConfig.Get()
 	healthHistoryCopy := healthHistory.Get().Copy()
-	for _, healthResult := range results {
+	for i, healthResult := range results {
 		log.Debugf("poll %v %v healthresultman start\n", healthResult.PollID, time.Now())
 		fetchCount.Inc()
 		var prevResult cache.Result
@@ -218,6 +217,7 @@ func processHealthResult(
 
 		if healthResult.Error == nil {
 			health.GetVitals(&healthResult, &prevResult, &monitorConfigCopy)
+			results[i] = healthResult
 		}
 
 		maxHistory := uint64(monitorConfigCopy.Profile[monitorConfigCopy.TrafficServer[string(healthResult.ID)].Profile].Parameters.HistoryCount)
@@ -227,33 +227,11 @@ func processHealthResult(
 		}
 
 		healthHistoryCopy[healthResult.ID] = pruneHistory(append([]cache.Result{healthResult}, healthHistoryCopy[healthResult.ID]...), maxHistory)
+	}
 
-		isAvailable, whyAvailable, unavailableStat := health.EvalCache(cache.ToInfo(healthResult), nil, &monitorConfigCopy)
-		whyAvailable += "(healthpoll)" // debug
-		if available, ok := localStates.GetCache(healthResult.ID); !ok || available.IsAvailable != isAvailable {
-			log.Infof("Changing state for %s was: %t now: %t because %s error: %v", healthResult.ID, available.IsAvailable, isAvailable, whyAvailable, healthResult.Error)
-			events.Add(health.Event{Time: time.Now(), Description: whyAvailable, Name: string(healthResult.ID), Hostname: string(healthResult.ID), Type: toDataCopy.ServerTypes[healthResult.ID].String(), Available: isAvailable})
-		}
+	calcAvailability(results, "health", nil, monitorConfigCopy, toDataCopy, localCacheStatusThreadsafe, localStates, events)
 
-		// if the cache is now Available, and was previously unavailable due to a threshold, make sure this poller contains the stat which exceeded the threshold.
-		if previousStatus, hasPreviousStatus := localCacheStatus[healthResult.ID]; isAvailable && hasPreviousStatus && !previousStatus.Available && previousStatus.UnavailableStat != "" {
-			if !resultHasStat(previousStatus.UnavailableStat, healthResult) {
-				// TODO determine if it's ok to add the result data (but not availability). Or will making them not align cause issues?
-				continue
-			}
-		}
-
-		localCacheStatus[healthResult.ID] = cache.AvailableStatus{
-			Available:       isAvailable,
-			Status:          monitorConfigCopy.TrafficServer[string(healthResult.ID)].Status,
-			Why:             whyAvailable,
-			UnavailableStat: unavailableStat,
-		} // TODO move within localStates?
-		localStates.SetCache(healthResult.ID, peer.IsAvailable{IsAvailable: isAvailable})
-	}
-	CalculateDeliveryServiceState(toDataCopy.DeliveryServiceServers, localStates)
 	healthHistory.Set(healthHistoryCopy)
-	localCacheStatusThreadsafe.Set(localCacheStatus)
 	// TODO determine if we should combineCrStates() here
 
 	lastHealthDurations := lastHealthDurationsThreadsafe.Get().Copy()

http://git-wip-us.apache.org/repos/asf/incubator-trafficcontrol/blob/40d5bcba/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
----------------------------------------------------------------------
diff --git a/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go b/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
index bf2d1dc..b44f357 100644
--- a/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
+++ b/traffic_monitor/experimental/traffic_monitor/manager/stathistory.go
@@ -158,7 +158,6 @@ func processStatResults(
 	statInfoHistory := statInfoHistoryThreadsafe.Get().Copy()
 	statResultHistory := statResultHistoryThreadsafe.Get().Copy()
 	statMaxKbpses := statMaxKbpsesThreadsafe.Get().Copy()
-	localCacheStatus := localCacheStatusThreadsafe.Get().Copy()
 
 	for i, result := range results {
 		maxStats := uint64(mc.Profile[mc.TrafficServer[string(result.ID)].Profile].Parameters.HistoryCount)
@@ -213,44 +212,54 @@ func processStatResults(
 		lastStats.Set(newLastStats)
 	}
 
-	// TODO test
-	// TODO abstract setting availability logic (duplicated in healthresult.go)
+	calcAvailability(results, "stat", statResultHistory, mc, toData, localCacheStatusThreadsafe, localStates, events)
+
+	endTime := time.Now()
+	lastStatDurations := lastStatDurationsThreadsafe.Get().Copy()
 	for _, result := range results {
-		isAvailable, whyAvailable, unavailableStat := health.EvalCache(cache.ToInfo(result), statResultHistory[result.ID], &mc)
-		whyAvailable += "(statpoll)" // debug
+		if lastStatStart, ok := lastStatEndTimes[result.ID]; ok {
+			d := time.Since(lastStatStart)
+			lastStatDurations[result.ID] = d
+		}
+		lastStatEndTimes[result.ID] = endTime
+	}
+	lastStatDurationsThreadsafe.Set(lastStatDurations)
+	unpolledCaches.SetPolled(results, lastStats.Get())
+}
 
-		if available, ok := localStates.GetCache(result.ID); !ok || available.IsAvailable != isAvailable {
-			log.Infof("Changing state for %s was: %t now: %t because %s error: %v", result.ID, available.IsAvailable, isAvailable, whyAvailable, result.Error)
-			events.Add(health.Event{Time: time.Now(), Description: whyAvailable, Name: string(result.ID), Hostname: string(result.ID), Type: toData.ServerTypes[result.ID].String(), Available: isAvailable})
+// calcAvailability calculates the availability of the cache, from the given result. Availability is stored in `localCacheStatus` and `localStates`, and if the status changed an event is added to `events`. statResultHistory may be nil, for pollers which don't poll stats.
+// TODO add enum for poller names?
+func calcAvailability(results []cache.Result, pollerName string, statResultHistory cache.ResultStatHistory, mc to.TrafficMonitorConfigMap, toData todata.TOData, localCacheStatusThreadsafe threadsafe.CacheAvailableStatus, localStates peer.CRStatesThreadsafe, events threadsafe.Events) {
+	localCacheStatuses := localCacheStatusThreadsafe.Get().Copy()
+	for _, result := range results {
+		statResults := cache.ResultStatValHistory(nil)
+		if statResultHistory != nil {
+			statResults = statResultHistory[result.ID]
 		}
 
+		isAvailable, whyAvailable, unavailableStat := health.EvalCache(cache.ToInfo(result), statResults, &mc)
+		whyAvailable += " (" + pollerName + ")" // TODO move to field in AvailableStatus
+
 		// if the cache is now Available, and was previously unavailable due to a threshold, make sure this poller contains the stat which exceeded the threshold.
-		if previousStatus, hasPreviousStatus := localCacheStatus[result.ID]; isAvailable && hasPreviousStatus && !previousStatus.Available && previousStatus.UnavailableStat != "" {
+		if previousStatus, hasPreviousStatus := localCacheStatuses[result.ID]; isAvailable && hasPreviousStatus && !previousStatus.Available && previousStatus.UnavailableStat != "" {
 			if !resultHasStat(previousStatus.UnavailableStat, result) {
-				// TODO determine if it's ok to add the result data (but not availability). Or will making them not align cause issues?
-				continue
+				return
 			}
 		}
-		localCacheStatus[result.ID] = cache.AvailableStatus{
+		localCacheStatuses[result.ID] = cache.AvailableStatus{
 			Available:       isAvailable,
 			Status:          mc.TrafficServer[string(result.ID)].Status,
 			Why:             whyAvailable,
 			UnavailableStat: unavailableStat,
 		} // TODO move within localStates?
-		localStates.SetCache(result.ID, peer.IsAvailable{IsAvailable: isAvailable})
-		CalculateDeliveryServiceState(toData.DeliveryServiceServers, localStates)
-		localCacheStatusThreadsafe.Set(localCacheStatus)
-	}
 
-	endTime := time.Now()
-	lastStatDurations := lastStatDurationsThreadsafe.Get().Copy()
-	for _, result := range results {
-		if lastStatStart, ok := lastStatEndTimes[result.ID]; ok {
-			d := time.Since(lastStatStart)
-			lastStatDurations[result.ID] = d
+		if available, ok := localStates.GetCache(result.ID); !ok || available.IsAvailable != isAvailable {
+			log.Infof("Changing state for %s was: %t now: %t because %s error: %v", result.ID, available.IsAvailable, isAvailable, whyAvailable, result.Error)
+			events.Add(cache.Event{Time: time.Now().Unix(), Description: whyAvailable, Name: result.ID, Hostname: result.ID, Type: toData.ServerTypes[result.ID].String(), Available: isAvailable})
 		}
-		lastStatEndTimes[result.ID] = endTime
+
+		localStates.SetCache(result.ID, peer.IsAvailable{IsAvailable: isAvailable})
 	}
-	lastStatDurationsThreadsafe.Set(lastStatDurations)
-	unpolledCaches.SetPolled(results, lastStats.Get())
+	CalculateDeliveryServiceState(toData.DeliveryServiceServers, localStates)
+	localCacheStatusThreadsafe.Set(localCacheStatuses)
 }