You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@trafficcontrol.apache.org by ra...@apache.org on 2018/11/06 20:51:45 UTC
[trafficcontrol] 01/04: Add an exponential time backoff object for
use in traffic_monitor.
This is an automated email from the ASF dual-hosted git repository.
rawlin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficcontrol.git
commit bc41a6e6e5ce07c89ffdfd8b6078ae029a669d45
Author: John Rushford <jr...@apache.org>
AuthorDate: Mon Oct 29 21:46:31 2018 +0000
Add an exponential time backoff object for use in traffic_monitor.
---
lib/go-util/backoff.go | 105 +++++++++++++++++++++++++++++++++++
lib/go-util/backoff_test.go | 105 +++++++++++++++++++++++++++++++++++
lib/go-util/num_test.go | 1 -
traffic_monitor/config/config.go | 23 ++++----
traffic_monitor/manager/opsconfig.go | 27 ++++++++-
5 files changed, 247 insertions(+), 14 deletions(-)
diff --git a/lib/go-util/backoff.go b/lib/go-util/backoff.go
new file mode 100644
index 0000000..94576c7
--- /dev/null
+++ b/lib/go-util/backoff.go
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// This file provides Backoff, which returns exponentially increasing
+// time.Duration values (with random jitter) that may be used to sleep between retries.
+
+package util
+
+import (
+ "fmt"
+ "math"
+ "math/rand"
+ "time"
+)
+
+// DefaultFactor may be used by applications for the factor argument of
+// NewBackoff. With this value the base delay doubles on every attempt.
+const DefaultFactor = 2.0
+
+// backoff is the Backoff implementation returned by NewBackoff.
+//
+// It keeps mutable state (attempt, rgen) without any locking, so a single
+// value must not be shared between goroutines without external synchronization.
+type backoff struct {
+	attempt float64       // exponent used for the next base delay; zeroed by Reset
+	Factor  float64       // multiplier applied to the base delay on each attempt
+	Min     time.Duration // base delay for attempt 0
+	Max     time.Duration // upper bound for every returned duration
+	rgen    *rand.Rand    // private random source used to compute jitter
+}
+
+// Backoff produces successive sleep durations for a retry loop.
+type Backoff interface {
+	// BackoffDuration returns the duration to wait before the next retry and
+	// advances the internal attempt counter.
+	BackoffDuration() time.Duration
+	// Reset restarts the sequence from the first attempt.
+	Reset()
+}
+
+// NewBackoff returns a Backoff whose durations grow from min by the given
+// factor on every call to BackoffDuration and never exceed max.
+//
+// It returns a nil Backoff and an error when min is less than 1 nanosecond,
+// when max is not greater than min, or when factor is not greater than 1.
+func NewBackoff(min time.Duration, max time.Duration, factor float64) (Backoff, error) {
+	// Validate arguments; no defaults are substituted for invalid values.
+	if min < 1 {
+		return nil, fmt.Errorf("invalid min %v: min must be at least 1ns", min)
+	}
+	if max <= min {
+		return nil, fmt.Errorf("invalid max %v: max must be greater than min (%v)", max, min)
+	}
+	if factor <= 1.0 {
+		return nil, fmt.Errorf("invalid factor %v: factor must be greater than 1", factor)
+	}
+
+	// Seed a private generator so the jitter differs between instances.
+	src := rand.NewSource(time.Now().UTC().UnixNano())
+
+	return &backoff{
+		attempt: 0,
+		Factor:  factor,
+		Min:     min,
+		Max:     max,
+		rgen:    rand.New(src),
+	}, nil
+}
+
+// jitter returns a random value in the half-open range [minFloat, durFloat).
+// When durFloat equals minFloat the result is exactly minFloat.
+func (b *backoff) jitter(durFloat float64, minFloat float64) float64 {
+	return b.rgen.Float64()*(durFloat-minFloat) + minFloat
+}
+
+// Reset restarts the sequence so the next BackoffDuration call behaves like
+// the first call on a new Backoff.
+func (b *backoff) Reset() {
+	b.attempt = 0
+}
+
+// BackoffDuration calculates and returns the next backoff duration.
+//
+// The base delay is Min * Factor^attempt. A random jitter between Min and the
+// base delay is added to it, and the result is capped at Max.
+func (b *backoff) BackoffDuration() time.Duration {
+	minFloat := float64(b.Min)
+	maxFloat := float64(b.Max)
+
+	base := minFloat * math.Pow(b.Factor, b.attempt)
+	if base >= maxFloat {
+		// The cap is reached. Do not advance attempt any further: an ever
+		// growing exponent eventually makes math.Pow return +Inf, and a jitter
+		// computed from +Inf can become NaN, which would slip past the cap
+		// comparison and produce an undefined Duration.
+		return b.Max
+	}
+	b.attempt++
+
+	// base and the jitter are both at least Min, so the sum can never fall
+	// below Min; only the upper bound needs to be enforced.
+	durFloat := base + b.jitter(base, minFloat)
+	if durFloat >= maxFloat {
+		return b.Max
+	}
+	return time.Duration(durFloat)
+}
diff --git a/lib/go-util/backoff_test.go b/lib/go-util/backoff_test.go
new file mode 100644
index 0000000..e300d68
--- /dev/null
+++ b/lib/go-util/backoff_test.go
@@ -0,0 +1,105 @@
+package util
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+import (
+ "testing"
+ "time"
+)
+
+// TestNewBackoff verifies that NewBackoff accepts valid arguments and rejects
+// each kind of invalid argument on its own as well as all of them together.
+func TestNewBackoff(t *testing.T) {
+	min := 500 * time.Millisecond
+	max := 600 * time.Millisecond
+	fac := DefaultFactor
+
+	bo, err := NewBackoff(min, max, fac)
+	if err != nil {
+		t.Errorf("unexpected error, min: %v, max: %v, fac: %v - %v", min, max, fac, err)
+	}
+	if bo == nil {
+		t.Errorf("unexpected nil Backoff, min: %v, max: %v, fac: %v", min, max, fac)
+	}
+
+	// Each case breaks exactly one validation rule, except the last one,
+	// which breaks all of them at once.
+	invalid := []struct {
+		name string
+		min  time.Duration
+		max  time.Duration
+		fac  float64
+	}{
+		{"min below 1ns", 0, max, fac},
+		{"max equal to min", min, min, fac},
+		{"factor equal to 1", min, max, 1.0},
+		{"all arguments zero", 0, 0, 0},
+	}
+	for _, tc := range invalid {
+		bo, err = NewBackoff(tc.min, tc.max, tc.fac)
+		if err == nil {
+			t.Errorf("%s: expected an error from NewBackoff(%v, %v, %v)", tc.name, tc.min, tc.max, tc.fac)
+		}
+		if bo != nil {
+			t.Errorf("%s: expected a nil Backoff from NewBackoff(%v, %v, %v)", tc.name, tc.min, tc.max, tc.fac)
+		}
+	}
+}
+
+// TestReset verifies that Reset restarts the sequence: the first duration
+// after a Reset must not exceed the second duration of the previous run.
+func TestReset(t *testing.T) {
+	bo, err := NewBackoff(time.Nanosecond, 2*time.Second, DefaultFactor)
+	if err != nil {
+		// bo is nil here; continuing would panic on the first method call.
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	val1 := bo.BackoffDuration()
+	val2 := bo.BackoffDuration()
+	if val2 < val1 {
+		t.Errorf("expected val1: %v to be no greater than val2: %v", val1, val2)
+	}
+
+	bo.Reset()
+
+	val3 := bo.BackoffDuration()
+	if val3 > val2 {
+		t.Errorf("expected val3: %v to be no greater than val2: %v", val3, val2)
+	}
+}
+
+// TestBackoffDuration verifies that successive durations start above min,
+// never decrease, and end up at max.
+func TestBackoffDuration(t *testing.T) {
+	min := 100 * time.Nanosecond
+	max := 20000 * time.Nanosecond
+	fac := DefaultFactor
+
+	bo, err := NewBackoff(min, max, fac)
+	if err != nil {
+		// bo is nil here; continuing would panic on the first method call.
+		t.Fatalf("unexpected error: %v", err)
+	}
+	dur := min
+
+	val := bo.BackoffDuration()
+	if val <= min {
+		t.Errorf("unexpected duration calculation, val: %v is not greater than min: %v", val, min)
+	}
+
+	// With min=100ns and a factor of 2 the base delay passes max=20000ns
+	// within 8 further calls (100ns * 2^8 = 25600ns).
+	for i := 0; i < 8; i++ {
+		val = bo.BackoffDuration()
+		if val < dur {
+			t.Errorf("unexpected duration calculation, iteration: %v, val: %v < dur: %v", i, val, dur)
+		}
+		dur = val
+	}
+
+	// After those calls the returned duration must be capped at max.
+	if val != max {
+		t.Errorf("unexpected duration calculation, val: %v != max: %v", val, max)
+	}
+}
diff --git a/lib/go-util/num_test.go b/lib/go-util/num_test.go
index dd37986..06e8aa2 100644
--- a/lib/go-util/num_test.go
+++ b/lib/go-util/num_test.go
@@ -17,7 +17,6 @@ package util
// When adding symbols, document the RFC and section they correspond to.
import (
- "encoding/json"
"reflect"
"testing"
)
diff --git a/traffic_monitor/config/config.go b/traffic_monitor/config/config.go
index 8bb2daa..43097df 100644
--- a/traffic_monitor/config/config.go
+++ b/traffic_monitor/config/config.go
@@ -39,6 +39,8 @@ const (
LogLocationNull = "null"
//StaticFileDir is the directory that contains static html and js files.
StaticFileDir = "/opt/traffic_monitor/static/"
+ // FixedRetryInterval is a fallback retry interval to use if the Backoff intervals are misconfigured.
+ FixedRetryInterval = 10000 * time.Millisecond
)
// Config is the configuration for the application. It includes myriad data, such as polling intervals and log locations.
@@ -64,7 +66,8 @@ type Config struct {
HealthToStatRatio uint64 `json:"health_to_stat_ratio"`
StaticFileDir string `json:"static_file_dir"`
CRConfigHistoryCount uint64 `json:"crconfig_history_count"`
- TrafficOpsRetryInterval time.Duration `json:"-"`
+ TrafficOpsMinRetryInterval time.Duration `json:"-"`
+ TrafficOpsMaxRetryInterval time.Duration `json:"-"`
}
func (c Config) ErrorLog() log.LogLocation { return log.LogLocation(c.LogLocationError) }
@@ -96,7 +99,8 @@ var DefaultConfig = Config{
HealthToStatRatio: 4,
StaticFileDir: StaticFileDir,
CRConfigHistoryCount: 20000,
- TrafficOpsRetryInterval: 3 * time.Second,
+ TrafficOpsMinRetryInterval: 100 * time.Millisecond,
+ TrafficOpsMaxRetryInterval: 60000 * time.Millisecond,
}
// MarshalJSON marshals custom millisecond durations. Aliasing inspired by http://choly.ca/post/go-json-marshalling/
@@ -141,7 +145,8 @@ func (c *Config) UnmarshalJSON(data []byte) error {
StatFlushIntervalMs *uint64 `json:"stat_flush_interval_ms"`
ServeReadTimeoutMs *uint64 `json:"serve_read_timeout_ms"`
ServeWriteTimeoutMs *uint64 `json:"serve_write_timeout_ms"`
- TrafficOpsRetryIntervalSec *uint64 `json:"traffic_ops_retry_interval_sec"`
+ TrafficOpsMinRetryIntervalMs *uint64 `json:"traffic_ops_min_retry_interval_ms"`
+ TrafficOpsMaxRetryIntervalMs *uint64 `json:"traffic_ops_max_retry_interval_ms"`
*Alias
}{
Alias: (*Alias)(c),
@@ -180,13 +185,11 @@ func (c *Config) UnmarshalJSON(data []byte) error {
if aux.PeerOptimistic != nil {
c.PeerOptimistic = *aux.PeerOptimistic
}
- if aux.TrafficOpsRetryIntervalSec != nil {
- if *aux.TrafficOpsRetryIntervalSec <= 0 {
- log.Errorf("The 'traffic_ops_retry_interval_sec: %v' setting is incorrect, needs to be a positive number of seconds, using default of 3 seconds", *aux.TrafficOpsRetryIntervalSec)
- c.TrafficOpsRetryInterval = 3 * time.Second
- } else {
- c.TrafficOpsRetryInterval = time.Duration(*aux.TrafficOpsRetryIntervalSec) * time.Second
- }
+ if aux.TrafficOpsMinRetryIntervalMs != nil {
+ c.TrafficOpsMinRetryInterval = time.Duration(*aux.TrafficOpsMinRetryIntervalMs) * time.Millisecond
+ }
+ if aux.TrafficOpsMaxRetryIntervalMs != nil {
+ c.TrafficOpsMaxRetryInterval = time.Duration(*aux.TrafficOpsMaxRetryIntervalMs) * time.Millisecond
}
return nil
}
diff --git a/traffic_monitor/manager/opsconfig.go b/traffic_monitor/manager/opsconfig.go
index 5beed93..d0c44b6 100644
--- a/traffic_monitor/manager/opsconfig.go
+++ b/traffic_monitor/manager/opsconfig.go
@@ -29,6 +29,7 @@ import (
"golang.org/x/sys/unix"
"github.com/apache/trafficcontrol/lib/go-log"
+ "github.com/apache/trafficcontrol/lib/go-util"
"github.com/apache/trafficcontrol/traffic_monitor/config"
"github.com/apache/trafficcontrol/traffic_monitor/datareq"
"github.com/apache/trafficcontrol/traffic_monitor/handler"
@@ -138,12 +139,22 @@ func StartOpsConfigManager(
// fixed an issue here where traffic_monitor loops forever, doing nothing useful if traffic_ops is down,
// and would never logging in again. since traffic_monitor is just starting up here, keep retrying until traffic_ops is reachable and a session can be established.
+ backoff, err := util.NewBackoff(cfg.TrafficOpsMinRetryInterval, cfg.TrafficOpsMaxRetryInterval, util.DefaultFactor)
+ if err != nil {
+ log.Errorf("possible invalid backoff arguments, will use a fixed sleep interval: %v", err)
+ }
for {
realToSession, toAddr, err = to.LoginWithAgent(newOpsConfig.Url, newOpsConfig.Username, newOpsConfig.Password, newOpsConfig.Insecure, staticAppData.UserAgent, useCache, trafficOpsRequestTimeout)
if err != nil {
handleErr(fmt.Errorf("MonitorConfigPoller: error instantiating Session with traffic_ops (%v): %s\n", toAddr, err))
- log.Errorf("cfg.TrafficOpsRetryInterval: %v", cfg.TrafficOpsRetryInterval)
- time.Sleep(cfg.TrafficOpsRetryInterval)
+ if backoff != nil {
+ duration := backoff.BackoffDuration()
+ log.Errorf("retrying in %v\n", duration)
+ time.Sleep(duration)
+ } else {
+ log.Errorf("retrying in %v\n", config.FixedRetryInterval)
+ time.Sleep(config.FixedRetryInterval)
+ }
continue
} else {
toSession.Set(realToSession)
@@ -162,10 +173,20 @@ func StartOpsConfigManager(
// fixed an issue when traffic_monitor receives corrupt data, CRConfig, from traffic_ops.
// Will loop and retry until a good CRConfig is received from traffic_ops
+ if backoff != nil {
+ backoff.Reset()
+ }
for {
if err := toData.Fetch(toSession, newOpsConfig.CdnName); err != nil {
handleErr(fmt.Errorf("Error getting Traffic Ops data: %v\n", err))
- time.Sleep(cfg.TrafficOpsRetryInterval)
+ if backoff != nil {
+ duration := backoff.BackoffDuration()
+ log.Errorf("retrying in %v\n", duration)
+ time.Sleep(duration)
+ } else {
+ log.Errorf("retrying in %v\n", config.FixedRetryInterval)
+ time.Sleep(config.FixedRetryInterval)
+ }
continue
}
break