You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@servicecomb.apache.org by li...@apache.org on 2019/01/12 17:51:10 UTC
[servicecomb-service-center] 02/02: SCB-1059 Declare ALARM event queue
This is an automated email from the ASF dual-hosted git repository.
littlecui pushed a commit to branch metrics
in repository https://gitbox.apache.org/repos/asf/servicecomb-service-center.git
commit d4d9b46a57d476cf9b188189e3107a7b80c92cf1
Author: little-cui <su...@qq.com>
AuthorDate: Sat Jan 12 01:43:03 2019 +0800
SCB-1059 Declare ALARM event queue
---
integration/health-metrics-grafana.json | 745 ++++++++++++++++----------------
pkg/notify/notification_service.go | 64 +--
server/alarm/common.go | 3 +
server/alarm/service.go | 6 +-
server/health/metrics.go | 64 +++
5 files changed, 480 insertions(+), 402 deletions(-)
diff --git a/integration/health-metrics-grafana.json b/integration/health-metrics-grafana.json
index 0cf10b1..005c2bb 100644
--- a/integration/health-metrics-grafana.json
+++ b/integration/health-metrics-grafana.json
@@ -977,96 +977,6 @@
"type": "text"
},
{
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_LOCAL}",
- "fill": 1,
- "gridPos": {
- "h": 6,
- "w": 8,
- "x": 0,
- "y": 13
- },
- "height": "",
- "id": 1,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": true,
- "min": true,
- "show": true,
- "sort": null,
- "sortDesc": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 4,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(irate(service_center_http_request_total{job=\"service-center\"}[1m]))",
- "format": "time_series",
- "instant": false,
- "intervalFactor": 2,
- "legendFormat": "tps",
- "refId": "A"
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "TPS",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
- {
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
@@ -1087,7 +997,7 @@
"gridPos": {
"h": 6,
"w": 4,
- "x": 8,
+ "x": 0,
"y": 13
},
"id": 13,
@@ -1149,250 +1059,94 @@
"valueName": "current"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": true,
- "colors": [
- "#d44a3a",
- "rgba(237, 129, 40, 0.89)",
- "#299c46"
- ],
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
"datasource": "${DS_LOCAL}",
- "format": "percentunit",
- "gauge": {
- "maxValue": 1,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
+ "fill": 1,
"gridPos": {
"h": 6,
- "w": 4,
- "x": 12,
+ "w": 8,
+ "x": 4,
"y": 13
},
- "id": 12,
- "interval": null,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "minSpan": 4,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": true,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": ".5,.8",
- "title": "Global Success Rate",
- "transparent": false,
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "${DS_LOCAL}",
- "format": "ops",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 16,
- "y": 13
+ "height": "",
+ "id": 1,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
},
- "id": 10,
- "interval": null,
+ "lines": true,
+ "linewidth": 1,
"links": [],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
"minSpan": 4,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": true,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"4.+\"}[1m]))",
+ "expr": "sum(irate(service_center_http_request_total{job=\"service-center\"}[1m]))",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
- "legendFormat": "",
+ "intervalFactor": 1,
+ "legendFormat": "tps",
"refId": "A"
}
],
- "thresholds": "",
- "title": "4XX Errors",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "${DS_LOCAL}",
- "format": "ops",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "TPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
},
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 20,
- "y": 13
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
},
- "id": 11,
- "interval": null,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [
+ "yaxes": [
{
- "name": "value to text",
- "value": 1
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "minSpan": 4,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": true,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"5.+\"}[1m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "5XX Errors",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
}
],
- "valueName": "avg"
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
},
{
"aliasColors": {},
@@ -1404,11 +1158,11 @@
"gridPos": {
"h": 6,
"w": 8,
- "x": 0,
- "y": 19
+ "x": 12,
+ "y": 13
},
"height": "",
- "id": 28,
+ "id": 34,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -1436,18 +1190,18 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
+ "expr": "max(avg_over_time(service_center_db_heartbeat_durations_microseconds{job=\"service-center\"}[1m])) by (status)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
- "legendFormat": "rate",
+ "intervalFactor": 1,
+ "legendFormat": "{{status}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Success Rate",
+ "title": "Heartbeat Latency",
"tooltip": {
"shared": true,
"sort": 0,
@@ -1463,7 +1217,7 @@
},
"yaxes": [
{
- "format": "percentunit",
+ "format": "µs",
"label": null,
"logBase": 1,
"max": null,
@@ -1485,6 +1239,88 @@
}
},
{
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_LOCAL}",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 0,
+ "y": 19
+ },
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 4,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": ".5,.8",
+ "title": "Global Success Rate",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
"aliasColors": {},
"bars": false,
"dashLength": 10,
@@ -1494,11 +1330,11 @@
"gridPos": {
"h": 6,
"w": 8,
- "x": 8,
+ "x": 4,
"y": 19
},
"height": "",
- "id": 34,
+ "id": 28,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -1526,18 +1362,18 @@
"steppedLine": false,
"targets": [
{
- "expr": "max(avg_over_time(service_center_db_heartbeat_durations_microseconds{job=\"service-center\"}[1m])) by (status)",
+ "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
- "legendFormat": "{{status}}",
+ "intervalFactor": 1,
+ "legendFormat": "rate",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Heartbeat Latency",
+ "title": "Success Rate",
"tooltip": {
"shared": true,
"sort": 0,
@@ -1553,7 +1389,7 @@
},
"yaxes": [
{
- "format": "µs",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -1575,6 +1411,170 @@
}
},
{
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_LOCAL}",
+ "format": "ops",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 12,
+ "y": 19
+ },
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 4,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"4.+\"}[1m]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "4XX Errors",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_LOCAL}",
+ "format": "ops",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 16,
+ "y": 19
+ },
+ "id": 11,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 4,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"5.+\"}[1m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "5XX Errors",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
"content": "<div class=\"text-center dashboard-header\">\n <span>INSTANCE METRICS</span>\n</div>\n",
"gridPos": {
"h": 3,
@@ -1738,7 +1738,7 @@
"expr": "sum(irate(service_center_http_request_total{job=\"service-center\"}[1m])) by (method,api,instance)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{method}} {{api}}",
"refId": "A"
},
@@ -1838,7 +1838,7 @@
"expr": "max(avg_over_time(service_center_http_request_durations_microseconds{job=\"service-center\",method=\"GET\"}[1m])) by (method,api,instance)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{method}} {{api}}",
"refId": "A"
}
@@ -1932,7 +1932,7 @@
"expr": "max(avg_over_time(service_center_http_request_durations_microseconds{job=\"service-center\",method!=\"GET\"}[1m])) by (method,api,instance)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{method}} {{api}}",
"refId": "A"
},
@@ -2033,7 +2033,7 @@
"expr": "sum(service_center_notify_publish_total{job=\"service-center\"}) by (instance,source)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{source}}",
"refId": "A"
},
@@ -2232,10 +2232,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(service_center_db_backend_operation_total{job=\"service-center\"}) by (instance,operation)",
+ "expr": "sum(irate(service_center_db_backend_operation_total{job=\"service-center\"}[1m])) by (instance,operation)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{operation}}",
"refId": "A"
}
@@ -2243,7 +2243,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Backend Operation Total",
+ "title": "Backend Operations",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2261,7 +2261,7 @@
"yaxes": [
{
"decimals": 0,
- "format": "none",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -2337,7 +2337,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Backend Operation Latency",
+ "title": "Backend Operations Latency",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2405,7 +2405,7 @@
"y": 43
},
"height": "",
- "id": 2,
+ "id": 9,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2413,8 +2413,6 @@
"max": true,
"min": true,
"show": true,
- "sort": "current",
- "sortDesc": true,
"total": false,
"values": true
},
@@ -2432,10 +2430,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"service-center\"}",
+ "expr": "service_center_process_cpu_usage{job=\"service-center\"}",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
@@ -2443,7 +2441,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Goroutines",
+ "title": "CPU",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2459,7 +2457,8 @@
},
"yaxes": [
{
- "format": "none",
+ "decimals": null,
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -2493,8 +2492,7 @@
"x": 8,
"y": 43
},
- "height": "",
- "id": 9,
+ "id": 14,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2519,10 +2517,9 @@
"steppedLine": false,
"targets": [
{
- "expr": "irate(process_cpu_seconds_total{job=\"service-center\"}[1m])",
+ "expr": "process_resident_memory_bytes{job=\"service-center\"}",
"format": "time_series",
- "instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
@@ -2530,7 +2527,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "CPU",
+ "title": "Memory",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2546,7 +2543,7 @@
},
"yaxes": [
{
- "format": "s",
+ "format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
@@ -2580,8 +2577,7 @@
"x": 16,
"y": 43
},
- "height": "",
- "id": 8,
+ "id": 5,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2606,9 +2602,8 @@
"steppedLine": false,
"targets": [
{
- "expr": "max(avg_over_time(go_gc_duration_seconds{job=\"service-center\"}[1m])) by (instance)",
+ "expr": "go_threads{job=\"service-center\"}",
"format": "time_series",
- "instant": false,
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "A"
@@ -2617,7 +2612,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "GC Latency",
+ "title": "Threads",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2633,7 +2628,7 @@
},
"yaxes": [
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -2667,7 +2662,7 @@
"x": 0,
"y": 50
},
- "id": 5,
+ "id": 6,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2692,7 +2687,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "go_threads{job=\"service-center\"}",
+ "expr": "process_open_fds{job=\"service-center\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -2702,7 +2697,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Threads",
+ "title": "File Descriptions",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2752,7 +2747,8 @@
"x": 8,
"y": 50
},
- "id": 6,
+ "height": "",
+ "id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2760,6 +2756,8 @@
"max": true,
"min": true,
"show": true,
+ "sort": "current",
+ "sortDesc": true,
"total": false,
"values": true
},
@@ -2777,9 +2775,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "process_open_fds{job=\"service-center\"}",
+ "expr": "go_goroutines{job=\"service-center\"}",
"format": "time_series",
- "intervalFactor": 2,
+ "instant": false,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
@@ -2787,7 +2786,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "File Descriptions",
+ "title": "Goroutines",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2803,7 +2802,7 @@
},
"yaxes": [
{
- "format": "short",
+ "format": "none",
"label": null,
"logBase": 1,
"max": null,
@@ -2837,7 +2836,8 @@
"x": 16,
"y": 50
},
- "id": 14,
+ "height": "",
+ "id": 8,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2862,8 +2862,9 @@
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"service-center\"}",
+ "expr": "max(avg_over_time(go_gc_duration_seconds{job=\"service-center\"}[1m])) by (instance)",
"format": "time_series",
+ "instant": false,
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
@@ -2872,7 +2873,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Memory",
+ "title": "GC Latency",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2888,7 +2889,7 @@
},
"yaxes": [
{
- "format": "decbytes",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
@@ -2952,7 +2953,7 @@
{
"expr": "sum(service_center_local_cache_size_bytes{job=\"service-center\"}) by (resource,instance)",
"format": "time_series",
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{resource}}",
"refId": "A"
},
@@ -3013,7 +3014,7 @@
"list": []
},
"time": {
- "from": "now-15m",
+ "from": "now-1h",
"to": "now"
},
"timepicker": {
@@ -3044,5 +3045,5 @@
"timezone": "",
"title": "ServiceCenter",
"uid": "Zg6NoHGiz",
- "version": 15
+ "version": 7
}
\ No newline at end of file
diff --git a/pkg/notify/notification_service.go b/pkg/notify/notification_service.go
index c2edd24..0aa7fcb 100644
--- a/pkg/notify/notification_service.go
+++ b/pkg/notify/notification_service.go
@@ -24,14 +24,31 @@ import (
type NotifyService struct {
processors map[Type]*Processor
- closeMux sync.RWMutex
+ mux sync.RWMutex
isClose bool
}
-func (s *NotifyService) init() {
- for _, t := range Types() {
- s.processors[t] = NewProcessor(t.String(), t.QueueSize())
+func (s *NotifyService) newProcessor(t Type) *Processor {
+ s.mux.RLock()
+ p, ok := s.processors[t]
+ if ok {
+ s.mux.RUnlock()
+ return p
}
+ s.mux.RUnlock()
+
+ s.mux.Lock()
+ p, ok = s.processors[t]
+ if ok {
+ s.mux.Unlock()
+ return p
+ }
+ p = NewProcessor(t.String(), t.QueueSize())
+ s.processors[t] = p
+ s.mux.Unlock()
+
+ p.Run()
+ return p
}
func (s *NotifyService) Start() {
@@ -39,15 +56,13 @@ func (s *NotifyService) Start() {
log.Warnf("notify service is already running")
return
}
- s.closeMux.Lock()
+ s.mux.Lock()
s.isClose = false
- s.closeMux.Unlock()
+ s.mux.Unlock()
// 错误subscriber清理
s.AddSubscriber(NewNotifyServiceHealthChecker())
- s.startProcessors()
-
log.Debugf("notify service is started")
}
@@ -58,12 +73,7 @@ func (s *NotifyService) AddSubscriber(n Subscriber) error {
return err
}
- p, ok := s.processors[n.Type()]
- if !ok {
- err := errors.New("unknown subscribe type")
- log.Errorf(err, "add %s subscriber[%s/%s] failed", n.Type(), n.Subject(), n.Group())
- return err
- }
+ p := s.newProcessor(n.Type())
n.SetService(s)
n.OnAccept()
@@ -72,26 +82,25 @@ func (s *NotifyService) AddSubscriber(n Subscriber) error {
}
func (s *NotifyService) RemoveSubscriber(n Subscriber) {
+ s.mux.RLock()
p, ok := s.processors[n.Type()]
if !ok {
+ s.mux.RUnlock()
return
}
+ s.mux.RUnlock()
p.Remove(n)
n.Close()
}
-func (s *NotifyService) startProcessors() {
- for _, p := range s.processors {
- p.Run()
- }
-}
-
func (s *NotifyService) stopProcessors() {
+ s.mux.RLock()
for _, p := range s.processors {
p.Clear()
p.Stop()
}
+ s.mux.RUnlock()
}
//通知内容塞到队列里
@@ -100,18 +109,21 @@ func (s *NotifyService) Publish(job Event) error {
return errors.New("add notify job failed for server shutdown")
}
+ s.mux.RLock()
p, ok := s.processors[job.Type()]
if !ok {
+ s.mux.RUnlock()
return errors.New("Unknown job type")
}
+ s.mux.RUnlock()
p.Accept(job)
return nil
}
func (s *NotifyService) Closed() (b bool) {
- s.closeMux.RLock()
+ s.mux.RLock()
b = s.isClose
- s.closeMux.RUnlock()
+ s.mux.RUnlock()
return
}
@@ -119,9 +131,9 @@ func (s *NotifyService) Stop() {
if s.Closed() {
return
}
- s.closeMux.Lock()
+ s.mux.Lock()
s.isClose = true
- s.closeMux.Unlock()
+ s.mux.Unlock()
s.stopProcessors()
@@ -129,10 +141,8 @@ func (s *NotifyService) Stop() {
}
func NewNotifyService() *NotifyService {
- ns := &NotifyService{
+ return &NotifyService{
processors: make(map[Type]*Processor),
isClose: true,
}
- ns.init()
- return ns
}
diff --git a/server/alarm/common.go b/server/alarm/common.go
index a5a87b2..601b2d1 100644
--- a/server/alarm/common.go
+++ b/server/alarm/common.go
@@ -17,6 +17,7 @@ package alarm
import (
"fmt"
+ "github.com/apache/servicecomb-service-center/pkg/notify"
"github.com/apache/servicecomb-service-center/server/alarm/model"
)
@@ -39,6 +40,8 @@ const (
Group = "__ALARM_GROUP__"
)
+var ALARM = notify.RegisterType("ALARM", 0)
+
func FieldBool(key string, v bool) model.Field {
return model.Field{Key: key, Value: v}
}
diff --git a/server/alarm/service.go b/server/alarm/service.go
index d1714d9..ee00713 100644
--- a/server/alarm/service.go
+++ b/server/alarm/service.go
@@ -36,7 +36,7 @@ type AlarmService struct {
func (ac *AlarmService) Raise(id model.ID, fields ...model.Field) error {
ae := &model.AlarmEvent{
- Event: nf.NewEvent(nf.NOTIFTY, Subject, ""),
+ Event: nf.NewEvent(ALARM, Subject, ""),
Status: Activated,
Id: id,
Fields: util.NewJSONObject(),
@@ -49,7 +49,7 @@ func (ac *AlarmService) Raise(id model.ID, fields ...model.Field) error {
func (ac *AlarmService) Clear(id model.ID) error {
ae := &model.AlarmEvent{
- Event: nf.NewEvent(nf.NOTIFTY, Subject, ""),
+ Event: nf.NewEvent(ALARM, Subject, ""),
Status: Cleared,
Id: id,
}
@@ -87,7 +87,7 @@ func (ac *AlarmService) OnMessage(evt nf.Event) {
func NewAlarmService() *AlarmService {
c := &AlarmService{
- Subscriber: nf.NewSubscriber(nf.NOTIFTY, Subject, Group),
+ Subscriber: nf.NewSubscriber(ALARM, Subject, Group),
}
notify.NotifyCenter().AddSubscriber(c)
return c
diff --git a/server/health/metrics.go b/server/health/metrics.go
new file mode 100644
index 0000000..0650daa
--- /dev/null
+++ b/server/health/metrics.go
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package health
+
+import (
+ "github.com/apache/servicecomb-service-center/pkg/gopool"
+ "github.com/apache/servicecomb-service-center/server/metric"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/procfs"
+ "golang.org/x/net/context"
+ "os"
+ "time"
+)
+
+var (
+ cpu = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: metric.FamilyName,
+ Subsystem: "process",
+ Name: "cpu_usage",
+ Help: "Process cpu usage",
+ }, []string{"instance"})
+)
+
+func init() {
+ prometheus.MustRegister(cpu)
+ gopool.Go(func(ctx context.Context) {
+ var (
+ cpuTotal float64
+ cpuProc float64
+ )
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case <-time.After(3 * time.Second):
+ p, _ := procfs.NewProc(os.Getpid())
+ stat, _ := procfs.NewStat()
+ pstat, _ := p.NewStat()
+ ct := stat.CPUTotal.User + stat.CPUTotal.Nice + stat.CPUTotal.System +
+ stat.CPUTotal.Idle + stat.CPUTotal.Iowait + stat.CPUTotal.IRQ +
+ stat.CPUTotal.SoftIRQ + stat.CPUTotal.Steal + stat.CPUTotal.Guest
+ pt := float64(pstat.UTime+pstat.STime+pstat.CUTime+pstat.CSTime) / 100
+ cpu.WithLabelValues(metric.InstanceName()).Set(
+ (pt - cpuProc) * float64(len(stat.CPU)) / (ct - cpuTotal))
+ cpuTotal, cpuProc = ct, pt
+ }
+
+ }
+ })
+}