You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@servicecomb.apache.org by GitBox <gi...@apache.org> on 2019/01/14 03:37:47 UTC
[servicecomb-service-center] Diff for: [GitHub] little-cui closed pull
request #525: SCB-1059 Bug fixes
diff --git a/integration/health-metrics-grafana.json b/integration/health-metrics-grafana.json
index 0cf10b1d..005c2bb3 100644
--- a/integration/health-metrics-grafana.json
+++ b/integration/health-metrics-grafana.json
@@ -976,96 +976,6 @@
"transparent": true,
"type": "text"
},
- {
- "aliasColors": {},
- "bars": false,
- "dashLength": 10,
- "dashes": false,
- "datasource": "${DS_LOCAL}",
- "fill": 1,
- "gridPos": {
- "h": 6,
- "w": 8,
- "x": 0,
- "y": 13
- },
- "height": "",
- "id": 1,
- "legend": {
- "alignAsTable": true,
- "avg": true,
- "current": true,
- "max": true,
- "min": true,
- "show": true,
- "sort": null,
- "sortDesc": null,
- "total": false,
- "values": true
- },
- "lines": true,
- "linewidth": 1,
- "links": [],
- "minSpan": 4,
- "nullPointMode": "null",
- "percentage": false,
- "pointradius": 5,
- "points": false,
- "renderer": "flot",
- "seriesOverrides": [],
- "spaceLength": 10,
- "stack": false,
- "steppedLine": false,
- "targets": [
- {
- "expr": "sum(irate(service_center_http_request_total{job=\"service-center\"}[1m]))",
- "format": "time_series",
- "instant": false,
- "intervalFactor": 2,
- "legendFormat": "tps",
- "refId": "A"
- }
- ],
- "thresholds": [],
- "timeFrom": null,
- "timeShift": null,
- "title": "TPS",
- "tooltip": {
- "shared": true,
- "sort": 0,
- "value_type": "individual"
- },
- "type": "graph",
- "xaxis": {
- "buckets": null,
- "mode": "time",
- "name": null,
- "show": true,
- "values": []
- },
- "yaxes": [
- {
- "format": "ops",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- },
- {
- "format": "short",
- "label": null,
- "logBase": 1,
- "max": null,
- "min": null,
- "show": true
- }
- ],
- "yaxis": {
- "align": false,
- "alignLevel": null
- }
- },
{
"cacheTimeout": null,
"colorBackground": false,
@@ -1087,7 +997,7 @@
"gridPos": {
"h": 6,
"w": 4,
- "x": 8,
+ "x": 0,
"y": 13
},
"id": 13,
@@ -1149,250 +1059,94 @@
"valueName": "current"
},
{
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": true,
- "colors": [
- "#d44a3a",
- "rgba(237, 129, 40, 0.89)",
- "#299c46"
- ],
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
"datasource": "${DS_LOCAL}",
- "format": "percentunit",
- "gauge": {
- "maxValue": 1,
- "minValue": 0,
- "show": true,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
+ "fill": 1,
"gridPos": {
"h": 6,
- "w": 4,
- "x": 12,
+ "w": 8,
+ "x": 4,
"y": 13
},
- "id": 12,
- "interval": null,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "minSpan": 4,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": true,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": ".5,.8",
- "title": "Global Success Rate",
- "transparent": false,
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "current"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "${DS_LOCAL}",
- "format": "ops",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
- },
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 16,
- "y": 13
+ "height": "",
+ "id": 1,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": true,
+ "min": true,
+ "show": true,
+ "sort": null,
+ "sortDesc": null,
+ "total": false,
+ "values": true
},
- "id": 10,
- "interval": null,
+ "lines": true,
+ "linewidth": 1,
"links": [],
- "mappingType": 1,
- "mappingTypes": [
- {
- "name": "value to text",
- "value": 1
- },
- {
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
"minSpan": 4,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": true,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
+ "nullPointMode": "null",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
"targets": [
{
- "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"4.+\"}[1m]))",
+ "expr": "sum(irate(service_center_http_request_total{job=\"service-center\"}[1m]))",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
- "legendFormat": "",
+ "intervalFactor": 1,
+ "legendFormat": "tps",
"refId": "A"
}
],
- "thresholds": "",
- "title": "4XX Errors",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
- }
- ],
- "valueName": "avg"
- },
- {
- "cacheTimeout": null,
- "colorBackground": false,
- "colorValue": false,
- "colors": [
- "#299c46",
- "rgba(237, 129, 40, 0.89)",
- "#d44a3a"
- ],
- "datasource": "${DS_LOCAL}",
- "format": "ops",
- "gauge": {
- "maxValue": 100,
- "minValue": 0,
- "show": false,
- "thresholdLabels": false,
- "thresholdMarkers": true
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "TPS",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
},
- "gridPos": {
- "h": 6,
- "w": 4,
- "x": 20,
- "y": 13
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
},
- "id": 11,
- "interval": null,
- "links": [],
- "mappingType": 1,
- "mappingTypes": [
+ "yaxes": [
{
- "name": "value to text",
- "value": 1
+ "format": "ops",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
},
{
- "name": "range to text",
- "value": 2
- }
- ],
- "maxDataPoints": 100,
- "minSpan": 4,
- "nullPointMode": "connected",
- "nullText": null,
- "postfix": "",
- "postfixFontSize": "50%",
- "prefix": "",
- "prefixFontSize": "50%",
- "rangeMaps": [
- {
- "from": "null",
- "text": "N/A",
- "to": "null"
- }
- ],
- "sparkline": {
- "fillColor": "rgba(31, 118, 189, 0.18)",
- "full": true,
- "lineColor": "rgb(31, 120, 193)",
- "show": true
- },
- "tableColumn": "",
- "targets": [
- {
- "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"5.+\"}[1m]))",
- "format": "time_series",
- "intervalFactor": 2,
- "refId": "A"
- }
- ],
- "thresholds": "",
- "title": "5XX Errors",
- "type": "singlestat",
- "valueFontSize": "80%",
- "valueMaps": [
- {
- "op": "=",
- "text": "N/A",
- "value": "null"
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
}
],
- "valueName": "avg"
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
},
{
"aliasColors": {},
@@ -1404,11 +1158,11 @@
"gridPos": {
"h": 6,
"w": 8,
- "x": 0,
- "y": 19
+ "x": 12,
+ "y": 13
},
"height": "",
- "id": 28,
+ "id": 34,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -1436,18 +1190,18 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
+ "expr": "max(avg_over_time(service_center_db_heartbeat_durations_microseconds{job=\"service-center\"}[1m])) by (status)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
- "legendFormat": "rate",
+ "intervalFactor": 1,
+ "legendFormat": "{{status}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Success Rate",
+ "title": "Heartbeat Latency",
"tooltip": {
"shared": true,
"sort": 0,
@@ -1463,7 +1217,7 @@
},
"yaxes": [
{
- "format": "percentunit",
+ "format": "µs",
"label": null,
"logBase": 1,
"max": null,
@@ -1484,6 +1238,88 @@
"alignLevel": null
}
},
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": true,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_LOCAL}",
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 0,
+ "y": 19
+ },
+ "id": 12,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 4,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": ".5,.8",
+ "title": "Global Success Rate",
+ "transparent": false,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
{
"aliasColors": {},
"bars": false,
@@ -1494,11 +1330,11 @@
"gridPos": {
"h": 6,
"w": 8,
- "x": 8,
+ "x": 4,
"y": 19
},
"height": "",
- "id": 34,
+ "id": 28,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -1526,18 +1362,18 @@
"steppedLine": false,
"targets": [
{
- "expr": "max(avg_over_time(service_center_db_heartbeat_durations_microseconds{job=\"service-center\"}[1m])) by (status)",
+ "expr": "sum(service_center_http_success_total{job=\"service-center\"})/sum(service_center_http_request_total{job=\"service-center\"})",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
- "legendFormat": "{{status}}",
+ "intervalFactor": 1,
+ "legendFormat": "rate",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Heartbeat Latency",
+ "title": "Success Rate",
"tooltip": {
"shared": true,
"sort": 0,
@@ -1553,7 +1389,7 @@
},
"yaxes": [
{
- "format": "µs",
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -1574,6 +1410,170 @@
"alignLevel": null
}
},
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_LOCAL}",
+ "format": "ops",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 12,
+ "y": 19
+ },
+ "id": 10,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 4,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"4.+\"}[1m]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "4XX Errors",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_LOCAL}",
+ "format": "ops",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 6,
+ "w": 4,
+ "x": 16,
+ "y": 19
+ },
+ "id": 11,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 4,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(service_center_http_request_total{job=\"service-center\",code=~\"5.+\"}[1m]))",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "5XX Errors",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ },
{
"content": "<div class=\"text-center dashboard-header\">\n <span>INSTANCE METRICS</span>\n</div>\n",
"gridPos": {
@@ -1738,7 +1738,7 @@
"expr": "sum(irate(service_center_http_request_total{job=\"service-center\"}[1m])) by (method,api,instance)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{method}} {{api}}",
"refId": "A"
},
@@ -1838,7 +1838,7 @@
"expr": "max(avg_over_time(service_center_http_request_durations_microseconds{job=\"service-center\",method=\"GET\"}[1m])) by (method,api,instance)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{method}} {{api}}",
"refId": "A"
}
@@ -1932,7 +1932,7 @@
"expr": "max(avg_over_time(service_center_http_request_durations_microseconds{job=\"service-center\",method!=\"GET\"}[1m])) by (method,api,instance)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{method}} {{api}}",
"refId": "A"
},
@@ -2033,7 +2033,7 @@
"expr": "sum(service_center_notify_publish_total{job=\"service-center\"}) by (instance,source)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{source}}",
"refId": "A"
},
@@ -2232,10 +2232,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "sum(service_center_db_backend_operation_total{job=\"service-center\"}) by (instance,operation)",
+ "expr": "sum(irate(service_center_db_backend_operation_total{job=\"service-center\"}[1m])) by (instance,operation)",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{operation}}",
"refId": "A"
}
@@ -2243,7 +2243,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Backend Operation Total",
+ "title": "Backend Operations",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2261,7 +2261,7 @@
"yaxes": [
{
"decimals": 0,
- "format": "none",
+ "format": "ops",
"label": null,
"logBase": 1,
"max": null,
@@ -2337,7 +2337,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Backend Operation Latency",
+ "title": "Backend Operations Latency",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2405,7 +2405,7 @@
"y": 43
},
"height": "",
- "id": 2,
+ "id": 9,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2413,8 +2413,6 @@
"max": true,
"min": true,
"show": true,
- "sort": "current",
- "sortDesc": true,
"total": false,
"values": true
},
@@ -2432,10 +2430,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "go_goroutines{job=\"service-center\"}",
+ "expr": "service_center_process_cpu_usage{job=\"service-center\"}",
"format": "time_series",
"instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
@@ -2443,7 +2441,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Goroutines",
+ "title": "CPU",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2459,7 +2457,8 @@
},
"yaxes": [
{
- "format": "none",
+ "decimals": null,
+ "format": "percentunit",
"label": null,
"logBase": 1,
"max": null,
@@ -2493,8 +2492,7 @@
"x": 8,
"y": 43
},
- "height": "",
- "id": 9,
+ "id": 14,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2519,10 +2517,9 @@
"steppedLine": false,
"targets": [
{
- "expr": "irate(process_cpu_seconds_total{job=\"service-center\"}[1m])",
+ "expr": "process_resident_memory_bytes{job=\"service-center\"}",
"format": "time_series",
- "instant": false,
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
@@ -2530,7 +2527,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "CPU",
+ "title": "Memory",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2546,7 +2543,7 @@
},
"yaxes": [
{
- "format": "s",
+ "format": "decbytes",
"label": null,
"logBase": 1,
"max": null,
@@ -2580,8 +2577,7 @@
"x": 16,
"y": 43
},
- "height": "",
- "id": 8,
+ "id": 5,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2606,9 +2602,8 @@
"steppedLine": false,
"targets": [
{
- "expr": "max(avg_over_time(go_gc_duration_seconds{job=\"service-center\"}[1m])) by (instance)",
+ "expr": "go_threads{job=\"service-center\"}",
"format": "time_series",
- "instant": false,
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "A"
@@ -2617,7 +2612,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "GC Latency",
+ "title": "Threads",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2633,7 +2628,7 @@
},
"yaxes": [
{
- "format": "s",
+ "format": "short",
"label": null,
"logBase": 1,
"max": null,
@@ -2667,7 +2662,7 @@
"x": 0,
"y": 50
},
- "id": 5,
+ "id": 6,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2692,7 +2687,7 @@
"steppedLine": false,
"targets": [
{
- "expr": "go_threads{job=\"service-center\"}",
+ "expr": "process_open_fds{job=\"service-center\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}}",
@@ -2702,7 +2697,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Threads",
+ "title": "File Descriptors",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2752,7 +2747,8 @@
"x": 8,
"y": 50
},
- "id": 6,
+ "height": "",
+ "id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2760,6 +2756,8 @@
"max": true,
"min": true,
"show": true,
+ "sort": "current",
+ "sortDesc": true,
"total": false,
"values": true
},
@@ -2777,9 +2775,10 @@
"steppedLine": false,
"targets": [
{
- "expr": "process_open_fds{job=\"service-center\"}",
+ "expr": "go_goroutines{job=\"service-center\"}",
"format": "time_series",
- "intervalFactor": 2,
+ "instant": false,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
}
@@ -2787,7 +2786,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "File Descriptions",
+ "title": "Goroutines",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2803,7 +2802,7 @@
},
"yaxes": [
{
- "format": "short",
+ "format": "none",
"label": null,
"logBase": 1,
"max": null,
@@ -2837,7 +2836,8 @@
"x": 16,
"y": 50
},
- "id": 14,
+ "height": "",
+ "id": 8,
"legend": {
"alignAsTable": true,
"avg": true,
@@ -2862,8 +2862,9 @@
"steppedLine": false,
"targets": [
{
- "expr": "process_resident_memory_bytes{job=\"service-center\"}",
+ "expr": "max(avg_over_time(go_gc_duration_seconds{job=\"service-center\"}[1m])) by (instance)",
"format": "time_series",
+ "instant": false,
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"refId": "A"
@@ -2872,7 +2873,7 @@
"thresholds": [],
"timeFrom": null,
"timeShift": null,
- "title": "Memory",
+ "title": "GC Latency",
"tooltip": {
"shared": true,
"sort": 0,
@@ -2888,7 +2889,7 @@
},
"yaxes": [
{
- "format": "decbytes",
+ "format": "s",
"label": null,
"logBase": 1,
"max": null,
@@ -2952,7 +2953,7 @@
{
"expr": "sum(service_center_local_cache_size_bytes{job=\"service-center\"}) by (resource,instance)",
"format": "time_series",
- "intervalFactor": 2,
+ "intervalFactor": 1,
"legendFormat": "{{instance}}> {{resource}}",
"refId": "A"
},
@@ -3013,7 +3014,7 @@
"list": []
},
"time": {
- "from": "now-15m",
+ "from": "now-1h",
"to": "now"
},
"timepicker": {
@@ -3044,5 +3045,5 @@
"timezone": "",
"title": "ServiceCenter",
"uid": "Zg6NoHGiz",
- "version": 15
+ "version": 7
}
\ No newline at end of file
diff --git a/pkg/notify/notification_service.go b/pkg/notify/notification_service.go
index c2edd249..7eebc6ab 100644
--- a/pkg/notify/notification_service.go
+++ b/pkg/notify/notification_service.go
@@ -24,14 +24,31 @@ import (
+// NotifyService routes published events to one Processor per notify Type.
type NotifyService struct {
+ // processors maps a notify Type to the Processor handling it.
processors map[Type]*Processor
- closeMux sync.RWMutex
+ // mux guards both the processors map and the isClose flag.
+ mux sync.RWMutex
+ // isClose is true for a freshly constructed service, set to false by
+ // Start and back to true by Stop.
isClose bool
}
-func (s *NotifyService) init() {
- for _, t := range Types() {
- s.processors[t] = NewProcessor(t.String(), t.QueueSize())
+// newProcessor returns the Processor registered for type t, creating,
+// registering and starting one the first time t is requested.
+//
+// The lookup is first attempted under the read lock. Only on a miss is the
+// write lock taken and the map checked again, so concurrent callers asking
+// for the same type end up sharing a single Processor.
+func (s *NotifyService) newProcessor(t Type) *Processor {
+ s.mux.RLock()
+ p, ok := s.processors[t]
+ if ok {
+ s.mux.RUnlock()
+ return p
}
+ s.mux.RUnlock()
+
+ s.mux.Lock()
+ // Re-check under the write lock: another goroutine may have registered
+ // a processor for t between the RUnlock above and this Lock.
+ p, ok = s.processors[t]
+ if ok {
+ s.mux.Unlock()
+ return p
+ }
+ p = NewProcessor(t.String(), t.QueueSize())
+ s.processors[t] = p
+ s.mux.Unlock()
+
+ // Run is called after the lock is released and only on the path that
+ // created p; the early returns above never call it.
+ p.Run()
+ return p
}
func (s *NotifyService) Start() {
@@ -39,15 +56,13 @@ func (s *NotifyService) Start() {
log.Warnf("notify service is already running")
return
}
- s.closeMux.Lock()
+ s.mux.Lock()
s.isClose = false
- s.closeMux.Unlock()
+ s.mux.Unlock()
// 错误subscriber清理
s.AddSubscriber(NewNotifyServiceHealthChecker())
- s.startProcessors()
-
log.Debugf("notify service is started")
}
@@ -58,12 +73,13 @@ func (s *NotifyService) AddSubscriber(n Subscriber) error {
return err
}
- p, ok := s.processors[n.Type()]
- if !ok {
+ if !n.Type().IsValid() {
err := errors.New("unknown subscribe type")
log.Errorf(err, "add %s subscriber[%s/%s] failed", n.Type(), n.Subject(), n.Group())
return err
}
+
+ p := s.newProcessor(n.Type())
n.SetService(s)
n.OnAccept()
@@ -72,26 +88,25 @@ func (s *NotifyService) AddSubscriber(n Subscriber) error {
}
+// RemoveSubscriber removes n from the processor registered for n.Type() and
+// then closes n. It returns without touching n when no processor exists for
+// that type. The read lock is held only for the map lookup.
func (s *NotifyService) RemoveSubscriber(n Subscriber) {
+ s.mux.RLock()
p, ok := s.processors[n.Type()]
if !ok {
+ s.mux.RUnlock()
return
}
+ s.mux.RUnlock()
p.Remove(n)
n.Close()
}
-func (s *NotifyService) startProcessors() {
- for _, p := range s.processors {
- p.Run()
- }
-}
-
+// stopProcessors calls Clear and then Stop on every registered processor
+// while holding the read lock. The processors are left in the map.
func (s *NotifyService) stopProcessors() {
+ s.mux.RLock()
for _, p := range s.processors {
p.Clear()
p.Stop()
}
+ s.mux.RUnlock()
}
//通知内容塞到队列里
@@ -100,18 +115,21 @@ func (s *NotifyService) Publish(job Event) error {
return errors.New("add notify job failed for server shutdown")
}
+ s.mux.RLock()
p, ok := s.processors[job.Type()]
if !ok {
+ s.mux.RUnlock()
return errors.New("Unknown job type")
}
+ s.mux.RUnlock()
p.Accept(job)
return nil
}
+// Closed reports the current value of isClose, read under the read lock.
func (s *NotifyService) Closed() (b bool) {
- s.closeMux.RLock()
+ s.mux.RLock()
b = s.isClose
- s.closeMux.RUnlock()
+ s.mux.RUnlock()
return
}
@@ -119,9 +137,9 @@ func (s *NotifyService) Stop() {
if s.Closed() {
return
}
- s.closeMux.Lock()
+ s.mux.Lock()
s.isClose = true
- s.closeMux.Unlock()
+ s.mux.Unlock()
s.stopProcessors()
@@ -129,10 +147,8 @@ func (s *NotifyService) Stop() {
}
+// NewNotifyService returns a NotifyService with an empty processor map and
+// isClose set to true. No processors are created here.
func NewNotifyService() *NotifyService {
- ns := &NotifyService{
+ return &NotifyService{
processors: make(map[Type]*Processor),
isClose: true,
}
- ns.init()
- return ns
}
diff --git a/pkg/notify/types.go b/pkg/notify/types.go
index 829eea5e..d6cba5d7 100644
--- a/pkg/notify/types.go
+++ b/pkg/notify/types.go
@@ -20,14 +20,14 @@ import "strconv"
type Type int
+// String returns the entry of typeNames for nt when nt.IsValid() is true,
+// and otherwise "Type" followed by the decimal value of nt.
func (nt Type) String() string {
- if int(nt) < len(typeNames) {
+ if nt.IsValid() {
return typeNames[nt]
}
return "Type" + strconv.Itoa(int(nt))
}
func (nt Type) QueueSize() (s int) {
- if int(nt) < len(typeQueues) {
+ if nt.IsValid() {
s = typeQueues[nt]
}
if s <= 0 {
@@ -36,6 +36,10 @@ func (nt Type) QueueSize() (s int) {
return
}
+// IsValid reports whether nt is a non-negative index that is in range for
+// both typeNames and typeQueues.
+//
+// String indexes typeNames and QueueSize indexes typeQueues after calling
+// IsValid, so both lengths are checked here; checking only one of them would
+// let the other lookup panic if the two slices ever differ in length.
+func (nt Type) IsValid() bool {
+	i := int(nt)
+	return i >= 0 && i < len(typeNames) && i < len(typeQueues)
+}
+
var typeNames = []string{
NOTIFTY: "NOTIFTY",
}
diff --git a/pkg/util/sys.go b/pkg/util/sys.go
index 2d4ffe60..8faf8ae8 100644
--- a/pkg/util/sys.go
+++ b/pkg/util/sys.go
@@ -17,6 +17,7 @@
package util
import (
+ "github.com/prometheus/procfs"
"os"
"strconv"
"unsafe"
@@ -72,3 +73,14 @@ func GetEnvString(name string, def string) string {
}
return def
}
+
+// GetProcCPUUsage samples cumulative CPU time counters through procfs.
+//
+// pt is the CPU time charged so far to the current process: user and system
+// time of the process itself plus that of its waited-for children. ct is the
+// sum of all fields of the system-wide CPU total line.
+// NOTE(review): ct is presumably in seconds, matching pt after the division
+// below; confirm against the vendored procfs version.
+//
+// Both values are cumulative, so callers are expected to diff two samples.
+// If any procfs read fails (for example on a platform without /proc), the
+// function returns 0, 0 instead of a partially filled pair; callers must
+// therefore check the ct delta before dividing by it.
+func GetProcCPUUsage() (pt float64, ct float64) {
+	p, err := procfs.NewProc(os.Getpid())
+	if err != nil {
+		return 0, 0
+	}
+	stat, err := procfs.NewStat()
+	if err != nil {
+		return 0, 0
+	}
+	pstat, err := p.NewStat()
+	if err != nil {
+		return 0, 0
+	}
+	ct = stat.CPUTotal.User + stat.CPUTotal.Nice + stat.CPUTotal.System +
+		stat.CPUTotal.Idle + stat.CPUTotal.Iowait + stat.CPUTotal.IRQ +
+		stat.CPUTotal.SoftIRQ + stat.CPUTotal.Steal + stat.CPUTotal.Guest
+	// Process times are reported in clock ticks; 100 is the ticks-per-second
+	// value assumed here. NOTE(review): confirm it matches USER_HZ on the
+	// target systems.
+	pt = float64(pstat.UTime+pstat.STime+pstat.CUTime+pstat.CSTime) / 100
+	return pt, ct
+}
diff --git a/pkg/util/sys_test.go b/pkg/util/sys_test.go
new file mode 100644
index 00000000..d6fadf78
--- /dev/null
+++ b/pkg/util/sys_test.go
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package util
+
+import "testing"
+
+// TestGetProcCPUUsage checks that a single sample is plausible: the process
+// time is non-negative, the total time is positive, and the process time is
+// strictly smaller than the total.
+func TestGetProcCPUUsage(t *testing.T) {
+	procTime, totalTime := GetProcCPUUsage()
+	switch {
+	case procTime < 0, totalTime <= 0, procTime >= totalTime:
+		t.Fatal("TestGetProcCPUUsage", procTime, totalTime)
+	}
+}
diff --git a/server/alarm/common.go b/server/alarm/common.go
index a5a87b2d..601b2d12 100644
--- a/server/alarm/common.go
+++ b/server/alarm/common.go
@@ -17,6 +17,7 @@ package alarm
import (
"fmt"
+ "github.com/apache/servicecomb-service-center/pkg/notify"
"github.com/apache/servicecomb-service-center/server/alarm/model"
)
@@ -39,6 +40,8 @@ const (
Group = "__ALARM_GROUP__"
)
+var ALARM = notify.RegisterType("ALARM", 0)
+
func FieldBool(key string, v bool) model.Field {
return model.Field{Key: key, Value: v}
}
diff --git a/server/alarm/service.go b/server/alarm/service.go
index d1714d99..ee00713f 100644
--- a/server/alarm/service.go
+++ b/server/alarm/service.go
@@ -36,7 +36,7 @@ type AlarmService struct {
func (ac *AlarmService) Raise(id model.ID, fields ...model.Field) error {
ae := &model.AlarmEvent{
- Event: nf.NewEvent(nf.NOTIFTY, Subject, ""),
+ Event: nf.NewEvent(ALARM, Subject, ""),
Status: Activated,
Id: id,
Fields: util.NewJSONObject(),
@@ -49,7 +49,7 @@ func (ac *AlarmService) Raise(id model.ID, fields ...model.Field) error {
func (ac *AlarmService) Clear(id model.ID) error {
ae := &model.AlarmEvent{
- Event: nf.NewEvent(nf.NOTIFTY, Subject, ""),
+ Event: nf.NewEvent(ALARM, Subject, ""),
Status: Cleared,
Id: id,
}
@@ -87,7 +87,7 @@ func (ac *AlarmService) OnMessage(evt nf.Event) {
+// NewAlarmService builds an AlarmService whose embedded Subscriber listens
+// for the ALARM notify type on Subject/Group, adds it to the notify center
+// and returns it. The error returned by AddSubscriber is not checked.
func NewAlarmService() *AlarmService {
c := &AlarmService{
- Subscriber: nf.NewSubscriber(nf.NOTIFTY, Subject, Group),
+ Subscriber: nf.NewSubscriber(ALARM, Subject, Group),
}
notify.NotifyCenter().AddSubscriber(c)
return c
}
index 9331e2c1..1a45a0c3 100644
--- a/server/core/microservice.go
+++ b/server/core/microservice.go
@@ -39,8 +39,9 @@ const (
REGISTRY_PROJECT = "default"
REGISTRY_DOMAIN_PROJECT = "default/default"
- REGISTRY_APP_ID = "default"
- REGISTRY_SERVICE_NAME = "SERVICECENTER"
+ REGISTRY_APP_ID = "default"
+ REGISTRY_SERVICE_NAME = "SERVICECENTER"
+ REGISTRY_SERVICE_ALIAS = "SERVICECENTER"
REGISTRY_DEFAULT_LEASE_RENEWALINTERVAL int32 = 30
REGISTRY_DEFAULT_LEASE_RETRYTIMES int32 = 3
@@ -60,6 +61,7 @@ func prepareSelfRegistration() {
Environment: pb.ENV_PROD,
AppId: REGISTRY_APP_ID,
ServiceName: REGISTRY_SERVICE_NAME,
+ Alias: REGISTRY_SERVICE_ALIAS,
Version: version.Ver().Version,
Status: pb.MS_UP,
Level: "BACK",
@@ -114,6 +116,9 @@ func IsShared(key *pb.MicroServiceKey) bool {
return false
}
_, ok := sharedServiceNames[key.ServiceName]
+ if !ok {
+ _, ok = sharedServiceNames[key.Alias]
+ }
return ok
}
diff --git a/server/core/microservice_test.go b/server/core/microservice_test.go
index edbac4c4..c5133ddd 100644
--- a/server/core/microservice_test.go
+++ b/server/core/microservice_test.go
@@ -75,4 +75,7 @@ func TestSetSharedMode(t *testing.T) {
if !IsShared(&proto.MicroServiceKey{Tenant: "default/default", AppId: "default", ServiceName: "shared"}) {
t.Fatalf("TestSetSharedMode failed")
}
+ if !IsShared(&proto.MicroServiceKey{Tenant: "default/default", AppId: "default", Alias: "shared"}) {
+ t.Fatalf("TestSetSharedMode failed")
+ }
}
diff --git a/server/health/metrics.go b/server/health/metrics.go
new file mode 100644
index 00000000..4c49fe7f
--- /dev/null
+++ b/server/health/metrics.go
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package health
+
+import (
+ "github.com/apache/servicecomb-service-center/pkg/gopool"
+ "github.com/apache/servicecomb-service-center/pkg/util"
+ "github.com/apache/servicecomb-service-center/server/metric"
+ "github.com/prometheus/client_golang/prometheus"
+ "golang.org/x/net/context"
+ "runtime"
+ "time"
+)
+
+const durationReportCPUUsage = 3 * time.Second
+
+var (
+ cpuGauge = prometheus.NewGaugeVec(
+ prometheus.GaugeOpts{
+ Namespace: metric.FamilyName,
+ Subsystem: "process",
+ Name: "cpu_usage",
+ Help: "Process cpu usage",
+ }, []string{"instance"})
+)
+
+// init registers cpuGauge with the default Prometheus registry (MustRegister
+// panics if registration fails) and hands AutoReportCPUUsage to gopool.Go.
+// NOTE(review): gopool.Go presumably runs it in a background goroutine and
+// supplies the context that ends the loop; confirm in pkg/gopool.
+func init() {
+ prometheus.MustRegister(cpuGauge)
+ gopool.Go(AutoReportCPUUsage)
+}
+
+// AutoReportCPUUsage samples CPU counters every durationReportCPUUsage and
+// publishes the process CPU usage to cpuGauge until ctx is done.
+//
+// The published value is
+//
+//	(process CPU delta * runtime.NumCPU()) / (system CPU delta)
+//
+// between two consecutive samples. The baselines start at zero, so the first
+// published value is an average over the counters' whole lifetime rather
+// than over the last interval.
+// NOTE(review): 1.0 presumably corresponds to one fully used core; confirm
+// against the units returned by util.GetProcCPUUsage.
+//
+// A sample whose system delta is not positive, or whose process delta is
+// negative (a failed read returns zeroed counters), is skipped without
+// moving the baselines, so the gauge is never set to NaN, Inf or a negative
+// value.
+func AutoReportCPUUsage(ctx context.Context) {
+	var (
+		cpuTotal float64
+		cpuProc  float64
+		cpus     = runtime.NumCPU()
+	)
+	// One reusable ticker instead of a new timer per iteration.
+	ticker := time.NewTicker(durationReportCPUUsage)
+	defer ticker.Stop()
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-ticker.C:
+			pt, ct := util.GetProcCPUUsage()
+			procDelta, totalDelta := pt-cpuProc, ct-cpuTotal
+			if totalDelta <= 0 || procDelta < 0 {
+				// Unusable sample: keep the previous baselines and retry
+				// on the next tick.
+				continue
+			}
+			cpuGauge.WithLabelValues(metric.InstanceName()).Set(
+				procDelta * float64(cpus) / totalDelta)
+			cpuTotal, cpuProc = ct, pt
+		}
+	}
+}
diff --git a/server/notify/common.go b/server/notify/common.go
index e3c9a674..a2343983 100644
--- a/server/notify/common.go
+++ b/server/notify/common.go
@@ -19,7 +19,9 @@ import "time"
const (
AddJobTimeout = 1 * time.Second
+ HeartbeatInterval = 30 * time.Second
+ ReadTimeout = HeartbeatInterval * 4
SendTimeout = 5 * time.Second
- HeartbeatTimeout = 30 * time.Second
InstanceEventQueueSize = 5000
+ ReadMaxBody = 64
)
diff --git a/server/notify/stream.go b/server/notify/stream.go
index 0e4e2d71..0ac6403e 100644
--- a/server/notify/stream.go
+++ b/server/notify/stream.go
@@ -27,14 +27,14 @@ import (
)
func HandleWatchJob(watcher *InstanceEventListWatcher, stream pb.ServiceInstanceCtrl_WatchServer) (err error) {
- timer := time.NewTimer(HeartbeatTimeout)
+ timer := time.NewTimer(HeartbeatInterval)
defer timer.Stop()
for {
select {
case <-stream.Context().Done():
return
case <-timer.C:
- timer.Reset(HeartbeatTimeout)
+ timer.Reset(HeartbeatInterval)
// TODO grpc 长连接心跳?
case job := <-watcher.Job:
@@ -57,7 +57,7 @@ func HandleWatchJob(watcher *InstanceEventListWatcher, stream pb.ServiceInstance
return
}
- util.ResetTimer(timer, HeartbeatTimeout)
+ util.ResetTimer(timer, HeartbeatInterval)
}
}
}
diff --git a/server/notify/websocket.go b/server/notify/websocket.go
index 2f0a6d8e..cc789e8f 100644
--- a/server/notify/websocket.go
+++ b/server/notify/websocket.go
@@ -41,7 +41,7 @@ type WebSocket struct {
}
func (wh *WebSocket) Init() error {
- wh.ticker = time.NewTicker(HeartbeatTimeout)
+ wh.ticker = time.NewTicker(HeartbeatInterval)
wh.needPingWatcher = true
wh.free = make(chan struct{}, 1)
wh.closed = make(chan struct{})
@@ -71,12 +71,16 @@ func (wh *WebSocket) Init() error {
return nil
}
-func (wh *WebSocket) Timeout() time.Duration {
+func (wh *WebSocket) ReadTimeout() time.Duration {
+ return ReadTimeout
+}
+
+func (wh *WebSocket) SendTimeout() time.Duration {
return SendTimeout
}
func (wh *WebSocket) heartbeat(messageType int) error {
- err := wh.conn.WriteControl(messageType, []byte{}, time.Now().Add(wh.Timeout()))
+ err := wh.conn.WriteControl(messageType, []byte{}, time.Now().Add(wh.SendTimeout()))
if err != nil {
messageTypeName := "Ping"
if messageType == websocket.PongMessage {
@@ -94,6 +98,7 @@ func (wh *WebSocket) HandleWatchWebSocketControlMessage() {
remoteAddr := wh.conn.RemoteAddr().String()
// PING
wh.conn.SetPingHandler(func(message string) error {
+ defer wh.conn.SetReadDeadline(time.Now().Add(wh.ReadTimeout()))
if wh.needPingWatcher {
log.Infof("received 'Ping' message '%s' from watcher[%s], no longer send 'Ping' to it, subject: %s, group: %s",
message, remoteAddr, wh.watcher.Subject(), wh.watcher.Group())
@@ -103,6 +108,7 @@ func (wh *WebSocket) HandleWatchWebSocketControlMessage() {
})
// PONG
wh.conn.SetPongHandler(func(message string) error {
+ defer wh.conn.SetReadDeadline(time.Now().Add(wh.ReadTimeout()))
log.Debugf("received 'Pong' message '%s' from watcher[%s], subject: %s, group: %s",
message, remoteAddr, wh.watcher.Subject(), wh.watcher.Group())
return nil
@@ -114,6 +120,8 @@ func (wh *WebSocket) HandleWatchWebSocketControlMessage() {
return wh.sendClose(code, text)
})
+ wh.conn.SetReadLimit(ReadMaxBody)
+ wh.conn.SetReadDeadline(time.Now().Add(wh.ReadTimeout()))
for {
_, _, err := wh.conn.ReadMessage()
if err != nil {
@@ -130,7 +138,7 @@ func (wh *WebSocket) sendClose(code int, text string) error {
if code != websocket.CloseNoStatusReceived {
message = websocket.FormatCloseMessage(code, text)
}
- err := wh.conn.WriteControl(websocket.CloseMessage, message, time.Now().Add(wh.Timeout()))
+ err := wh.conn.WriteControl(websocket.CloseMessage, message, time.Now().Add(wh.SendTimeout()))
if err != nil {
log.Errorf(err, "watcher[%s] catch an err, subject: %s, group: %s",
remoteAddr, wh.watcher.Subject(), wh.watcher.Group())
@@ -229,7 +237,7 @@ func (wh *WebSocket) HandleWatchWebSocketJob(o interface{}) {
default:
}
- err := wh.conn.WriteMessage(websocket.TextMessage, message)
+ err := wh.WriteMessage(message)
if job != nil {
ReportPublishCompleted(INSTANCE.String(), err, job.CreateAt())
}
@@ -239,6 +247,11 @@ func (wh *WebSocket) HandleWatchWebSocketJob(o interface{}) {
}
}
+func (wh *WebSocket) WriteMessage(message []byte) error {
+ wh.conn.SetWriteDeadline(time.Now().Add(wh.SendTimeout()))
+ return wh.conn.WriteMessage(websocket.TextMessage, message)
+}
+
func (wh *WebSocket) Ready() <-chan struct{} {
return wh.free
}
With regards,
Apache Git Services