feat(monitoring): add mpabi node-exporter scrape and agave dashboard
This commit is contained in:
@@ -26,6 +26,12 @@ spec:
|
|||||||
static_configs:
|
static_configs:
|
||||||
- targets:
|
- targets:
|
||||||
- 10.66.66.1:8999
|
- 10.66.66.1:8999
|
||||||
|
- job_name: mpabi-node-exporter
|
||||||
|
metrics_path: /metrics
|
||||||
|
scrape_interval: 15s
|
||||||
|
static_configs:
|
||||||
|
- targets:
|
||||||
|
- 10.66.66.1:9100
|
||||||
prometheusOperator:
|
prometheusOperator:
|
||||||
admissionWebhooks:
|
admissionWebhooks:
|
||||||
enabled: false
|
enabled: false
|
||||||
|
|||||||
122
kustomize/infra/monitoring-extras/dashboard-agave-status.yaml
Normal file
122
kustomize/infra/monitoring-extras/dashboard-agave-status.yaml
Normal file
@@ -0,0 +1,122 @@
|
|||||||
|
---
# Grafana dashboard "Agave @ mpabi", shipped as a ConfigMap in the
# monitoring namespace.
#
# NOTE(review): the grafana_dashboard: "1" label is presumably the selector
# used by the Grafana dashboard sidecar -- confirm against the chart values.
#
# Panel 1 queries the mpabi-yellowstone-geyser job directly; panels 2-4 query
# the mpabi-node-exporter job and are filtered by the $instance variable.
#
# NOTE(review): node_systemd_unit_state is only exported when node_exporter
# runs with its systemd collector enabled -- verify on the scraped host,
# otherwise panel 2 shows "No data".
#
# The dashboard body is strict JSON, so it cannot carry inline comments.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-agave-status
  namespace: monitoring
  labels:
    grafana_dashboard: "1"
data:
  agave-status.json: |-
    {
      "uid": "agave-status-mpabi",
      "title": "Agave @ mpabi",
      "timezone": "browser",
      "schemaVersion": 39,
      "version": 1,
      "refresh": "10s",
      "tags": ["agave", "solana", "mpabi"],
      "templating": {
        "list": [
          {
            "name": "instance",
            "type": "query",
            "label": "Node Exporter Instance",
            "datasource": { "type": "prometheus", "uid": "prometheus" },
            "query": {
              "query": "label_values(up{job=\"mpabi-node-exporter\"}, instance)",
              "refId": "PromVarInstance"
            },
            "refresh": 1,
            "current": { "selected": false, "text": "10.66.66.1:9100", "value": "10.66.66.1:9100" }
          }
        ]
      },
      "panels": [
        {
          "id": 1,
          "type": "stat",
          "title": "Geyser Metrics Target (Prometheus up)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [
            {
              "refId": "A",
              "expr": "up{job=\"mpabi-yellowstone-geyser\"}"
            }
          ],
          "options": {
            "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
            "orientation": "horizontal",
            "textMode": "value"
          },
          "fieldConfig": {
            "defaults": {
              "mappings": [
                {
                  "type": "value",
                  "options": {
                    "0": { "text": "DOWN", "color": "red", "index": 0 },
                    "1": { "text": "UP", "color": "green", "index": 1 }
                  }
                }
              ],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  { "color": "red", "value": null },
                  { "color": "green", "value": 1 }
                ]
              }
            },
            "overrides": []
          },
          "gridPos": { "h": 6, "w": 12, "x": 0, "y": 0 }
        },
        {
          "id": 2,
          "type": "stat",
          "title": "agave-validator.service (systemd active)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [
            {
              "refId": "B",
              "expr": "node_systemd_unit_state{job=\"mpabi-node-exporter\",instance=\"$instance\",name=\"agave-validator.service\",state=\"active\"}"
            }
          ],
          "options": {
            "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false },
            "orientation": "horizontal",
            "textMode": "value"
          },
          "fieldConfig": {
            "defaults": {
              "mappings": [
                {
                  "type": "value",
                  "options": {
                    "0": { "text": "INACTIVE", "color": "red", "index": 0 },
                    "1": { "text": "ACTIVE", "color": "green", "index": 1 }
                  }
                }
              ],
              "thresholds": {
                "mode": "absolute",
                "steps": [
                  { "color": "red", "value": null },
                  { "color": "green", "value": 1 }
                ]
              }
            },
            "overrides": []
          },
          "gridPos": { "h": 6, "w": 12, "x": 12, "y": 0 }
        },
        {
          "id": 3,
          "type": "timeseries",
          "title": "Load (1m)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [
            {
              "refId": "C",
              "expr": "node_load1{job=\"mpabi-node-exporter\",instance=\"$instance\"}"
            }
          ],
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 6 }
        },
        {
          "id": 4,
          "type": "timeseries",
          "title": "Memory Used (%)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [
            {
              "refId": "D",
              "expr": "100 - (node_memory_MemAvailable_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\"} / node_memory_MemTotal_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\"} * 100)"
            }
          ],
          "fieldConfig": {
            "defaults": {
              "unit": "percent",
              "min": 0,
              "max": 100
            },
            "overrides": []
          },
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 6 }
        }
      ]
    }
|
||||||
|
|
||||||
@@ -7,3 +7,4 @@ resources:
|
|||||||
- ingressroute-grafana-http.yaml
|
- ingressroute-grafana-http.yaml
|
||||||
- ingressroute-prometheus.yaml
|
- ingressroute-prometheus.yaml
|
||||||
- ingressroute-prometheus-http.yaml
|
- ingressroute-prometheus-http.yaml
|
||||||
|
- dashboard-agave-status.yaml
|
||||||
|
|||||||
Reference in New Issue
Block a user