# File-viewer header (extraction residue, not part of the manifest): 178 lines, 9.1 KiB, YAML
---
# ConfigMap carrying the Grafana dashboard "Agave @ mpabi" (uid: agave-status-mpabi).
#
# The dashboard reads from the Prometheus datasource with uid "prometheus" and
# queries two scrape jobs:
#   - mpabi-node-exporter      (host metrics, filtered by the $instance variable)
#   - mpabi-yellowstone-geyser (Geyser gRPC plugin metrics)
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-dashboard-agave-status
  namespace: monitoring
  labels:
    # NOTE(review): presumably the label a Grafana dashboard sidecar watches
    # for — confirm against the Grafana deployment's sidecar settings.
    grafana_dashboard: "1"
data:
  # The dashboard model is JSON inside a literal block scalar ("|-" strips the
  # trailing newline). Do not put YAML comments inside it: they would become
  # part of the JSON payload and break parsing.
  agave-status.json: |-
    {
      "uid": "agave-status-mpabi",
      "title": "Agave @ mpabi",
      "timezone": "browser",
      "schemaVersion": 39,
      "version": 2,
      "refresh": "10s",
      "tags": ["agave", "solana", "mpabi"],
      "templating": {
        "list": [
          {
            "name": "instance",
            "type": "query",
            "label": "Node Exporter Instance",
            "datasource": { "type": "prometheus", "uid": "prometheus" },
            "query": { "query": "label_values(up{job=\"mpabi-node-exporter\"}, instance)", "refId": "PromVarInstance" },
            "refresh": 1,
            "current": { "selected": false, "text": "10.66.66.1:9100", "value": "10.66.66.1:9100" }
          }
        ]
      },
      "panels": [
        {
          "id": 1,
          "type": "stat",
          "title": "Geyser Metrics Target (Prometheus up)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "A", "expr": "up{job=\"mpabi-yellowstone-geyser\"}" }],
          "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "orientation": "horizontal", "textMode": "value" },
          "fieldConfig": {
            "defaults": { "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } },
            "overrides": []
          },
          "gridPos": { "h": 6, "w": 12, "x": 0, "y": 0 }
        },
        {
          "id": 2,
          "type": "stat",
          "title": "agave-validator.service (systemd active)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "B", "expr": "node_systemd_unit_state{job=\"mpabi-node-exporter\",instance=\"$instance\",name=\"agave-validator.service\",state=\"active\"}" }],
          "options": { "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, "orientation": "horizontal", "textMode": "value" },
          "fieldConfig": {
            "defaults": { "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } },
            "overrides": []
          },
          "gridPos": { "h": 6, "w": 12, "x": 12, "y": 0 }
        },
        {
          "id": 3,
          "type": "timeseries",
          "title": "CPU Used (%)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "C", "expr": "100 - (avg by (instance) (rate(node_cpu_seconds_total{job=\"mpabi-node-exporter\",instance=\"$instance\",mode=\"idle\"}[5m])) * 100)" }],
          "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100 }, "overrides": [] },
          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 6 }
        },
        {
          "id": 4,
          "type": "timeseries",
          "title": "Load (1m)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "D", "expr": "node_load1{job=\"mpabi-node-exporter\",instance=\"$instance\"}" }],
          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 6 }
        },
        {
          "id": 5,
          "type": "timeseries",
          "title": "Memory Used (%)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "E", "expr": "100 - (node_memory_MemAvailable_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\"} / node_memory_MemTotal_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\"} * 100)" }],
          "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100 }, "overrides": [] },
          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 6 }
        },
        {
          "id": 6,
          "type": "timeseries",
          "title": "Swap Used (GiB)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "F", "expr": "(node_memory_SwapTotal_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\"} - node_memory_SwapFree_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\"}) / 1024 / 1024 / 1024" }],
          "fieldConfig": { "defaults": { "unit": "gbytes" }, "overrides": [] },
          "gridPos": { "h": 8, "w": 8, "x": 0, "y": 14 }
        },
        {
          "id": 7,
          "type": "timeseries",
          "title": "Disk Free Accounts (%)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "G", "expr": "100 * node_filesystem_avail_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\",mountpoint=\"/var/lib/solana/accounts\"} / node_filesystem_size_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\",mountpoint=\"/var/lib/solana/accounts\"}" }],
          "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100 }, "overrides": [] },
          "gridPos": { "h": 8, "w": 8, "x": 8, "y": 14 }
        },
        {
          "id": 8,
          "type": "timeseries",
          "title": "Disk Free Ledger (%)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "H", "expr": "100 * node_filesystem_avail_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\",mountpoint=\"/var/lib/solana/ledger\"} / node_filesystem_size_bytes{job=\"mpabi-node-exporter\",instance=\"$instance\",mountpoint=\"/var/lib/solana/ledger\"}" }],
          "fieldConfig": { "defaults": { "unit": "percent", "min": 0, "max": 100 }, "overrides": [] },
          "gridPos": { "h": 8, "w": 8, "x": 16, "y": 14 }
        },
        {
          "id": 9,
          "type": "timeseries",
          "title": "Disk IO (NVMe) Read/Write (MiB/s)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [
            { "refId": "I", "expr": "sum by (device) (rate(node_disk_read_bytes_total{job=\"mpabi-node-exporter\",instance=\"$instance\",device=~\"nvme.*\"}[5m])) / 1024 / 1024", "legendFormat": "read {{device}}" },
            { "refId": "J", "expr": "sum by (device) (rate(node_disk_written_bytes_total{job=\"mpabi-node-exporter\",instance=\"$instance\",device=~\"nvme.*\"}[5m])) / 1024 / 1024", "legendFormat": "write {{device}}" }
          ],
          "fieldConfig": { "defaults": { "unit": "MiBs" }, "overrides": [] },
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 22 }
        },
        {
          "id": 10,
          "type": "timeseries",
          "title": "Network wg0 RX/TX (MiB/s)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [
            { "refId": "K", "expr": "rate(node_network_receive_bytes_total{job=\"mpabi-node-exporter\",instance=\"$instance\",device=\"wg0\"}[5m]) / 1024 / 1024", "legendFormat": "rx" },
            { "refId": "L", "expr": "rate(node_network_transmit_bytes_total{job=\"mpabi-node-exporter\",instance=\"$instance\",device=\"wg0\"}[5m]) / 1024 / 1024", "legendFormat": "tx" }
          ],
          "fieldConfig": { "defaults": { "unit": "MiBs" }, "overrides": [] },
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 22 }
        },
        {
          "id": 11,
          "type": "timeseries",
          "title": "Geyser: Subscriber Queue Size",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "M", "expr": "grpc_subscriber_queue_size{job=\"mpabi-yellowstone-geyser\"}", "legendFormat": "{{subscriber_id}}" }],
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 30 }
        },
        {
          "id": 12,
          "type": "timeseries",
          "title": "Geyser: Connections Total",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "N", "expr": "connections_total{job=\"mpabi-yellowstone-geyser\"}" }],
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 30 }
        },
        {
          "id": 13,
          "type": "timeseries",
          "title": "Geyser: Bytes Sent (MiB/s)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "O", "expr": "rate(grpc_bytes_sent{job=\"mpabi-yellowstone-geyser\"}[5m]) / 1024 / 1024", "legendFormat": "{{subscriber_id}}" }],
          "fieldConfig": { "defaults": { "unit": "MiBs" }, "overrides": [] },
          "gridPos": { "h": 8, "w": 12, "x": 0, "y": 38 }
        },
        {
          "id": 14,
          "type": "timeseries",
          "title": "Geyser: Messages Sent (/s)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "P", "expr": "rate(grpc_message_sent_count{job=\"mpabi-yellowstone-geyser\"}[5m])", "legendFormat": "{{subscriber_id}}" }],
          "gridPos": { "h": 8, "w": 12, "x": 12, "y": 38 }
        },
        {
          "id": 15,
          "type": "timeseries",
          "title": "Geyser: Disconnects (increase 15m)",
          "datasource": { "type": "prometheus", "uid": "prometheus" },
          "targets": [{ "refId": "Q", "expr": "sum by (reason) (increase(grpc_client_disconnects_total{job=\"mpabi-yellowstone-geyser\"}[15m]))", "legendFormat": "{{reason}}" }],
          "gridPos": { "h": 8, "w": 24, "x": 0, "y": 46 }
        }
      ]
    }