Add Graph Node alert rules (embedded exporter).

Files touched by this patch:
  * README.md
      Adds a "Graph Node" entry to the rule index.
  * _data/rules.yml
      Declares six rules for the Graph Node embedded exporter: four on
      eth_rpc_status (values 1 to 4, all critical) and two on
      store_connection_wait_time_ms (> 10 warning, > 20 critical).
  * dist/rules/graph-node/embedded-exporter.yml
      New file holding the matching Prometheus alert definitions.

The warning-level store rule is named "Store connection is slow" so that it
is distinct from the critical "Store connection is too slow" rule and agrees
with the StoreConnectionSlow / StoreConnectionTooSlow alerts in the dist file.

NOTE(review): YAML indentation below was reconstructed with 2-space nesting;
verify the context lines against the real _data/rules.yml before applying.
NOTE(review): the dist alerts on store_connection_wait_time_ms use `for: 1m`
while the _data entries declare no `for` field; confirm which is intended.
NOTE(review): the `index` line for _data/rules.yml still carries the hash of
the pre-rename content; regenerate the patch if the blob hash matters.

diff --git a/README.md b/README.md
index a4d2053b9..33f5d3899 100644
--- a/README.md
+++ b/README.md
@@ -85,6 +85,7 @@ Collection available here: **[https://samber.github.io/awesome-prometheus-alerts
 - [Promtail](https://samber.github.io/awesome-prometheus-alerts/rules#promtail)
 - [Cortex](https://samber.github.io/awesome-prometheus-alerts/rules#cortex)
 - [Jenkins](https://samber.github.io/awesome-prometheus-alerts/rules#jenkins)
+- [Graph Node](https://samber.github.io/awesome-prometheus-alerts/rules#graph-node)
 
 ## 🤝 Contributing
 
diff --git a/_data/rules.yml b/_data/rules.yml
index 2403b71cc..8d72345fb 100644
--- a/_data/rules.yml
+++ b/_data/rules.yml
@@ -2819,3 +2819,33 @@ groups:
                 description: UPS load is > 80%
                 query: 'apcupsd_ups_load_percent > 80'
                 severity: warning
+
+      - name: Graph Node
+        exporters:
+          - name: Embedded exporter
+            slug: embedded-exporter
+            rules:
+              - name: Provider failed because net_version failed
+                description: "Failed net_version for Provider `{{$labels.provider}}` in Graph node `{{$labels.instance}}`"
+                query: 'eth_rpc_status == 1'
+                severity: critical
+              - name: Provider failed because get genesis failed
+                description: "Failed to get genesis for Provider `{{$labels.provider}}` in Graph node `{{$labels.instance}}`"
+                query: 'eth_rpc_status == 2'
+                severity: critical
+              - name: Provider failed because net_version timeout
+                description: "net_version timeout for Provider `{{$labels.provider}}` in Graph node `{{$labels.instance}}`"
+                query: 'eth_rpc_status == 3'
+                severity: critical
+              - name: Provider failed because get genesis timeout
+                description: "Timeout to get genesis for Provider `{{$labels.provider}}` in Graph node `{{$labels.instance}}`"
+                query: 'eth_rpc_status == 4'
+                severity: critical
+              - name: Store connection is slow
+                description: "Store connection is slow to `{{$labels.pool}}` pool, `{{$labels.shard}}` shard in Graph node `{{$labels.instance}}`"
+                query: 'store_connection_wait_time_ms > 10'
+                severity: warning
+              - name: Store connection is too slow
+                description: "Store connection is too slow to `{{$labels.pool}}` pool, `{{$labels.shard}}` shard in Graph node `{{$labels.instance}}`"
+                query: 'store_connection_wait_time_ms > 20'
+                severity: critical
diff --git a/dist/rules/graph-node/embedded-exporter.yml b/dist/rules/graph-node/embedded-exporter.yml
new file mode 100644
index 000000000..ec555b688
--- /dev/null
+++ b/dist/rules/graph-node/embedded-exporter.yml
@@ -0,0 +1,59 @@
+groups:
+
+- name: EmbeddedExporter
+
+  rules:
+
+    - alert: ProviderNetVersionFailed
+      expr: 'eth_rpc_status == 1'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Provider net_version failed (provider {{$labels.provider}}, node {{$labels.instance}})
+        description: "Failed net_version for Provider {{$labels.provider}} in Graph node {{$labels.instance}}"
+
+    - alert: ProviderGetGenesisFailed
+      expr: 'eth_rpc_status == 2'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Provider get genesis failed (provider {{$labels.provider}}, node {{$labels.instance}})
+        description: "Failed to get genesis for Provider {{$labels.provider}} in Graph node {{$labels.instance}}"
+
+    - alert: ProviderNetVersionTimeout
+      expr: 'eth_rpc_status == 3'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Provider net_version timeout (provider {{$labels.provider}}, node {{$labels.instance}})
+        description: "net_version timeout for Provider {{$labels.provider}} in Graph node {{$labels.instance}}"
+
+    - alert: ProviderGetGenesisTimeout
+      expr: 'eth_rpc_status == 4'
+      for: 0m
+      labels:
+        severity: critical
+      annotations:
+        summary: Provider get genesis timeout (provider {{$labels.provider}}, node {{$labels.instance}})
+        description: "Timeout to get genesis for Provider {{$labels.provider}} in Graph node {{$labels.instance}}"
+
+    - alert: StoreConnectionSlow
+      expr: 'store_connection_wait_time_ms > 10'
+      for: 1m
+      labels:
+        severity: warning
+      annotations:
+        summary: Store connection is slow (pool {{$labels.pool}}, shard {{$labels.shard}}, node {{$labels.instance}})
+        description: "Store connection is slow to {{$labels.pool}} pool, {{$labels.shard}} shard in Graph node {{$labels.instance}}"
+
+    - alert: StoreConnectionTooSlow
+      expr: 'store_connection_wait_time_ms > 20'
+      for: 1m
+      labels:
+        severity: critical
+      annotations:
+        summary: Store connection is too slow (pool {{$labels.pool}}, shard {{$labels.shard}}, node {{$labels.instance}})
+        description: "Store connection is too slow to {{$labels.pool}} pool, {{$labels.shard}} shard in Graph node {{$labels.instance}}"