Skip to content

Commit

Permalink
Merge pull request #889 from chess-knight/fix/apiserver_cluster_label
Browse files Browse the repository at this point in the history
Add missing cluster labels and aggregations for apiserver alerts
  • Loading branch information
povilasv authored Sep 23, 2024
2 parents e2239f0 + e80710c commit 3cb7958
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 10 deletions.
17 changes: 9 additions & 8 deletions alerts/kube_apiserver.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ local utils = import '../lib/utils.libsonnet';
_config+:: {
kubeApiserverSelector: error 'must provide selector for kube-apiserver',

- kubeAPILatencyWarningSeconds: 1,
-
certExpirationWarningSeconds: 7 * 24 * 3600,
certExpirationCriticalSeconds: 1 * 24 * 3600,
},
Expand All @@ -18,13 +16,16 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeAPIErrorBudgetBurn',
expr: |||
- sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
- and
- sum(apiserver_request:burnrate%s) > (%.2f * %.5f)
+ sum by(%s) (apiserver_request:burnrate%s) > (%.2f * %.5f)
+ and on(%s)
+ sum by(%s) (apiserver_request:burnrate%s) > (%.2f * %.5f)
||| % [
+ $._config.clusterLabel,
w.long,
w.factor,
(1 - $._config.SLOs.apiserver.target),
+ $._config.clusterLabel,
+ $._config.clusterLabel,
w.short,
w.factor,
(1 - $._config.SLOs.apiserver.target),
Expand All @@ -49,7 +50,7 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeClientCertificateExpiration',
expr: |||
- apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
+ apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(%(clusterLabel)s, job) histogram_quantile(0.01, sum by (%(clusterLabel)s, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationWarningSeconds)s
||| % $._config,
'for': '5m',
labels: {
Expand All @@ -63,7 +64,7 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeClientCertificateExpiration',
expr: |||
- apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
+ apiserver_client_certificate_expiration_seconds_count{%(kubeApiserverSelector)s} > 0 and on(%(clusterLabel)s, job) histogram_quantile(0.01, sum by (%(clusterLabel)s, job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{%(kubeApiserverSelector)s}[5m]))) < %(certExpirationCriticalSeconds)s
||| % $._config,
'for': '5m',
labels: {
Expand Down Expand Up @@ -108,7 +109,7 @@ local utils = import '../lib/utils.libsonnet';
{
alert: 'KubeAPITerminatedRequests',
expr: |||
- sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) / ( sum(rate(apiserver_request_total{%(kubeApiserverSelector)s}[10m])) + sum(rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) ) > 0.20
+ sum by(%(clusterLabel)s) (rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) / ( sum by(%(clusterLabel)s) (rate(apiserver_request_total{%(kubeApiserverSelector)s}[10m])) + sum by(%(clusterLabel)s) (rate(apiserver_request_terminations_total{%(kubeApiserverSelector)s}[10m])) ) > 0.20
||| % $._config,
labels: {
severity: 'warning',
Expand Down
5 changes: 3 additions & 2 deletions tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1125,9 +1125,9 @@ tests:

- interval: 1m
input_series:
- - series: 'apiserver_request_terminations_total{job="kube-apiserver",apiserver="kube-apiserver"}'
+ - series: 'apiserver_request_terminations_total{cluster="kubernetes",job="kube-apiserver",apiserver="kube-apiserver"}'
values: '1+1x10'
- - series: 'apiserver_request_total{job="kube-apiserver",apiserver="kube-apiserver"}'
+ - series: 'apiserver_request_total{cluster="kubernetes",job="kube-apiserver",apiserver="kube-apiserver"}'
values: '1+2x10'
alert_rule_test:
- eval_time: 5m # alert hasn't fired
Expand All @@ -1137,6 +1137,7 @@ tests:
exp_alerts:
- exp_labels:
severity: warning
+ cluster: "kubernetes"
exp_annotations:
summary: "The kubernetes apiserver has terminated 33.33% of its incoming requests."
description: "The kubernetes apiserver has terminated 33.33% of its incoming requests."
Expand Down

0 comments on commit 3cb7958

Please sign in to comment.