diff --git a/victoria-metrics/alertmanager.config.yml b/victoria-metrics/alertmanager.config.yml index d31b9d0..ab391f3 100644 --- a/victoria-metrics/alertmanager.config.yml +++ b/victoria-metrics/alertmanager.config.yml @@ -31,3 +31,12 @@ route: - alertgroup=Frigate group_by: - alertname + +inhibit_rules: +- source_matchers: + - alertname="Free disk space is very low" + target_matchers: + - alertname="Free disk space is low" + equal: + - instance + - df diff --git a/victoria-metrics/alerts.yml b/victoria-metrics/alerts.yml index d29d33a..0eb721a 100644 --- a/victoria-metrics/alerts.yml +++ b/victoria-metrics/alerts.yml @@ -1,12 +1,35 @@ groups: - name: default alert rules: - - alert: DiskUsage + - alert: Free disk space is low expr: >- - sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df!="var-log", df!="var-lib-frigate"}) by (instance, df) > .75 - or sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df="var-log"}) by (instance, df) > .95 - or sum(collectd_df_df_complex{type!="free"}) by (instance, df) / sum(collectd_df_df_complex{df="var-lib-frigate"}) by (instance, df) > .95 + ( + filesystem:usage:percent{ + kubernetes_io_arch!="arm64", + df!="mmcblk0p3", + df!="var-lib-frigate", + df!="var-log", + } + or + filesystem:usage:percent{ + kubernetes_io_arch="arm64", + df!="boot", + } + or + filesystem:usage:percent{ + df="mmcblk0p3", + instance!="nut0.pyrocufflink.blue", + } + ) > .75 for: 2h + annotations: + severity: minor + - alert: Free disk space is very low + expr: >- + filesystem:usage:percent > 0.9 + for: 2h + annotations: + severity: minor - alert: TheWebsiteIsDown expr: >- probe_success{job="websites"} == 0 diff --git a/victoria-metrics/kustomization.yaml b/victoria-metrics/kustomization.yaml index 9f5c132..6d52acc 100644 --- a/victoria-metrics/kustomization.yaml +++ b/victoria-metrics/kustomization.yaml @@ -38,6 +38,7 @@ configMapGenerator: - name: vmalert-rules files: - 
alerts.yml + - recording.yml options: disableNameSuffixHash: true labels: diff --git a/victoria-metrics/recording.yml b/victoria-metrics/recording.yml new file mode 100644 index 0000000..d2dbebc --- /dev/null +++ b/victoria-metrics/recording.yml @@ -0,0 +1,8 @@ +groups: +- name: collectd + rules: + - record: filesystem:usage:percent + expr: >- + sum without (type) (collectd_df_df_complex{type!="free"}) + / sum without (type) (collectd_df_df_complex) +