From 8cb292a4b2bc7668229493c066965d73e2be89b6 Mon Sep 17 00:00:00 2001
From: "Dustin C. Hatch"
Date: Thu, 11 Jul 2024 22:07:27 -0500
Subject: [PATCH] v-m: alerts: Add alert for temperatures

After the incident this week with the CPU overheating on _vmhost1_, I
want to make sure I know as soon as possible when anything is starting
to get too hot.
---
NOTE(review): the line breaks and leading indentation in this patch were
reconstructed from a copy whose whitespace had been collapsed onto one
line.  Confirm that the context lines match victoria-metrics/alerts.yml
before applying.  The From: header carries no e-mail address and may
need one before `git am` will accept it.

 victoria-metrics/alerts.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/victoria-metrics/alerts.yml b/victoria-metrics/alerts.yml
index c51fe40..ce716a1 100644
--- a/victoria-metrics/alerts.yml
+++ b/victoria-metrics/alerts.yml
@@ -141,3 +141,10 @@ groups:
       - ignoring (instance) group_right (scope)
       (patroni_xlog_replayed_location != 0) > 10240
     for: 10m
+
+- name: Temperature
+  rules:
+  - alert: High Temperature
+    expr: >-
+      {__name__=~"collectd_.*_temperature", sensors!~"i350bb.*"} > 80
+    for: 10m