hosts: Add mtrcs0.p.r
*mtrcs0.pyrocufflink.red* is a Raspberry Pi CM4 on a Waveshare CM4-IO-BASE-B carrier board with an NVMe SSD. It runs a custom OS built using Buildroot, and is not a member of the *pyrocufflink.blue* AD domain. *mtrcs0.p.r* hosts Victoria Metrics/`vmagent`, `vmalert`, AlertManager, and Grafana. I've created a unique group and playbook for it, *metricspi*, to manage all these applications together.
This commit is contained in:
48
group_vars/metricspi/alerts.yml
Normal file
48
group_vars/metricspi/alerts.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
# Alerting rules for the metricspi group, exposed as the `vmalert_rules`
# variable.  The value uses the Prometheus-style rule-file layout:
# groups -> rules -> alert / expr / for.
# NOTE(review): presumably the metricspi playbook renders this value into the
# rule file that vmalert loads -- confirm against the consuming template.
vmalert_rules:
  groups:
    - name: default alert
      rules:
        # Ratio of the non-"free" collectd_df_df_complex values to the total,
        # per (instance, df).  Threshold is 75% for every df except
        # "var-log", which gets its own 95% threshold.  The condition must
        # hold for two hours before the alert fires.
        - alert: DiskUsage
          expr: >-
            sum(collectd_df_df_complex{type!="free"}) by (instance, df)
            / sum(collectd_df_df_complex{df!="var-log"}) by (instance, df) > .75
            or sum(collectd_df_df_complex{type!="free"}) by (instance, df)
            / sum(collectd_df_df_complex{df="var-log"}) by (instance, df) > .95
          for: 2h
        # Any series of the "websites" job reporting probe_success == 0 for
        # ten minutes.
        - alert: TheWebsiteIsDown
          expr: >-
            probe_success{job="websites"} == 0
          for: 10m
        # Any target reporting up == 0 for ten minutes.  Targets whose
        # instance label matches "vmhost.*" are excluded (the reason is not
        # recorded here).
        - alert: Missing Metrics
          expr: >-
            up{instance!~"vmhost.*"} == 0
          for: 10m
        # No collectd_nut_percent series exists at all.  There is no `for`
        # clause, so no hold period applies.
        - alert: NUT is offline
          expr: >-
            absent(collectd_nut_percent)

    - name: Bitwarden
      rules:
        # Process count for "vaultwarden" below 1 for five minutes.
        - alert: vaultwarden is not running
          expr: >-
            collectd_processes_ps_count_processes{processes="vaultwarden"} < 1
          for: 5m

    - name: Active Directory
      rules:
        # The comparison is evaluated per series, so any one of the listed
        # processes dropping below 1 for five minutes raises the alert.
        - alert: samba is not running
          expr: >-
            collectd_processes_ps_count_processes{processes=~"samba|smbd|winbindd|krb5kdc"} < 1
          for: 5m

    - name: Graylog
      rules:
        # More than 100 uncommitted journal entries for a full hour.
        - alert: unprocessed messages
          expr: >-
            org_graylog2_journal_entries_uncommitted > 100
          for: 1h

    - name: mdraid
      rules:
        # Neither mdraid rule has a `for` clause, so no hold period applies
        # once the "missing" / "failed" disk count is non-zero.
        - alert: mdraid missing disk
          expr: collectd_md_md_disks{type="missing"} != 0
        - alert: mdraid failed disk
          expr: collectd_md_md_disks{type="failed"} != 0
|
||||
Reference in New Issue
Block a user