From 0fe296f7f38dbb43bf9b732287642c2d25982781 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Tue, 5 Aug 2025 06:51:10 -0500 Subject: [PATCH 1/3] fluent-bit: Deploy log collector for Victoria Logs [fluent-bit][0] is a generic, highly-configurable log collector. It was apparently initially developed for fluentd, but it has so many output capabilities that it works with many different log aggregation systems, including Victoria Logs. Although Victoria Logs supports the Loki input format, and therefore _Promtail_ would work, I want to try to avoid depending on third-party repositories. _fluent-bit_ is packaged by Fedora, so there shouldn't be any dependency issues, etc. [0]: https://fluentbit.io --- fluent-bit.yml | 4 ++ group_vars/all.yml | 20 +++++++++ host-setup.yml | 3 +- roles/fluent-bit/defaults/main.yml | 34 +++++++++++++++ roles/fluent-bit/files/fluent-bit.service | 36 +++++++++++++++ roles/fluent-bit/handlers/main.yml | 9 ++++ roles/fluent-bit/meta/main.yml | 2 + roles/fluent-bit/tasks/main.yml | 53 +++++++++++++++++++++++ 8 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 fluent-bit.yml create mode 100644 roles/fluent-bit/defaults/main.yml create mode 100644 roles/fluent-bit/files/fluent-bit.service create mode 100644 roles/fluent-bit/handlers/main.yml create mode 100644 roles/fluent-bit/meta/main.yml create mode 100644 roles/fluent-bit/tasks/main.yml diff --git a/fluent-bit.yml b/fluent-bit.yml new file mode 100644 index 0000000..ea08e78 --- /dev/null +++ b/fluent-bit.yml @@ -0,0 +1,4 @@ +- hosts: all + roles: + - role: fluent-bit + tags: fluent-bit diff --git a/group_vars/all.yml b/group_vars/all.yml index e82be4a..ec31b0f 100644 --- a/group_vars/all.yml +++ b/group_vars/all.yml @@ -141,3 +141,23 @@ dnf_automatic_schedule: >- | random(seed=inventory_hostname) | string }} *-*-* 04:00:00 America/Chicago + +fluent_bit_filters: +# Avoid log amplification from logging the result of sending logs! 
+- name: grep + match: host.fluent-bit.service + exclude: message \[output:http:victorialogs\] .+, HTTP status=200$ +fluent_bit_outputs: +- name: http + alias: victorialogs + match: host.* + host: logs.pyrocufflink.blue + port: 443 + tls: true + tls.verify: true + tls.verify_hostname: true + tls.ca_file: /etc/pki/ca-trust/source/anchors/dch-root-ca-r2.crt + uri: /insert/jsonline?_stream_fields=hostname,systemd_unit&_msg_field=message&_time_field=date + format: json_lines + json_date_format: iso8601 + log_response_payload: false diff --git a/host-setup.yml b/host-setup.yml index 8983913..7aa1744 100644 --- a/host-setup.yml +++ b/host-setup.yml @@ -2,6 +2,7 @@ - import_playbook: users.yml - import_playbook: collectd.yml -- import_playbook: promtail.yml +- import_playbook: dch-root-ca.yml +- import_playbook: fluent-bit.yml - import_playbook: auto-updates.yml - import_playbook: datavol.yml diff --git a/roles/fluent-bit/defaults/main.yml b/roles/fluent-bit/defaults/main.yml new file mode 100644 index 0000000..2093a8e --- /dev/null +++ b/roles/fluent-bit/defaults/main.yml @@ -0,0 +1,34 @@ +fluent_bit_config: + service: '{{ fluent_bit_config_service }}' + pipeline: '{{ fluent_bit_pipeline }}' + +fluent_bit_config_service: + log_level: '{{ fluent_bit_log_level }}' + +fluent_bit_log_level: info + +fluent_bit_pipeline: + inputs: '{{ fluent_bit_inputs }}' + filters: '{{ fluent_bit_filters }}' + outputs: '{{ fluent_bit_outputs }}' + +fluent_bit_inputs: '{{ fluent_bit_default_inputs }}' + +fluent_bit_default_inputs: +- '{{ fluent_bit_input_systemd }}' + +fluent_bit_input_systemd: + name: systemd + tag: host.* + db: /var/lib/fluent-bit/journal + lowercase: true + strip_underscores: true + +fluent_bit_filters: [] + +fluent_bit_outputs: +- '{{ fluent_bit_null_output }}' + +fluent_bit_null_output: + name: 'null'  # quoted: a bare null is the YAML null value, not the plugin name + match: '*' diff --git a/roles/fluent-bit/files/fluent-bit.service b/roles/fluent-bit/files/fluent-bit.service new file mode 100644 index 0000000..c2b1b6f --- /dev/null +++ 
b/roles/fluent-bit/files/fluent-bit.service @@ -0,0 +1,36 @@ +[Unit] +Description=Fluent Bit +Documentation=https://docs.fluentbit.io/manual/ +Wants=network-online.target +After=network-online.target +StartLimitIntervalSec=5 +StartLimitBurst=5 + +[Service] +Type=exec +ExecStart=/usr/bin/fluent-bit -c /etc/fluent-bit/fluent-bit.yml -Y +ExecReload=/bin/kill -HUP $MAINPID +StateDirectory=fluent-bit +Restart=always +RestartSec=1 +BindPaths=%S/fluent-bit +CapabilityBoundingSet=CAP_DAC_READ_SEARCH +LockPersonality=yes +MemoryDenyWriteExecute=yes +PrivateDevices=yes +PrivateTmp=yes +ProtectControlGroups=yes +ProtectHome=yes +ProtectHostname=yes +ProtectKernelModules=yes +ProtectKernelTunables=yes +ProtectSystem=strict +ReadOnlyPaths=/var/log +ReadWritePaths=%S/fluent-bit +RestrictNamespaces=yes +RestrictRealtime=yes +SystemCallArchitectures=native +TemporaryFileSystem=%S:ro + +[Install] +WantedBy=multi-user.target diff --git a/roles/fluent-bit/handlers/main.yml b/roles/fluent-bit/handlers/main.yml new file mode 100644 index 0000000..37aee73 --- /dev/null +++ b/roles/fluent-bit/handlers/main.yml @@ -0,0 +1,9 @@ +- name: restart fluent-bit + service: + name: fluent-bit + state: restarted + +- name: reload fluent-bit + service: + name: fluent-bit + state: reloaded diff --git a/roles/fluent-bit/meta/main.yml b/roles/fluent-bit/meta/main.yml new file mode 100644 index 0000000..bbb2a56 --- /dev/null +++ b/roles/fluent-bit/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: +- role: systemd-base diff --git a/roles/fluent-bit/tasks/main.yml b/roles/fluent-bit/tasks/main.yml new file mode 100644 index 0000000..55c25ee --- /dev/null +++ b/roles/fluent-bit/tasks/main.yml @@ -0,0 +1,53 @@ +- name: ensure fluent-bit is installed + package: + name: fluent-bit + state: present + tags: + - install + +- name: ensure fluent-bit is configured + copy: + dest: /etc/fluent-bit/fluent-bit.yml + content: '{{ fluent_bit_config | to_nice_yaml(indent=2) }}' + owner: root + group: root + mode: u=rw,go= + notify: + - 
restart fluent-bit + tags: + - config + +# The default unit configuration for fluent-bit.service is inadequate. It +# runs as root without any kind of restrictions or sandboxing, forces the +# "classic" configuration format (which is deprecated in favor of YAML), +# and does not support hot reload. The default unit is very simple, so we +# can replace it completely without too much worry about upstream changes. +- name: ensure custom fluent-bit systemd service unit file is installed + copy: + src: fluent-bit.service + dest: /etc/systemd/system/fluent-bit.service + owner: root + group: root + mode: u=rw,go=r + notify: + - reload systemd + - restart fluent-bit + tags: + - systemd + +- name: ensure fluent-bit starts at boot + service: + name: fluent-bit + enabled: true + tags: + - service + +- name: flush handlers + meta: flush_handlers + +- name: ensure fluent-bit is running + service: + name: fluent-bit + state: started + tags: + - service From dcef00935330965b35707be0d9d22630a549aa15 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Tue, 5 Aug 2025 07:25:59 -0500 Subject: [PATCH 2/3] fluent-bit: send md alerts to ntfy For machines that have Linux MD RAID arrays, I want to receive notifications about the status of the arrays immediately via _ntfy_. I had this before with `journal2ntfy`, but I never got around to setting it up for the current generation of machines (_nvr2_, _chromie_). Now that we have `fluent-bit` deployed, we can use its pipeline capabilities to select the subset of messages for which we want immediate alerts and send them directly to _ntfy_. We use a Lua function to transform the log record into a body compatible with _ntfy_'s JSON publish request; `fluent-bit` doesn't have any other way to set array values, as needed for the `tags` member. 
--- group_vars/all.yml | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/group_vars/all.yml b/group_vars/all.yml index ec31b0f..e34d219 100644 --- a/group_vars/all.yml +++ b/group_vars/all.yml @@ -147,6 +147,34 @@ fluent_bit_filters: - name: grep match: host.fluent-bit.service exclude: message \[output:http:victorialogs\] .+, HTTP status=200$ +- name: rewrite_tag + alias: ntfy + match: host.* + rule: transport kernel ntfy true +- name: grep + match: ntfy + alias: ntfy.filter + regex: message ^md +- name: lua + alias: ntfy.populate + match: ntfy + code: | + function ntfy_transform(tag, timestamp, record) + record["topic"] = "alerts" + record["tags"] = { + record["hostname"], + record["syslog_identifier"], + } + return 1, timestamp, record + end + call: ntfy_transform +- name: record_modifier + alias: ntfy.clean + match: ntfy + allowlist_key: + - message + - tags + - topic fluent_bit_outputs: - name: http alias: victorialogs @@ -161,3 +189,16 @@ fluent_bit_outputs: format: json_lines json_date_format: iso8601 log_response_payload: false +- name: http + alias: ntfy + workers: 1 + match: ntfy + host: ntfy.pyrocufflink.blue + port: 443 + tls: true + tls.verify: true + tls.verify_hostname: true + uri: / + format: json_lines + json_date_key: false + log_response_payload: false From 6bc0475e8916b360e1f39f28f1eedcd0148298e9 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Tue, 5 Aug 2025 10:31:33 -0500 Subject: [PATCH 3/3] raid-array: Fix md re-add automation Recent versions of `mdadm` stopped accepting `/dev/disk/by-id` symlinks as the MD device: > mdadm: Value "/dev/disk/by-id/md-name-backup5" cannot be set as devname. Reason: Cannot be started from '/' or '<'. To work around this, we need a script to resolve the symlink and pass the real block device name. 
--- raid-array.yml | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/raid-array.yml b/raid-array.yml index 993510e..07feb34 100644 --- a/raid-array.yml +++ b/raid-array.yml @@ -29,14 +29,35 @@ tags: - mdadm - mdadm-create + - name: ensure md-auto-readd script is installed + copy: + content: |+ + #!/bin/sh + + main() { + md_dev=$(readlink -e /dev/disk/by-id/md-name-"$2") + if [ -z "${md_dev}" ]; then + printf 'Could not find block device for MD raid %s\n' "$2" >&2 + exit 1 + fi + exec mdadm --re-add "${md_dev}" "$1" + } + + main "$@" 2>&1 | logger -t md-auto-readd  # tag entries so they are attributable to this script + dest: /usr/local/libexec/md-auto-readd + owner: root + group: root + mode: u=rwx,go=rx + tags: + - script - name: ensure raid auto re-add udev rule exists copy: content: >+ - ENV{ID_FS_LABEL}=="{{ md_name }}", + ACTION=="add", ENV{ID_FS_TYPE}=="linux_raid_member", ENV{ID_FS_USAGE}=="raid", - RUN+="/usr/sbin/mdadm --re-add /dev/disk/by-id/md-name-{{ md_name }} $devnode" - dest: /etc/udev/rules.d/80-{{ md_name }}.rules + RUN+="/usr/local/libexec/md-auto-readd $devnode $env{ID_FS_LABEL}" + dest: /etc/udev/rules.d/80-md-auto-readd.rules mode: u=rw,go=r owner: root group: root