From 164f3b5e0f1023d11df4bc7809d0021816d91039 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Sun, 17 Nov 2024 09:41:30 -0600 Subject: [PATCH] r/wal-g-pg: Handle versioned storage locations The target location for WAL archives and backups saved by WAL-G should be separated based on the major version of PostgreSQL with which they are compatible. This will make it easier to restore those backups, since they can only be restored into a cluster of the same version. Unfortunately, WAL-G does not natively handle this. In fact, it doesn't really have any way of knowing the version of the PostgreSQL server it is backing up, at least when it is uploading WAL archives. Thus, we have to include the version number in the target path (S3 prefix) manually. We can't rely on Ansible to do this, because there is no way to ensure Ansible runs at the appropriate point during the upgrade process. As such, we need to be able to modify the target location as part of the upgrade, without causing a conflict with Ansible the next time it runs. To that end, I've changed how the _wal-g-pg_ role creates the configuration file for WAL-G. Instead of rendering directly to `wal-g.yml`, the role renders a template, `wal-g.yml.in`. This template can include a `@PGVERSION@` specifier. The `wal-g-config` script will then use `sed` to replace that specifier with the version of PostgreSQL installed on the server, rendering the final `wal-g.yml`. This script is called both by Ansible in a handler after generating the template configuration, and also as a post-upgrade action by the `postgresql-upgrade` script. I originally wanted the `wal-g-config` script to use the version of PostgreSQL specified in the `PG_VERSION` file within the data directory. This would ensure that WAL-G always uploads/downloads files for the matching version. 
Unfortunately, this introduced a dependency conflict: the WAL-G configuration needs to be present before a backup can be restored, but the data directory is empty until after the backup has been restored. Thus, we have to use the installed server version, rather than the data directory version. This leaves a small window where WAL-G may be configured to point to the wrong target if the `postgresql-upgrade` script fails and thus does not trigger regenerating the configuration file. This could result in new WAL archives/backups being uploaded to the old target location. These files would be incompatible with the other files in that location, and could potentially overwrite existing files. This is rather unlikely, since the PostgreSQL server will not start if the _postgresql-upgrade.service_ failed. The only time it should be possible is if the upgrade fails in such a way that it leaves an empty but valid data directory, and then the machine is rebooted. --- group_vars/postgresql.yml | 2 +- roles/wal-g-pg/files/post-upgrade.sh | 3 ++ roles/wal-g-pg/files/wal-g-config.sh | 46 ++++++++++++++++++++++++++++ roles/wal-g-pg/handlers/main.yml | 4 +++ roles/wal-g-pg/meta/main.yml | 1 + roles/wal-g-pg/tasks/main.yml | 28 +++++++++++++++-- 6 files changed, 81 insertions(+), 3 deletions(-) create mode 100644 roles/wal-g-pg/files/post-upgrade.sh create mode 100644 roles/wal-g-pg/files/wal-g-config.sh diff --git a/group_vars/postgresql.yml b/group_vars/postgresql.yml index 8ceab3a..de325e3 100644 --- a/group_vars/postgresql.yml +++ b/group_vars/postgresql.yml @@ -56,7 +56,7 @@ wal_g_aws_secret_access_key: !vault | wal_g_pg_config: AWS_ACCESS_KEY_ID: '{{ wal_g_aws_access_key_id }}' AWS_SECRET_ACCESS_KEY: '{{ wal_g_aws_secret_access_key }}' - WALG_S3_PREFIX: s3://pgbackup/pyrocufflink/main/15 + WALG_S3_PREFIX: s3://pgbackup/pyrocufflink/main/@PGVERSION@ AWS_ENDPOINT: https://s3.backups.pyrocufflink.blue PGHOST: /run/postgresql WALG_STATSD_ADDRESS: localhost:9125 diff --git 
a/roles/wal-g-pg/files/post-upgrade.sh b/roles/wal-g-pg/files/post-upgrade.sh
new file mode 100644
index 0000000..1640e32
--- /dev/null
+++ b/roles/wal-g-pg/files/post-upgrade.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Call wal-g-config by absolute path so this hook does not depend on PATH.
+exec /usr/local/bin/wal-g-config /etc/postgresql/wal-g.yml.in /etc/postgresql/wal-g.yml
diff --git a/roles/wal-g-pg/files/wal-g-config.sh b/roles/wal-g-pg/files/wal-g-config.sh
new file mode 100644
index 0000000..7b8e2f6
--- /dev/null
+++ b/roles/wal-g-pg/files/wal-g-config.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# vim: set sw=4 ts=4 sts=4 et :
+# Render DEST from the template SRC, replacing every @PGVERSION@ with the
+# major version of the installed postgresql-server RPM.
+# Exit status: 2 on usage error, 1 if SRC is unreadable or rpm fails.
+
+usage() {
+    printf 'usage: %s SRC DEST\n' "${0##*/}"
+}
+
+# Start empty so values inherited from the environment are never used.
+src= dest=
+for arg in "$@"; do
+    case "${arg}" in
+    -*)
+        usage >&2
+        exit 2
+        ;;
+    esac
+    if [ -z "${src}" ]; then
+        src=${arg}
+    elif [ -z "${dest}" ]; then
+        dest=${arg}
+    else
+        usage >&2
+        exit 2
+    fi
+done
+if [ -z "${src}" ] || [ -z "${dest}" ]; then
+    usage >&2
+    exit 2
+fi
+
+# DEST is truncated before sed runs, so reject an unreadable SRC up front.
+if [ ! -r "${src}" ]; then
+    printf '%s: cannot read %s\n' "${0##*/}" "${src}" >&2
+    exit 1
+fi
+
+# Test rpm's own status: in a pipeline only the last command's is seen.
+if ! pgversion=$(rpm -q --qf '%{V}' postgresql-server); then
+    echo 'Nothing to do' >&2
+    exit 1
+fi
+set -x
+sed -r -e "s/@PGVERSION@/${pgversion%%.*}/g" "${src}" > "${dest}"
diff --git a/roles/wal-g-pg/handlers/main.yml b/roles/wal-g-pg/handlers/main.yml
index 7e48233..200597f 100644
--- a/roles/wal-g-pg/handlers/main.yml
+++ b/roles/wal-g-pg/handlers/main.yml
@@ -2,6 +2,10 @@
   command:
     semodule -i /usr/local/share/selinux/wal-g-postgresql.cil
 
+- name: regenerate wal-g config
+  command:
+    /etc/postgresql/post-upgrade.d/wal-g-config.sh
+
 - name: restart wal-g backup timer
   systemd:
     name: wal-g-backup.timer
diff --git a/roles/wal-g-pg/meta/main.yml b/roles/wal-g-pg/meta/main.yml
index 948dd1c..e930148 100644
--- a/roles/wal-g-pg/meta/main.yml
+++ b/roles/wal-g-pg/meta/main.yml
@@ -1,3 +1,4 @@
 dependencies:
 - dch-yum
 - systemd-base
+- postgresql-server-base
diff --git a/roles/wal-g-pg/tasks/main.yml b/roles/wal-g-pg/tasks/main.yml
index 91381c7..1e1a829 100644
--- a/roles/wal-g-pg/tasks/main.yml
+++ b/roles/wal-g-pg/tasks/main.yml
@@
-5,15 +5,39 @@ tags: - install -- name: ensure wal-g is configured +- name: ensure wal-g config generator is installed + copy: + src: wal-g-config.sh + dest: /usr/local/bin/wal-g-config + owner: root + group: root + mode: u=rwx,go=rx + tags: + - wal-g-config + +- name: ensure wal-g-config post-upgrade script is installed + copy: + src: post-upgrade.sh + dest: /etc/postgresql/post-upgrade.d/wal-g-config.sh + owner: root + group: root + mode: u=rwx,go=rx + tags: + - wal-g-config + - post-upgrade + +- name: ensure wal-g configuration template is set copy: content: |+ {{ wal_g_pg_config | to_nice_yaml(indent=2) }} - dest: /etc/postgresql/wal-g.yml + dest: /etc/postgresql/wal-g.yml.in owner: root group: postgres mode: u=rw,g=r,o= + notify: + - regenerate wal-g config tags: + - wal-g-config - config - name: ensure local selinux share directory exists