From 0af625cea14a8a45aa3cc1fb455be05afe86b170 Mon Sep 17 00:00:00 2001
From: "Dustin C. Hatch"
Date: Mon, 1 Dec 2025 14:24:04 -0600
Subject: [PATCH] crio-clean: Add script to clean container storage

I've noticed that from time to time, the container storage volume seems
to accumulate "dangling" containers.  These are paths under
`/var/lib/containers/storage/overlay` that have a bunch of content in
their `diff` sub-directory, but nothing else, and do not seem to be
mounted into any running containers.  I have not identified what causes
this, nor a simple and reliable way to clean them up.  Fortunately,
wiping the entire container storage graph with `crio wipe` seems to work
well enough.

The `crio-clean.sh` script takes care of safely wiping the container
storage graph on a given node.  It first drains the node and then stops
any running containers that were left.  Then, it uses `crio wipe` to
clean the entire storage graph.  Finally, it restarts the node, allowing
Kubernetes to reschedule the pods that were stopped.
---
 crio-clean.sh | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100644 crio-clean.sh

diff --git a/crio-clean.sh b/crio-clean.sh
new file mode 100644
index 0000000..a66d22a
--- /dev/null
+++ b/crio-clean.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+# vim: set sw=4 ts=4 sts=4 et :
+#
+# Safely wipe the CRI-O container storage graph on a Kubernetes node:
+# drain the node, stop Kubelet/containers/CRI-O, run `crio wipe`, then
+# start the services again and uncordon the node.
+
+# Print a one-line usage summary to stdout (callers redirect as needed).
+usage() {
+    printf 'usage: %s node\n' "${0##*/}"
+}
+
+# Evict pods from node $1, skipping DaemonSet-managed pods and discarding
+# emptyDir data.
+drain_node() {
+    kubectl drain \
+        --ignore-daemonsets \
+        --delete-emptydir-data \
+        "$1"
+}
+
+# Stop Kubelet, every remaining container, and CRI-O on host $1.  The
+# commands are fed on stdin to a shell started through doas on that host.
+stop_node() {
+    # Quoted delimiter: the body is sent verbatim, nothing expands locally.
+    ssh "$1" doas sh <<'EOF'
+    echo 'Stopping Kubelet' >&2
+    systemctl stop kubelet
+    echo 'Stopping all containers' >&2
+    # -r: skip crictl stop entirely when no containers are listed
+    crictl ps -aq | xargs -r crictl stop
+    echo 'Stopping CRI-O' >&2
+    systemctl stop crio
+EOF
+}
+
+# Wipe the container storage on host $1 (`crio wipe -f`).
+wipe_crio() {
+    echo 'Wiping container storage'
+    ssh "$1" doas crio wipe -f
+}
+
+# Start CRI-O and Kubelet again on host $1.
+start_node() {
+    echo 'Starting Kubelet/CRI-O'
+    ssh "$1" doas systemctl start crio kubelet
+}
+
+# Mark node $1 schedulable again.
+uncordon_node() {
+    kubectl uncordon "$1"
+}
+
+# Entry point: $1 is the node name.  Exits 2 with a usage message when it
+# is missing; otherwise runs each step in order and exits with the status
+# of the first step that fails.
+main() {
+    local node=$1
+
+    if [ -z "${node}" ]; then
+        usage >&2
+        exit 2
+    fi
+
+    drain_node "${node}" || exit
+    stop_node "${node}" || exit
+    wipe_crio "${node}" || exit
+    start_node "${node}" || exit
+    uncordon_node "${node}" || exit
+}
+
+main "$@"