I've noticed that, from time to time, the container storage volume accumulates "dangling" layers. These are directories under `/var/lib/containers/storage/overlay` that have plenty of content in their `diff` sub-directory but nothing else, and that do not appear to be mounted into any running container. I have not identified what causes this, nor a simple and reliable way to clean up individual layers. Fortunately, wiping the entire container storage graph with `crio wipe` works well enough. The `crio-clean.sh` script takes care of safely wiping the container storage graph on a given node. It first drains the node and then stops any containers that were left running. Next, it uses `crio wipe` to clean the entire storage graph. Finally, it restarts CRI-O and the kubelet and uncordons the node, allowing Kubernetes to reschedule the pods that were stopped.
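For reference, here is a quick heuristic for spotting candidate layers before resorting to a full wipe. This is only a sketch: it assumes the default storage root, and the "nothing but a `diff` entry" test is merely my reading of what these directories look like.

```sh
# Sketch: report overlay layer directories whose only entry is `diff`.
# Run on the node itself; the path and the heuristic are assumptions.
for layer in /var/lib/containers/storage/overlay/*/; do
    if [ "$(ls -A "$layer")" = "diff" ]; then
        printf 'possibly dangling: %s\n' "$layer"
    fi
done
```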
```sh
#!/bin/sh
# vim: set sw=4 ts=4 sts=4 et :

usage() {
    printf 'usage: %s node\n' "${0##*/}"
}

# Evict all pods from the node and mark it unschedulable.
drain_node() {
    kubectl drain \
        --ignore-daemonsets \
        --delete-emptydir-data \
        "$1"
}

# Stop the kubelet, any containers the drain left behind (e.g. from
# DaemonSets), and finally CRI-O itself, so nothing holds the storage open.
stop_node() {
    ssh "$1" doas sh <<EOF # lang: bash
    echo 'Stopping kubelet' >&2
    systemctl stop kubelet
    echo 'Stopping all containers' >&2
    crictl ps -aq | xargs crictl stop
    echo 'Stopping CRI-O' >&2
    systemctl stop crio
EOF
}

# Wipe the entire container storage graph.
wipe_crio() {
    echo 'Wiping container storage'
    ssh "$1" doas crio wipe -f
}

start_node() {
    echo 'Starting Kubelet/CRI-O'
    ssh "$1" doas systemctl start crio kubelet
}

# Mark the node schedulable again so pods can return.
uncordon_node() {
    kubectl uncordon "$1"
}

main() {
    local node=$1

    if [ -z "${node}" ]; then
        usage >&2
        exit 2
    fi

    drain_node "${node}" || exit
    stop_node "${node}" || exit
    wipe_crio "${node}" || exit
    start_node "${node}" || exit
    uncordon_node "${node}" || exit
}

main "$@"
```
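Usage is just the node name. Assuming `kubectl` already points at the right cluster and the node is reachable over SSH with `doas` privileges, an invocation looks like this (the node name is hypothetical):

```sh
./crio-clean.sh worker-1
```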