From 6c7dcce90bd358cea901fceda5d636cd33bd6b30 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Sun, 31 Jul 2022 17:09:03 -0500 Subject: [PATCH] setup: switch back to ext4 on lvm Originally, I decided to use *btrfs* subvolumes to create writable directories inside otherwise immutable locations, such as for `/etc/cni/net.d`, etc. I figured this would be cleaner than bind-mounting directories from `/var`, and would avoid the trouble of determining appropriate volume sizes necessary to make them each their own filesystem. Unfortunately, it turns out that *cri-o* may still have some issues with its *btrfs* storage driver. One [blog post][0] hints at performance issues in *containerd*, and it seems they may apply to *cri-o* as well. I certainly encountered performance issues when attempting to run `npm` in a Jenkins job running in a Kubernetes pod. There is definitely a [performance issue with `npm`][1] when running in a container, which may or may not have been exacerbated by the *btrfs* storage driver. In any case, upstream [does not recommend][2] using the *btrfs* driver, performance notwithstanding. The *overlay* driver is much more widely used and tested. Plus, it's easier to filter out container layers from filesystem usage statistics simply by ignoring *overlay* filesystems. 
[0]: https://blog.cubieserver.de/2022/dont-use-containerd-with-the-btrfs-snapshotter/ [1]: https://github.com/npm/cli/issues/3208#issuecomment-1002990902 [2]: https://github.com/containers/storage/issues/929 --- setup/fedora-k8s-ctrl.ks | 31 ++++++++++++++++++------------- setup/fedora-k8s-node.ks | 31 ++++++++++++++++++------------- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/setup/fedora-k8s-ctrl.ks b/setup/fedora-k8s-ctrl.ks index 14e969f..94e14ec 100644 --- a/setup/fedora-k8s-ctrl.ks +++ b/setup/fedora-k8s-ctrl.ks @@ -13,17 +13,14 @@ reboot bootloader --location mbr clearpart --all --initlabel reqpart -part /boot --fstype ext4 --size=1024 -part btrfs.0 --fstype btrfs --size 4096 -part btrfs.1 --fstype btrfs --grow -btrfs none --label fedora btrfs.0 -btrfs none --label data btrfs.1 -btrfs / --subvol --name root LABEL=fedora -btrfs /home --subvol --name home LABEL=data -btrfs /var --subvol --name var LABEL=data -btrfs /etc/cni/net.d --subvol --name cni-net LABEL=data -btrfs /usr/libexec/kubernetes/kubelet-plugins --subvol --name kubelet-plugins LABEL=data -btrfs /opt --subvol --name opt LABEL=data +part /boot --fstype ext4 --size=512 +part pv.01 --size=1 --grow +volgroup fedora pv.01 +logvol / --fstype ext4 --name=root --vgname=fedora --size=4096 +logvol /home --fstype ext4 --name=home --vgname=fedora --size=100 +logvol /var --fstype ext4 --name=var --vgname=fedora --size=1024 --grow +logvol /var/log --fstype ext4 --name=var_log --vgname=fedora --size=1024 +logvol /var/lib/.k8s --fstype ext4 --name=k8s --vgname=fedora --size=512 %pre echo '%packages' > /tmp/packages.ks @@ -136,9 +133,17 @@ net.bridge.bridge-nf-call-ip6tables = 1 net.ipv4.ip_forward = 1 EOF -sed -i 's/^driver = .*/driver = "btrfs"/' /etc/containers/storage.conf +# Anaconda does not provide any way to express bind mounts +mkdir -p /etc/cni/net.d +mkdir -p /opt/cni +mkdir -p /usr/libexec/kubernetes/kubelet-plugins +cat >> /etc/fstab <<'EOF' +/var/lib/.k8s/cni-net.d /etc/cni/net.d 
none bind 0 0 +/var/lib/.k8s/cni-bin /opt/cni none bind 0 0 +/var/lib/.k8s/kubelet-plugins /usr/libexec/kubernetes/kubelet-plugins none bind 0 0 +EOF # Enable read-only rootfs. This cannot be done with part/logvol, as that would # make Anaconda mount it read-only befor the installation starts. -sed -i -r '/\S+\s+\/\s+/s/subvol=root/ro,&/' /etc/fstab +sed -i -r '/\S+\s+\/\s+/s/defaults/ro/' /etc/fstab %end diff --git a/setup/fedora-k8s-node.ks b/setup/fedora-k8s-node.ks index c917eb2..c087bc7 100644 --- a/setup/fedora-k8s-node.ks +++ b/setup/fedora-k8s-node.ks @@ -14,17 +14,14 @@ ignoredisk --only-use vda bootloader --location mbr clearpart --all --initlabel reqpart -part /boot --fstype ext4 --size=1024 -part btrfs.0 --fstype btrfs --size 4096 -part btrfs.1 --fstype btrfs --grow -btrfs none --label fedora btrfs.0 -btrfs none --label data btrfs.1 -btrfs / --subvol --name root LABEL=fedora -btrfs /home --subvol --name home LABEL=data -btrfs /var --subvol --name var LABEL=data -btrfs /etc/cni/net.d --subvol --name cni-net LABEL=data -btrfs /usr/libexec/kubernetes/kubelet-plugins --subvol --name kubelet-plugins LABEL=data -btrfs /opt --subvol --name opt LABEL=data +part /boot --fstype ext4 --size=512 +part pv.01 --size=1 --grow +volgroup fedora pv.01 +logvol / --fstype ext4 --name=root --vgname=fedora --size=4096 +logvol /home --fstype ext4 --name=home --vgname=fedora --size=100 +logvol /var --fstype ext4 --name=var --vgname=fedora --size=1024 --grow +logvol /var/log --fstype ext4 --name=var_log --vgname=fedora --size=1024 +logvol /var/lib/.k8s --fstype ext4 --name=k8s --vgname=fedora --size=512 %pre echo '%packages' > /tmp/packages.ks @@ -139,7 +136,15 @@ net.bridge.bridge-nf-call-ip6tables = 1 net.ipv4.ip_forward = 1 EOF -sed -i 's/^driver = .*/driver = "btrfs"/' /etc/containers/storage.conf +# Anaconda does not provide any way to express bind mounts +mkdir -p /etc/cni/net.d +mkdir -p /opt/cni +mkdir -p /usr/libexec/kubernetes/kubelet-plugins +cat >> /etc/fstab 
<<'EOF' +/var/lib/.k8s/cni-net.d /etc/cni/net.d none bind 0 0 +/var/lib/.k8s/cni-bin /opt/cni none bind 0 0 +/var/lib/.k8s/kubelet-plugins /usr/libexec/kubernetes/kubelet-plugins none bind 0 0 +EOF # Anaconda always creates a partition on the disk and formats that, instead of # just formatting the whole disk. This makes it difficult to extend the disk @@ -152,5 +157,5 @@ echo 'LABEL=longhorn /var/lib/longhorn ext4 defaults 0 0' >> /etc/fstab # Enable read-only rootfs. This cannot be done with part/logvol, as that would # make Anaconda mount it read-only befor the installation starts. -sed -i -r '/\S+\s+\/\s+/s/subvol=root/ro,&/' /etc/fstab +sed -i -r '/\S+\s+\/\s+/s/defaults/ro/' /etc/fstab %end