From f6910f04df37451bb681722affa5f882d2e410c7 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Sun, 14 Jan 2024 11:42:46 -0600 Subject: [PATCH] tf/asg: Add CA resource tag for FUSE device plugin Jenkins jobs that build container images in user namespaces need access to `/dev/fuse`, which is provided by the [fuse-device-plugin][0]. This plugin runs as a DaemonSet, which updates the status of the node it's running on when it starts to indicate that the FUSE device is available. When scaling up from zero nodes, Cluster Autoscaler has no way to know that the plugin will advertise the FUSE device once a node starts, and therefore cannot determine that scaling up the ASG will create a node with the required resources. Thus, the ASG needs a tag to inform CA that the nodes it creates will indeed have the resources and scaling it up will allow the pod to be scheduled. Although CA has supported node-template resource tags since 1.14, the feature apparently broke at some point and does not work in 1.22. It works again in 1.26, though. [0]: https://github.com/kuberenetes-learning-group/fuse-device-plugin/tree/master --- terraform/asg.tf | 5 +++++ terraform/terraform.tfstate | 13 +++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/terraform/asg.tf b/terraform/asg.tf index 908f272..a0758c8 100644 --- a/terraform/asg.tf +++ b/terraform/asg.tf @@ -89,4 +89,9 @@ resource "aws_autoscaling_group" "k8s-aarch64" { value = "owned" propagate_at_launch = true } + tag { + key = "k8s.io/cluster-autoscaler/node-template/resources/github.com/fuse" + value = "1" + propagate_at_launch = false + } } diff --git a/terraform/terraform.tfstate b/terraform/terraform.tfstate index 80a0fba..1d57462 100644 --- a/terraform/terraform.tfstate +++ b/terraform/terraform.tfstate @@ -1,7 +1,7 @@ { "version": 4, "terraform_version": "1.6.2", - "serial": 96, + "serial": 98, "lineage": "a100be74-c98e-0769-2d6a-bf6a2c5f3ebf", "outputs": {}, "resources": [ @@ -107,9 +107,9 @@ "schema_version": 0, "attributes": { "account_id": "566967686773", - 
"arn": "arn:aws:sts::566967686773:assumed-role/dynk8s-terraform/aws-go-sdk-1705162223321505341", + "arn": "arn:aws:sts::566967686773:assumed-role/dynk8s-terraform/aws-go-sdk-1705246977054689837", "id": "566967686773", - "user_id": "AROAYIAPIKZ25DFDOYZHT:aws-go-sdk-1705162223321505341" + "user_id": "AROAYIAPIKZ25DFDOYZHT:aws-go-sdk-1705246977054689837" }, "sensitive_attributes": [] } @@ -218,7 +218,7 @@ "context": "", "default_cooldown": 300, "default_instance_warmup": 0, - "desired_capacity": 1, + "desired_capacity": 0, "enabled_metrics": [], "force_delete": false, "force_delete_warm_pool": false, @@ -258,6 +258,11 @@ "key": "k8s.io/cluster-autoscaler/kubernetes", "propagate_at_launch": true, "value": "owned" + }, + { + "key": "k8s.io/cluster-autoscaler/node-template/resources/github.com/fuse", + "propagate_at_launch": false, + "value": "1" } ], "tags": null,