events: Delete Node on instance termination
dustin/dynk8s-provisioner/pipeline/head This commit looks good
Details
dustin/dynk8s-provisioner/pipeline/head This commit looks good
Details
The Cluster Autoscaler does not delete the Node resource in Kubernetes after it terminates an instance: > It does not delete the Node object from Kubernetes. Cleaning up Node > objects corresponding to terminated instances is the responsibility of > the cloud node controller, which can run as part of > kube-controller-manager or cloud-controller-manager. On-premises clusters are probably not running the Cloud Controller Manager, so Node resources are liable to be left behind after a scale-down event. To keep unused Node resources from accumulating, the *dynk8s-provisioner* will now delete the Node resource associated with an EC2 instance when it receives a state-change event indicating the instance has been terminated. To identify the correct Node, it compares the value of the `providerID` field of each existing node with the instance ID mentioned in the event. An exact match is not possible, since the provider ID includes the availability zone of the instance, which is not included in the event. However, instance IDs are unique enough that this "should" never be an issue.
master
parent
d85f314a8b
commit
cd920418aa
|
@ -7,7 +7,7 @@ use log::{debug, error};
|
|||
|
||||
use crate::k8s::{
|
||||
assign_wireguard_config, create_bootstrap_token, delete_bootstrap_tokens,
|
||||
unassign_wireguard_config,
|
||||
delete_node, unassign_wireguard_config,
|
||||
};
|
||||
use crate::model::events::*;
|
||||
|
||||
|
@ -24,6 +24,7 @@ use crate::model::events::*;
|
|||
/// When an instance is terminated:
|
||||
/// 1. Any WireGuard configs assigned to the instance are unassigned
|
||||
/// 2. All bootstrap tokens for the instance are deleted
|
||||
/// 3. The Kubernetes Node resource for the instance is deleted
|
||||
pub async fn on_ec2_instance_state_change(evt: Ec2InstanceStateChange) {
|
||||
debug!("EC2 instance {} is now {}", &evt.instance_id, &evt.state);
|
||||
if evt.state == "running" {
|
||||
|
@ -53,5 +54,11 @@ pub async fn on_ec2_instance_state_change(evt: Ec2InstanceStateChange) {
|
|||
&evt.instance_id, e
|
||||
);
|
||||
}
|
||||
if let Err(e) = delete_node(&evt.instance_id).await {
|
||||
error!(
|
||||
"Failed to delete node for instance {}: {}",
|
||||
&evt.instance_id, e
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
27
src/k8s.rs
27
src/k8s.rs
|
@ -3,7 +3,7 @@ use std::collections::btree_map::BTreeMap;
|
|||
|
||||
use chrono::offset::Utc;
|
||||
use chrono::{DateTime, Duration};
|
||||
use k8s_openapi::api::core::v1::{ConfigMap, Secret};
|
||||
use k8s_openapi::api::core::v1::{ConfigMap, Node, Secret};
|
||||
use k8s_openapi::apimachinery::pkg::apis::meta::v1::ObjectMeta;
|
||||
use kube::core::params::{ListParams, Patch, PatchParams, PostParams};
|
||||
use kube::{Api, Client};
|
||||
|
@ -385,6 +385,31 @@ pub async fn get_kubeconfig<I: AsRef<str>>(
|
|||
}
|
||||
}
|
||||
|
||||
/// Delete the node representing an EC2 instance
|
||||
///
|
||||
/// When an EC2 node is terminated, it is permanently offline. If the instance
|
||||
/// was a member of the cluster, it may have a Node resource still present in
|
||||
/// Kubernetes. This object needs to be deleted; neither the Cluster
|
||||
/// Autoscaler nor Kubernetes itself will do this.
|
||||
pub async fn delete_node<I: AsRef<str>>(
|
||||
instance_id: I,
|
||||
) -> Result<(), kube::Error> {
|
||||
let instance_id = instance_id.as_ref();
|
||||
let client = Client::try_default().await?;
|
||||
let nodes: Api<Node> = Api::all(client);
|
||||
for node in nodes.list(&Default::default()).await? {
|
||||
if let (Some(name), Some(spec)) = (node.metadata.name, node.spec) {
|
||||
if let Some(pid) = spec.provider_id {
|
||||
if pid.starts_with("aws:///") && pid.ends_with(instance_id) {
|
||||
info!("Deleting node {}", &name);
|
||||
nodes.delete(&name, &Default::default()).await?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Retrieve the bootstrap token assigned to an EC2 instance
|
||||
async fn get_bootstrap_token<I: AsRef<str>>(
|
||||
instance_id: I,
|
||||
|
|
Loading…
Reference in New Issue