If evicting a pod fails with an HTTP 429 Too Many Requests error, it means there is a PodDisruptionBudget that prevents the pod from being deleted. This can happen, for example, when draining a node that has Longhorn volumes attached, as Longhorn creates a PDB for its instance manager pods on such nodes. Longhorn will automatically remove the PDB once there are no workloads on that node that use its volumes, so we must continue to evict other pods and try evicting the failed pods again later. This behavior mostly mimics what `kubectl drain` does to handle this same condition.
462 lines
14 KiB
Rust
462 lines
14 KiB
Rust
use std::sync::LazyLock;
|
|
|
|
use k8s_openapi::api::coordination::v1::Lease;
|
|
use k8s_openapi::api::core::v1::{Node, Pod};
|
|
use kube::Client;
|
|
use kube::api::{Api, ListParams};
|
|
use rocket::async_test;
|
|
use rocket::futures::FutureExt;
|
|
use rocket::http::{ContentType, Header, Status};
|
|
use rocket::tokio;
|
|
use rocket::tokio::sync::Mutex;
|
|
|
|
/// Process-wide async mutex, lazily initialised on first use.
///
/// Tests in this file acquire it before touching the `reboot-lock-default`
/// lease (presumably so that tests sharing that lease do not interleave).
static LOCK: LazyLock<Mutex<()>> = LazyLock::new(|| Mutex::new(()));
|
|
|
|
async fn delete_lease(name: &str) {
|
|
let client = Client::try_default().await.unwrap();
|
|
let leases: Api<Lease> = Api::default_namespaced(client);
|
|
let _ = kube::runtime::wait::delete::delete_and_finalize(
|
|
leases,
|
|
name,
|
|
&Default::default(),
|
|
)
|
|
.await;
|
|
}
|
|
|
|
async fn get_lease(name: &str) -> Result<Lease, kube::Error> {
|
|
let client = Client::try_default().await.unwrap();
|
|
let leases: Api<Lease> = Api::default_namespaced(client);
|
|
leases.get(name).await
|
|
}
|
|
|
|
async fn get_a_node() -> Result<Node, kube::Error> {
|
|
let client = Client::try_default().await?;
|
|
let nodes: Api<Node> = Api::all(client);
|
|
Ok(nodes.list(&Default::default()).await?.items.pop().unwrap())
|
|
}
|
|
|
|
async fn get_node_by_name(name: &str) -> Result<Node, kube::Error> {
|
|
let client = Client::try_default().await?;
|
|
let nodes: Api<Node> = Api::all(client);
|
|
nodes.get(name).await
|
|
}
|
|
|
|
async fn get_pods_on_node(name: &str) -> Result<Vec<Pod>, kube::Error> {
|
|
let client = Client::try_default().await?;
|
|
let pods: Api<Pod> = Api::all(client);
|
|
Ok(pods
|
|
.list(&ListParams::default().fields(&format!("spec.nodeName=={name}")))
|
|
.await?
|
|
.items)
|
|
}
|
|
|
|
#[async_test]
|
|
async fn test_lock_v1_success() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
delete_lease("reboot-lock-default").await;
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Ok);
|
|
assert_eq!(
|
|
response.into_string().await.as_deref(),
|
|
Some(
|
|
"Acquired reboot lock for group default, host test1.example.org\n"
|
|
)
|
|
);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test1.example.org")
|
|
);
|
|
}
|
|
|
|
#[async_test]
|
|
async fn test_lock_v1_custom_group() {
|
|
super::setup();
|
|
|
|
delete_lease("reboot-lock-testgroup").await;
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org&group=testgroup")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Ok);
|
|
assert_eq!(
|
|
response.into_string().await.as_deref(),
|
|
Some(
|
|
"Acquired reboot lock for group testgroup, host test1.example.org\n"
|
|
)
|
|
);
|
|
let lease = get_lease("reboot-lock-testgroup").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test1.example.org")
|
|
);
|
|
}
|
|
|
|
#[async_test]
|
|
async fn test_lock_v1_conflict() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
delete_lease("reboot-lock-default").await;
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Ok);
|
|
assert_eq!(
|
|
response.into_string().await.as_deref(),
|
|
Some(
|
|
"Acquired reboot lock for group default, host test1.example.org\n"
|
|
)
|
|
);
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test2.example.org&wait=false")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Conflict);
|
|
let want_msg = concat!(
|
|
"Another system is already rebooting:",
|
|
" Apply failed with 1 conflict:",
|
|
" conflict with \"test1.example.org\":",
|
|
" .spec.holderIdentity",
|
|
"\n",
|
|
);
|
|
assert_eq!(response.into_string().await.as_deref(), Some(want_msg));
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test1.example.org")
|
|
);
|
|
}
|
|
|
|
#[async_test]
|
|
async fn test_lock_v1_conflict_wait() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
tracing::info!("Deleting existing lease");
|
|
delete_lease("reboot-lock-default").await;
|
|
tracing::info!("Creating first lease");
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Ok);
|
|
assert_eq!(
|
|
response.into_string().await.as_deref(),
|
|
Some(
|
|
"Acquired reboot lock for group default, host test1.example.org\n"
|
|
)
|
|
);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test1.example.org")
|
|
);
|
|
let timer = std::time::Instant::now();
|
|
let _task = tokio::spawn(async {
|
|
tokio::time::sleep(std::time::Duration::from_secs(1))
|
|
.then(|_| async {
|
|
tracing::info!("Deleting first lease");
|
|
delete_lease("reboot-lock-default").await
|
|
})
|
|
.await
|
|
});
|
|
tracing::info!("Creating second lease");
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test2.example.org")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Ok);
|
|
assert_eq!(
|
|
response.into_string().await.as_deref(),
|
|
Some(
|
|
"Acquired reboot lock for group default, host test2.example.org\n"
|
|
)
|
|
);
|
|
let duration = timer.elapsed().as_millis();
|
|
assert!(duration > 1000 && duration < 2000);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test2.example.org")
|
|
);
|
|
}
|
|
|
|
/// A lock request without the `K8s-Reboot-Lock` header is rejected with
/// 400 Bad Request and the body "Invalid lock header".
#[test]
fn test_lock_v1_no_header() {
    super::setup();

    let client = super::client();
    let request = client
        .post("/api/v1/lock")
        .header(ContentType::Form)
        .body("hostname=test1.example.org");
    let response = request.dispatch();
    assert_eq!(response.status(), Status::BadRequest);
    let body = response.into_string();
    assert_eq!(body.as_deref(), Some("Invalid lock header\n"));
}
|
|
|
|
/// A lock request with an empty form body is rejected with
/// 422 Unprocessable Entity and reports the missing `hostname` field.
#[test]
fn test_lock_v1_no_data() {
    super::setup();

    let client = super::client();
    let request = client
        .post("/api/v1/lock")
        .header(Header::new("K8s-Reboot-Lock", "lock"))
        .header(ContentType::Form)
        .body("");
    let response = request.dispatch();
    assert_eq!(response.status(), Status::UnprocessableEntity);
    let body = response.into_string();
    assert_eq!(
        body.as_deref(),
        Some("Error processing request:\nhostname: missing\n")
    );
}
|
|
|
|
#[async_test]
|
|
async fn test_unlock_v1_success() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
delete_lease("reboot-lock-default").await;
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Ok);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test1.example.org")
|
|
);
|
|
let response = client
|
|
.post("/api/v1/unlock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org")
|
|
.dispatch()
|
|
.await;
|
|
let status = response.status();
|
|
assert_eq!(response.into_string().await, None);
|
|
assert_eq!(status, Status::Ok);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(lease.spec.unwrap().holder_identity, None);
|
|
}
|
|
|
|
#[async_test]
|
|
async fn test_unlock_v1_not_locked() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
delete_lease("reboot-lock-default").await;
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/unlock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org")
|
|
.dispatch()
|
|
.await;
|
|
let status = response.status();
|
|
assert_eq!(response.into_string().await, None);
|
|
assert_eq!(status, Status::Ok);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(lease.spec.unwrap().holder_identity.as_deref(), None);
|
|
}
|
|
|
|
#[async_test]
|
|
async fn test_unlock_v1_not_mine() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
delete_lease("reboot-lock-default").await;
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test1.example.org")
|
|
.dispatch()
|
|
.await;
|
|
assert_eq!(response.status(), Status::Ok);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test1.example.org")
|
|
);
|
|
let response = client
|
|
.post("/api/v1/unlock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body("hostname=test2.example.org")
|
|
.dispatch()
|
|
.await;
|
|
let status = response.status();
|
|
assert_eq!(response.into_string().await, None);
|
|
assert_eq!(status, Status::Ok);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_deref(),
|
|
Some("test1.example.org")
|
|
);
|
|
}
|
|
|
|
/// An unlock request without the `K8s-Reboot-Lock` header is rejected with
/// 400 Bad Request and the body "Invalid lock header".
#[test]
fn test_unlock_v1_no_header() {
    super::setup();

    let client = super::client();
    let request = client
        .post("/api/v1/unlock")
        .header(ContentType::Form)
        .body("hostname=test1.example.org");
    let response = request.dispatch();
    assert_eq!(response.status(), Status::BadRequest);
    let body = response.into_string();
    assert_eq!(body.as_deref(), Some("Invalid lock header\n"));
}
|
|
|
|
/// An unlock request with an empty form body is rejected with
/// 422 Unprocessable Entity and reports the missing `hostname` field.
#[test]
fn test_unlock_v1_no_data() {
    super::setup();

    let client = super::client();
    let request = client
        .post("/api/v1/unlock")
        .header(Header::new("K8s-Reboot-Lock", "lock"))
        .header(ContentType::Form)
        .body("");
    let response = request.dispatch();
    assert_eq!(response.status(), Status::UnprocessableEntity);
    let body = response.into_string();
    assert_eq!(
        body.as_deref(),
        Some("Error processing request:\nhostname: missing\n")
    );
}
|
|
|
|
#[async_test]
|
|
async fn test_lock_v1_drain() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
delete_lease("reboot-lock-default").await;
|
|
let node = get_a_node().await.unwrap();
|
|
let hostname = node.metadata.name.clone().unwrap();
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/lock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body(format!("hostname={hostname}"))
|
|
.dispatch()
|
|
.await;
|
|
let status = response.status();
|
|
assert_eq!(
|
|
response.into_string().await,
|
|
Some(format!(
|
|
"Acquired reboot lock for group default, host {hostname}\n"
|
|
))
|
|
);
|
|
assert_eq!(status, Status::Ok);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_eq!(
|
|
lease.spec.unwrap().holder_identity.as_ref(),
|
|
Some(&hostname)
|
|
);
|
|
let node = get_node_by_name(&hostname).await.unwrap();
|
|
assert!(
|
|
node.spec
|
|
.unwrap()
|
|
.taints
|
|
.unwrap()
|
|
.iter()
|
|
.any(|t| t.key == "node.kubernetes.io/unschedulable"
|
|
&& t.effect == "NoSchedule")
|
|
);
|
|
let pods = get_pods_on_node(&hostname).await.unwrap();
|
|
assert_eq!(
|
|
pods.iter()
|
|
.filter(|p| {
|
|
!p.metadata
|
|
.owner_references
|
|
.clone()
|
|
.unwrap_or_default()
|
|
.iter()
|
|
.any(|o| o.kind == "DaemonSet")
|
|
})
|
|
.count(),
|
|
0
|
|
);
|
|
}
|
|
|
|
#[async_test]
|
|
async fn test_unlock_v1_uncordon() {
|
|
super::setup();
|
|
let _lock = &*LOCK.lock().await;
|
|
|
|
let node = get_a_node().await.unwrap();
|
|
let hostname = node.metadata.name.clone().unwrap();
|
|
let client = super::async_client().await;
|
|
let response = client
|
|
.post("/api/v1/unlock")
|
|
.header(Header::new("K8s-Reboot-Lock", "lock"))
|
|
.header(ContentType::Form)
|
|
.body(format!("hostname={hostname}"))
|
|
.dispatch()
|
|
.await;
|
|
let status = response.status();
|
|
assert_eq!(response.into_string().await, None,);
|
|
assert_eq!(status, Status::Ok);
|
|
let lease = get_lease("reboot-lock-default").await.unwrap();
|
|
assert_ne!(lease.spec.unwrap().holder_identity.as_ref(), Some(&hostname));
|
|
let node = get_node_by_name(&hostname).await.unwrap();
|
|
assert!(
|
|
!node
|
|
.spec
|
|
.unwrap()
|
|
.taints
|
|
.unwrap_or_default()
|
|
.iter()
|
|
.any(|t| t.key == "node.kubernetes.io/unschedulable"
|
|
&& t.effect == "NoSchedule")
|
|
);
|
|
}
|