drain: Retry failed evictions
If evicting a pod fails with an HTTP 429 Too Many Requests error, it means a PodDisruptionBudget is preventing the pod from being deleted. This can happen, for example, when draining a node that has Longhorn volumes attached, because Longhorn creates a PDB for its instance manager pods on such nodes. Longhorn removes the PDB automatically once no workloads on that node use its volumes, so we must keep evicting the other pods and retry the failed evictions later. This behavior mostly mimics how `kubectl drain` handles the same condition.
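The essential pattern is to treat a 429 from the Eviction API as "blocked by a PDB, try again later" rather than as a fatal error. A minimal single-pod sketch of that pattern, assuming the kube crate's `Api::<Pod>::evict` and a Tokio runtime (the name `evict_with_retry` and the fixed 5-second backoff are illustrative, not part of this commit):

use std::time::Duration;

use k8s_openapi::api::core::v1::Pod;
use kube::Api;

// Keep requesting the eviction until the API server accepts it or the pod is
// already gone. A 429 means a PodDisruptionBudget currently forbids the
// disruption, so back off and retry; any other error is returned to the caller.
async fn evict_with_retry(pods: &Api<Pod>, name: &str) -> Result<(), kube::Error> {
    loop {
        match pods.evict(name, &Default::default()).await {
            Ok(_) => return Ok(()),
            // Pod no longer exists: nothing left to evict.
            Err(kube::Error::Api(e)) if e.code == 404 => return Ok(()),
            // Eviction blocked by a PodDisruptionBudget: retry later.
            Err(kube::Error::Api(e)) if e.code == 429 => {
                tokio::time::sleep(Duration::from_secs(5)).await;
            }
            Err(e) => return Err(e),
        }
    }
}

The change below retries in batches instead of per pod: each pass over the node's pods collects the (namespace, name) pairs rejected with 429 and retries only those after a short sleep, while a spawned task watches the pod stream so the drain only finishes once every evicted pod is actually deleted.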
parent 7d8ee51016
commit d937bd6fb2

src/drain.rs
@@ -3,17 +3,58 @@ use std::collections::{HashMap, HashSet};
 use k8s_openapi::api::core::v1::{Node, Pod};
 use kube::Client;
 use kube::api::{Api, ListParams, WatchEvent, WatchParams};
-use rocket::futures::{StreamExt, TryStreamExt};
-use tracing::{debug, info, trace};
+use rocket::futures::stream::{BoxStream, StreamExt};
+use rocket::tokio;
+use rocket::tokio::sync::mpsc::{self, Receiver};
+use tracing::{debug, error, info, trace, warn};
 
+async fn wait_drained(
+    mut stream: BoxStream<'_, Result<WatchEvent<Pod>, kube::Error>>,
+    mut channel: Receiver<(String, String)>,
+) -> Result<(), kube::Error> {
+    let mut waitlist = HashSet::new();
+    loop {
+        tokio::select! {
+            Some((namespace, name)) = channel.recv() => {
+                debug!("Waiting for pod {namespace}/{name}");
+                waitlist.insert((namespace, name));
+            }
+            event = stream.next() => {
+                if let Some(event) = event {
+                    trace!("Watch pod event: {event:?}");
+                    if let WatchEvent::Deleted(pod) = event? {
+                        if let (Some(namespace), Some(name)) =
+                            (pod.metadata.namespace, pod.metadata.name)
+                        {
+                            info!("Pod {namespace}/{name} evicted");
+                            waitlist.remove(&(namespace, name));
+                        }
+                        let n = waitlist.len();
+                        if n == 0 {
+                            break;
+                        }
+                        debug!(
+                            "Waiting for {n} more {}",
+                            if n == 1 { "pod" } else { "pods" }
+                        );
+                    }
+                } else {
+                    break;
+                }
+            }
+        }
+    }
+    Ok(())
+}
+
 pub(crate) async fn drain_node(
     client: Client,
     name: &str,
 ) -> Result<(), kube::Error> {
     let all_pods: Api<Pod> = Api::all(client.clone());
-    let filter = &format!("spec.nodeName={name}");
+    let filter = format!("spec.nodeName={name}");
     let mut node_pods: HashSet<_> = all_pods
-        .list(&ListParams::default().fields(filter))
+        .list(&ListParams::default().fields(&filter))
         .await?
         .items
         .into_iter()
@@ -34,38 +75,52 @@ pub(crate) async fn drain_node(
         debug!("No pods to evict from node {name}");
         return Ok(());
     }
+    let (tx, rx) = mpsc::channel(node_pods.len());
     let mut pods = HashMap::new();
-    for (namespace, name) in node_pods.iter() {
-        info!("Evicting pod {namespace}/{name}");
-        let api = pods
-            .entry(namespace)
-            .or_insert_with_key(|k| Api::<Pod>::namespaced(client.clone(), k));
-        // Return early here because otherwise we would just wait forever for
-        // the pod to be deleted.
-        api.evict(name, &Default::default()).await?;
-    }
-    let mut stream = all_pods
-        .watch(&WatchParams::default().fields(filter), "0")
-        .await?
-        .boxed();
-    while let Some(event) = stream.try_next().await? {
-        trace!("Watch pod event: {event:?}");
-        if let WatchEvent::Deleted(pod) = event {
-            if let (Some(namespace), Some(name)) =
-                (pod.metadata.namespace, pod.metadata.name)
-            {
-                node_pods.remove(&(namespace, name));
-            }
-            let n = node_pods.len();
-            if n == 0 {
-                break;
-            }
+    let wait_task = tokio::spawn(async move {
+        let params = WatchParams::default().fields(&filter);
+        let stream = all_pods.watch(&params, "0").await?.boxed();
+        wait_drained(stream, rx).await
+    });
+    'outer: while !node_pods.is_empty() {
+        let mut failed = HashSet::new();
+        for (namespace, name) in node_pods.iter() {
+            info!("Evicting pod {namespace}/{name}");
+            let api = pods.entry(namespace.clone()).or_insert_with_key(|k| {
+                Api::<Pod>::namespaced(client.clone(), k)
+            });
+            match api.evict(name, &Default::default()).await {
+                Err(kube::Error::Api(e)) if e.code == 429 => {
+                    warn!(
+                        "Failed to evict pod {name}: {e}; will retry in 5 seconds"
+                    );
+                    failed.insert((namespace.clone(), name.clone()));
+                },
+                Err(kube::Error::Api(e)) if e.code == 404 => (),
+                Err(e) => error!("Failed to evict pod {name}: {e}"),
+                Ok(_) => {
+                    if tx
+                        .send((namespace.clone(), name.clone()))
+                        .await
+                        .is_err()
+                    {
+                        error!("Waiter channel closed");
+                        break 'outer;
+                    }
+                },
+            }
+        }
+        node_pods = failed;
+        let n = node_pods.len();
+        if n > 0 {
             debug!(
-                "Waiting for {n} more {}",
+                "Waiting to retry {n} {}",
                 if n == 1 { "pod" } else { "pods" }
             );
+            tokio::time::sleep(std::time::Duration::from_secs(5)).await;
         }
     }
+    wait_task.await.unwrap()?;
     info!("Finished draining pods from {name}");
     Ok(())
 }
@@ -24,6 +24,7 @@ pub enum LockError {
 
 impl From<kube::Error> for LockError {
     fn from(error: kube::Error) -> Self {
+        error!("Error processing request: {error}");
         Self::ServerError(format!("{error}\n"))
     }
 }
@@ -446,7 +446,7 @@ async fn test_unlock_v1_uncordon() {
     assert_eq!(response.into_string().await, None,);
     assert_eq!(status, Status::Ok);
     let lease = get_lease("reboot-lock-default").await.unwrap();
-    assert_eq!(lease.spec.unwrap().holder_identity, None);
+    assert_ne!(lease.spec.unwrap().holder_identity.as_ref(), Some(&hostname));
     let node = get_node_by_name(&hostname).await.unwrap();
     assert!(
         !node