Re: [PATCH v2 1/2] rust: poll: make PollCondVar upgradable

From: Boqun Feng

Date: Tue Mar 03 2026 - 17:08:21 EST


On Fri, Feb 13, 2026 at 11:29:41AM +0000, Alice Ryhl wrote:
> Rust Binder currently uses PollCondVar, but it calls synchronize_rcu()
> in the destructor, which we would like to avoid. Add a variation of
> PollCondVar, which uses kfree_rcu() instead.
>
> Signed-off-by: Alice Ryhl <aliceryhl@xxxxxxxxxx>
> ---
> rust/kernel/sync/poll.rs | 160 ++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 159 insertions(+), 1 deletion(-)
>
> diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs
> index 0ec985d560c8d3405c08dbd86e48b14c7c34484d..9555f818a24d777dd908fca849015c3490ce38d3 100644
> --- a/rust/kernel/sync/poll.rs
> +++ b/rust/kernel/sync/poll.rs
> @@ -5,12 +5,21 @@
> //! Utilities for working with `struct poll_table`.
>
> use crate::{
> + alloc::AllocError,
> bindings,
> + container_of,
> fs::File,
> prelude::*,
> + sync::atomic::{Acquire, Atomic, Relaxed, Release},
> + sync::lock::{Backend, Lock},
> sync::{CondVar, LockClassKey},
> + types::Opaque, //
> +};
> +use core::{
> + marker::{PhantomData, PhantomPinned},
> + ops::Deref,
> + ptr,
> };
> -use core::{marker::PhantomData, ops::Deref};
>
> /// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class.
> #[macro_export]
> @@ -66,6 +75,7 @@ pub fn register_wait(&self, file: &File, cv: &PollCondVar) {
> ///
> /// [`CondVar`]: crate::sync::CondVar
> #[pin_data(PinnedDrop)]
> +#[repr(transparent)]
> pub struct PollCondVar {
> #[pin]
> inner: CondVar,
> @@ -78,6 +88,17 @@ pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit
> inner <- CondVar::new(name, key),
> })
> }
> +
> + /// Use this `CondVar` as a `PollCondVar`.
> + ///
> + /// # Safety
> + ///
> + /// After the last use of the returned `&PollCondVar`, `__wake_up_pollfree` must be called on
> + /// the `wait_queue_head` at least one grace period before the `CondVar` is destroyed.
> + unsafe fn from_non_poll(c: &CondVar) -> &PollCondVar {
> + // SAFETY: Layout is the same. Caller ensures that PollTables are cleared in time.
> + unsafe { &*ptr::from_ref(c).cast() }
> + }
> }
>
> // Make the `CondVar` methods callable on `PollCondVar`.
> @@ -104,3 +125,140 @@ fn drop(self: Pin<&mut Self>) {
> unsafe { bindings::synchronize_rcu() };
> }
> }
> +
> +/// Wrapper around [`CondVar`] that can be upgraded to [`PollCondVar`].
> +///
> +/// By using this wrapper, you can avoid rcu for cases that don't use [`PollTable`], and in all
> +/// cases you can avoid `synchronize_rcu()`.
> +///
> +/// # Invariants
> +///
> +/// `active` either references `simple`, or a `kmalloc` allocation holding an
> +/// `UpgradePollCondVarInner`. In the latter case, the allocation remains valid until
> +/// `Self::drop()` plus one grace period.
> +#[pin_data(PinnedDrop)]
> +pub struct UpgradePollCondVar {
> + #[pin]
> + simple: CondVar,
> + active: Atomic<*const CondVar>,
> + #[pin]
> + _pin: PhantomPinned,
> +}
> +
> +#[pin_data]
> +#[repr(C)]
> +struct UpgradePollCondVarInner {
> + #[pin]
> + upgraded: CondVar,
> + #[pin]
> + rcu: Opaque<bindings::callback_head>,
> +}
> +
> +impl UpgradePollCondVar {
> + /// Constructs a new upgradable condvar initialiser.
> + pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit<Self> {
> + pin_init!(&this in Self {
> + simple <- CondVar::new(name, key),
> + // SAFETY: `this->simple` is in-bounds. Pointer remains valid since this type is
> + // pinned.
> + active: Atomic::new(unsafe { &raw const (*this.as_ptr()).simple }),
> + _pin: PhantomPinned,
> + })
> + }
> +
> + /// Obtain a [`PollCondVar`], upgrading if necessary.
> + ///
> + /// You should use the same lock as what is passed to the `wait_*` methods. Otherwise wakeups
> + /// may be missed.
> + pub fn poll<T: ?Sized, B: Backend>(
> + &self,
> + lock: &Lock<T, B>,
> + name: &'static CStr,
> + key: Pin<&'static LockClassKey>,
> + ) -> Result<&PollCondVar, AllocError> {
> + let mut ptr = self.active.load(Acquire);
> + if ptr::eq(ptr, &self.simple) {
> + self.upgrade(lock, name, key)?;
> + ptr = self.active.load(Acquire);
> + debug_assert_ne!(ptr, ptr::from_ref(&self.simple));
> + }
> + // SAFETY: Signature ensures that last use of returned `&PollCondVar` is before drop(), and
> + // drop() calls `__wake_up_pollfree` followed by waiting a grace period before the
> + // `CondVar` is destroyed.
> + Ok(unsafe { PollCondVar::from_non_poll(&*ptr) })
> + }
> +
> + fn upgrade<T: ?Sized, B: Backend>(
> + &self,
> + lock: &Lock<T, B>,
> + name: &'static CStr,
> + key: Pin<&'static LockClassKey>,
> + ) -> Result<(), AllocError> {
> + let upgraded = KBox::pin_init(
> + pin_init!(UpgradePollCondVarInner {
> + upgraded <- CondVar::new(name, key),
> + rcu: Opaque::uninit(),
> + }),
> + GFP_KERNEL,
> + )
> + .map_err(|_| AllocError)?;
> +
> + // SAFETY: The value is treated as pinned.
> + let upgraded = KBox::into_raw(unsafe { Pin::into_inner_unchecked(upgraded) });
> +
> + let res = self.active.cmpxchg(
> + ptr::from_ref(&self.simple),
> + // SAFETY: This operation stays in-bounds of the above allocation.
> + unsafe { &raw mut (*upgraded).upgraded },
> + Release,
> + );
> +
> + if res.is_err() {
> + // Already upgraded, so still success.
> + // SAFETY: The cmpxchg failed, so take back ownership of the box.
> + drop(unsafe { KBox::from_raw(upgraded) });
> + return Ok(());
> + }
> +
> + // If a normal waiter registers in parallel with us, then either:
> + // * We took the lock first. In that case, the waiter sees the above cmpxchg.
> + // * They took the lock first. In that case, we wake them up below.
> + drop(lock.lock());
> + self.simple.notify_all();

Hmm... what if the waiter gets its `&CondVar` before `upgrade()` runs and
then uses that directly?

    <waiter>                                    <in upgrade()>
    let poll_cv: &UpgradePollCondVar = ...;
    let cv = poll_cv.deref();
                                                cmpxchg();
                                                drop(lock.lock());
                                                self.simple.notify_all();
    let mut guard = lock.lock();
    cv.wait(&mut guard);

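We still miss the wake-up, right? The waiter goes to sleep on `simple` only
after `notify_all()` has already run, and later notifiers that go through
`deref()` will use the upgraded `CondVar` instead.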

It's creative, but I particularly hate that we use an empty lock critical
section to synchronize ;-)

Do you think the complexity of dynamic upgrading is worthwhile, or should
we just use the box-allocated PollCondVar unconditionally?

I think if the current users won't benefit from dynamic upgrading, then we
can avoid the complexity. We can always add it back later.
Thoughts?

Regards,
Boqun

> +
> + Ok(())
> + }
> +}
> +
> +// Make the `CondVar` methods callable on `UpgradePollCondVar`.
> +impl Deref for UpgradePollCondVar {
> + type Target = CondVar;
> +
> + fn deref(&self) -> &CondVar {
> + // SAFETY: By the type invariants, this is either `&self.simple` or references an
> + // allocation that lives until `UpgradePollCondVar::drop`.
> + unsafe { &*self.active.load(Acquire) }
> + }
> +}
> +
> +#[pinned_drop]
> +impl PinnedDrop for UpgradePollCondVar {
> + #[inline]
> + fn drop(self: Pin<&mut Self>) {
> + // ORDERING: All calls to upgrade happens-before Drop, so no synchronization is required.
> + let ptr = self.active.load(Relaxed);
> + if ptr::eq(ptr, &self.simple) {
> + return;
> + }
> + // SAFETY: When the pointer is not &self.simple, it is an `UpgradePollCondVarInner`.
> + let ptr = unsafe { container_of!(ptr.cast_mut(), UpgradePollCondVarInner, upgraded) };
> + // SAFETY: The pointer points at a valid `wait_queue_head`.
> + unsafe { bindings::__wake_up_pollfree((*ptr).upgraded.wait_queue_head.get()) };
> + // This skips drop of `CondVar`, but that's ok because we reimplemented its drop here.
> + //
> + // SAFETY: `__wake_up_pollfree` ensures that all registered PollTable instances are gone in
> + // one grace period, and this is the destructor so no new PollTable instances can be
> + // registered. Thus, it's safe to rcu free the `UpgradePollCondVarInner`.
> + unsafe { bindings::kvfree_call_rcu((*ptr).rcu.get(), ptr.cast::<c_void>()) };
> + }
> +}
>
> --
> 2.53.0.273.g2a3d683680-goog
>