public inbox for linux-fsdevel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 0/2] Avoid synchronize_rcu() for every thread drop in Rust Binder
@ 2026-01-17 15:25 Alice Ryhl
  2026-01-17 15:25 ` [PATCH RFC 1/2] rust: poll: make PollCondVar upgradable Alice Ryhl
  2026-01-17 15:25 ` [PATCH RFC 2/2] rust_binder: use UpgradePollCondVar Alice Ryhl
  0 siblings, 2 replies; 3+ messages in thread
From: Alice Ryhl @ 2026-01-17 15:25 UTC (permalink / raw)
  To: Christian Brauner, Boqun Feng, Paul E. McKenney
  Cc: Gary Guo, Greg Kroah-Hartman, Carlos Llamas, linux-fsdevel,
	rust-for-linux, linux-kernel, Alice Ryhl

Right now Rust Binder calls synchronize_rcu() more often than is
necessary. Most processes do not use epoll at all, so they don't require
rcu here. Back in Kangrejos I came up with a way to avoid this. Idea is
to move the value that needs rcu to a separate allocation that's easy to
kfree_rcu(). We pay the allocation only when the proc uses epoll using
an "upgrade" strategy - most processes don't.

One solution that may be better is to just kfree_rcu() the Binder Thread
struct directly when it's refcount drops to zero (Drop of everything
else can still run without a grace period). But I'm not sure how I would
achieve that - after all Thread is also used with kernel::list.

Based on top of:
https://lore.kernel.org/all/20260117122243.24404-5-boqun.feng@gmail.com/

Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
Alice Ryhl (2):
      rust: poll: make PollCondVar upgradable
      rust_binder: use UpgradePollCondVar

 drivers/android/binder/process.rs |   2 +-
 drivers/android/binder/thread.rs  |  18 +++--
 rust/kernel/sync/poll.rs          | 163 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 173 insertions(+), 10 deletions(-)
---
base-commit: e0a15d3fd5100c25d1e06837bdb3070a7a716e32
change-id: 20260117-upgrade-poll-37ee2a7a79dd

Best regards,
-- 
Alice Ryhl <aliceryhl@google.com>


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH RFC 1/2] rust: poll: make PollCondVar upgradable
  2026-01-17 15:25 [PATCH RFC 0/2] Avoid synchronize_rcu() for every thread drop in Rust Binder Alice Ryhl
@ 2026-01-17 15:25 ` Alice Ryhl
  2026-01-17 15:25 ` [PATCH RFC 2/2] rust_binder: use UpgradePollCondVar Alice Ryhl
  1 sibling, 0 replies; 3+ messages in thread
From: Alice Ryhl @ 2026-01-17 15:25 UTC (permalink / raw)
  To: Christian Brauner, Boqun Feng, Paul E. McKenney
  Cc: Gary Guo, Greg Kroah-Hartman, Carlos Llamas, linux-fsdevel,
	rust-for-linux, linux-kernel, Alice Ryhl

Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
 rust/kernel/sync/poll.rs | 163 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 161 insertions(+), 2 deletions(-)

diff --git a/rust/kernel/sync/poll.rs b/rust/kernel/sync/poll.rs
index 0ec985d560c8d3405c08dbd86e48b14c7c34484d..9fb004c71c78375bb16ded7f518aa282cdcc50f5 100644
--- a/rust/kernel/sync/poll.rs
+++ b/rust/kernel/sync/poll.rs
@@ -5,12 +5,16 @@
 //! Utilities for working with `struct poll_table`.
 
 use crate::{
-    bindings,
+    alloc::AllocError,
+    bindings, container_of,
     fs::File,
     prelude::*,
+    sync::atomic::{Acquire, Atomic, Relaxed, Release},
+    sync::lock::{Backend, Lock},
     sync::{CondVar, LockClassKey},
+    types::Opaque,
 };
-use core::{marker::PhantomData, ops::Deref};
+use core::{marker::PhantomData, ops::Deref, ptr};
 
 /// Creates a [`PollCondVar`] initialiser with the given name and a newly-created lock class.
 #[macro_export]
@@ -22,6 +26,17 @@ macro_rules! new_poll_condvar {
     };
 }
 
+/// Creates a [`UpgradePollCondVar`] initialiser with the given name and a newly-created lock
+/// class.
+#[macro_export]
+macro_rules! new_upgrade_poll_condvar {
+    ($($name:literal)?) => {
+        $crate::sync::poll::UpgradePollCondVar::new(
+            $crate::optional_name!($($name)?), $crate::static_lock_class!()
+        )
+    };
+}
+
 /// Wraps the kernel's `poll_table`.
 ///
 /// # Invariants
@@ -66,6 +81,7 @@ pub fn register_wait(&self, file: &File, cv: &PollCondVar) {
 ///
 /// [`CondVar`]: crate::sync::CondVar
 #[pin_data(PinnedDrop)]
+#[repr(transparent)]
 pub struct PollCondVar {
     #[pin]
     inner: CondVar,
@@ -78,6 +94,17 @@ pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit
             inner <- CondVar::new(name, key),
         })
     }
+
+    /// Use this `CondVar` as a `PollCondVar`.
+    ///
+    /// # Safety
+    ///
+    /// After the last use of the returned `&PollCondVar`, `__wake_up_pollfree` must be called on
+    /// the `wait_queue_head` at least one grace period before the `CondVar` is destroyed.
+    unsafe fn from_non_poll(c: &CondVar) -> &PollCondVar {
+        // SAFETY: Layout is the same. Caller ensures that PollTables are cleared in time.
+        unsafe { &*ptr::from_ref(c).cast() }
+    }
 }
 
 // Make the `CondVar` methods callable on `PollCondVar`.
@@ -104,3 +131,135 @@ fn drop(self: Pin<&mut Self>) {
         unsafe { bindings::synchronize_rcu() };
     }
 }
+
+/// Wrapper around [`CondVar`] that can be upgraded to [`PollCondVar`].
+///
+/// By using this wrapper, you can avoid rcu for cases that don't use [`PollTable`], and in all
+/// cases you can avoid `synchronize_rcu()`.
+///
+/// # Invariants
+///
+/// `active` either references `simple`, or a `kmalloc` allocation holding an
+/// `UpgradePollCondVarInner`. In the latter case, the allocation remains valid until
+/// `Self::drop()` plus one grace period.
+#[pin_data(PinnedDrop)]
+pub struct UpgradePollCondVar {
+    #[pin]
+    simple: CondVar,
+    active: Atomic<*mut CondVar>,
+}
+
+#[pin_data]
+#[repr(C)]
+struct UpgradePollCondVarInner {
+    #[pin]
+    upgraded: CondVar,
+    #[pin]
+    rcu: Opaque<bindings::callback_head>,
+}
+
+impl UpgradePollCondVar {
+    /// Constructs a new upgradable condvar initialiser.
+    pub fn new(name: &'static CStr, key: Pin<&'static LockClassKey>) -> impl PinInit<Self> {
+        pin_init!(&this in Self {
+            simple <- CondVar::new(name, key),
+            // SAFETY: `this->inner` is in-bounds.
+            active: Atomic::new((unsafe { &raw const (*this.as_ptr()).simple }).cast_mut()),
+        })
+    }
+
+    /// Obtain a [`PollCondVar`], upgrading if necessary.
+    ///
+    /// You should use the same lock as what is passed to the `wait_*` methods. Otherwise wakeups
+    /// may be missed.
+    pub fn poll<T: ?Sized, B: Backend>(
+        &self,
+        lock: &Lock<T, B>,
+        name: &'static CStr,
+        key: Pin<&'static LockClassKey>,
+    ) -> Result<&PollCondVar, AllocError> {
+        let mut ptr = self.active.load(Acquire).cast_const();
+        if ptr::eq(ptr, &self.simple) {
+            self.upgrade(lock, name, key)?;
+            ptr = self.active.load(Acquire).cast_const();
+            debug_assert_ne!(ptr, ptr::from_ref(&self.simple));
+        }
+        // SAFETY: Signature ensures that last use of returned `&PollCondVar` is before drop(), and
+        // drop() calls `__wake_up_pollfree` followed by waiting a grace period before the
+        // `CondVar` is destroyed.
+        Ok(unsafe { PollCondVar::from_non_poll(&*ptr) })
+    }
+
+    fn upgrade<T: ?Sized, B: Backend>(
+        &self,
+        lock: &Lock<T, B>,
+        name: &'static CStr,
+        key: Pin<&'static LockClassKey>,
+    ) -> Result<(), AllocError> {
+        let upgraded = KBox::pin_init(
+            pin_init!(UpgradePollCondVarInner {
+                upgraded <- CondVar::new(name, key),
+                rcu: Opaque::uninit(),
+            }),
+            GFP_KERNEL,
+        )
+        .map_err(|_| AllocError)?;
+
+        // SAFETY: The value is treated as pinned.
+        let upgraded = KBox::into_raw(unsafe { Pin::into_inner_unchecked(upgraded) });
+
+        let res = self.active.cmpxchg(
+            ptr::from_ref(&self.simple).cast_mut(),
+            // SAFETY: This operation stays in-bounds of the above allocation.
+            unsafe { &raw mut (*upgraded).upgraded },
+            Release,
+        );
+
+        if res.is_err() {
+            // SAFETY: The cmpxchg failed, so take back ownership of the box.
+            drop(unsafe { KBox::from_raw(upgraded) });
+            return Ok(());
+        }
+
+        // If a normal waiter registers in parallel with us, then either:
+        // * We took the lock first. In that case, the waiter sees the above cmpxchg.
+        // * They took the lock first. In that case, we wake them up below.
+        drop(lock.lock());
+        self.simple.notify_all();
+
+        Ok(())
+    }
+}
+
+// Make the `CondVar` methods callable on `UpgradePollCondVar`.
+impl Deref for UpgradePollCondVar {
+    type Target = CondVar;
+
+    fn deref(&self) -> &CondVar {
+        // SAFETY: By the type invariants, this is either `&self.simple` or references an
+        // allocation that lives until `UpgradePollCondVar::drop`.
+        unsafe { &*self.active.load(Acquire) }
+    }
+}
+
+#[pinned_drop]
+impl PinnedDrop for UpgradePollCondVar {
+    #[inline]
+    fn drop(self: Pin<&mut Self>) {
+        // ORDERING: All calls to upgrade happens-before Drop, so no synchronization is required.
+        let ptr = self.active.load(Relaxed);
+        if ptr::eq(ptr, &self.simple) {
+            return;
+        }
+        // SAFETY: When the pointer is not &self.active, it is an `UpgradePollCondVarInner`.
+        let ptr = unsafe { container_of!(ptr, UpgradePollCondVarInner, upgraded) };
+        // SAFETY: The pointer points at a valid `wait_queue_head`.
+        unsafe { bindings::__wake_up_pollfree((*ptr).upgraded.wait_queue_head.get()) };
+        // This skips drop of `CondVar`, but that's ok because we reimplemented its drop here.
+        //
+        // SAFETY: `__wake_up_pollfree` ensures that all registered PollTable instances are gone in
+        // one grace period, and this is the destructor so no new PollTable instances can be
+        // registered. Thus, it's safety to rcu free the `UpgradePollCondVarInner`.
+        unsafe { bindings::kvfree_call_rcu((*ptr).rcu.get(), ptr.cast::<ffi::c_void>()) };
+    }
+}

-- 
2.52.0.457.g6b5491de43-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH RFC 2/2] rust_binder: use UpgradePollCondVar
  2026-01-17 15:25 [PATCH RFC 0/2] Avoid synchronize_rcu() for every thread drop in Rust Binder Alice Ryhl
  2026-01-17 15:25 ` [PATCH RFC 1/2] rust: poll: make PollCondVar upgradable Alice Ryhl
@ 2026-01-17 15:25 ` Alice Ryhl
  1 sibling, 0 replies; 3+ messages in thread
From: Alice Ryhl @ 2026-01-17 15:25 UTC (permalink / raw)
  To: Christian Brauner, Boqun Feng, Paul E. McKenney
  Cc: Gary Guo, Greg Kroah-Hartman, Carlos Llamas, linux-fsdevel,
	rust-for-linux, linux-kernel, Alice Ryhl

Most processes do not use Rust Binder with epoll, so avoid paying the
synchronize_rcu() cost in drop for those that don't need it. For those
that do, we also manage to replace synchronize_rcu() with kfree_rcu(),
though we introduce an extra allocation.

Signed-off-by: Alice Ryhl <aliceryhl@google.com>
---
 drivers/android/binder/process.rs |  2 +-
 drivers/android/binder/thread.rs  | 18 +++++++++++-------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/android/binder/process.rs b/drivers/android/binder/process.rs
index 132055b4790f0ec69a87635b498909df2bf475e2..9374f1a86766c09321b57e565b6317cc290ea32b 100644
--- a/drivers/android/binder/process.rs
+++ b/drivers/android/binder/process.rs
@@ -1684,7 +1684,7 @@ pub(crate) fn poll(
         table: PollTable<'_>,
     ) -> Result<u32> {
         let thread = this.get_current_thread()?;
-        let (from_proc, mut mask) = thread.poll(file, table);
+        let (from_proc, mut mask) = thread.poll(file, table)?;
         if mask == 0 && from_proc && !this.inner.lock().work.is_empty() {
             mask |= bindings::POLLIN;
         }
diff --git a/drivers/android/binder/thread.rs b/drivers/android/binder/thread.rs
index 82264db06507d4641b60cbed96af482a9d36e7b2..8f09cf1599ae7edcf2ee60b2cb1b08cc2d0afd3f 100644
--- a/drivers/android/binder/thread.rs
+++ b/drivers/android/binder/thread.rs
@@ -16,7 +16,7 @@
     seq_file::SeqFile,
     seq_print,
     sync::atomic::{ordering::Relaxed, Atomic},
-    sync::poll::{PollCondVar, PollTable},
+    sync::poll::{PollTable, UpgradePollCondVar},
     sync::{Arc, SpinLock},
     task::Task,
     types::ARef,
@@ -412,7 +412,7 @@ pub(crate) struct Thread {
     #[pin]
     inner: SpinLock<InnerThread>,
     #[pin]
-    work_condvar: PollCondVar,
+    work_condvar: UpgradePollCondVar,
     /// Used to insert this thread into the process' `ready_threads` list.
     ///
     /// INVARIANT: May never be used for any other list than the `self.process.ready_threads`.
@@ -443,7 +443,7 @@ pub(crate) fn new(id: i32, process: Arc<Process>) -> Result<Arc<Self>> {
                 process,
                 task: ARef::from(&**kernel::current!()),
                 inner <- kernel::new_spinlock!(inner, "Thread::inner"),
-                work_condvar <- kernel::new_poll_condvar!("Thread::work_condvar"),
+                work_condvar <- kernel::new_upgrade_poll_condvar!("Thread::work_condvar"),
                 links <- ListLinks::new(),
                 links_track <- AtomicTracker::new(),
             }),
@@ -1484,10 +1484,15 @@ pub(crate) fn write_read(self: &Arc<Self>, data: UserSlice, wait: bool) -> Resul
         ret
     }
 
-    pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> (bool, u32) {
-        table.register_wait(file, &self.work_condvar);
+    pub(crate) fn poll(&self, file: &File, table: PollTable<'_>) -> Result<(bool, u32)> {
+        let condvar = self.work_condvar.poll(
+            &self.inner,
+            c"Thread::work_condvar (upgraded)",
+            kernel::static_lock_class!(),
+        )?;
+        table.register_wait(file, condvar);
         let mut inner = self.inner.lock();
-        (inner.should_use_process_work_queue(), inner.poll())
+        Ok((inner.should_use_process_work_queue(), inner.poll()))
     }
 
     /// Make the call to `get_work` or `get_work_local` return immediately, if any.
@@ -1523,7 +1528,6 @@ pub(crate) fn notify_if_poll_ready(&self, sync: bool) {
     pub(crate) fn release(self: &Arc<Self>) {
         self.inner.lock().is_dead = true;
 
-        //self.work_condvar.clear();
         self.unwind_transaction_stack();
 
         // Cancel all pending work items.

-- 
2.52.0.457.g6b5491de43-goog


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-01-17 15:25 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-17 15:25 [PATCH RFC 0/2] Avoid synchronize_rcu() for every thread drop in Rust Binder Alice Ryhl
2026-01-17 15:25 ` [PATCH RFC 1/2] rust: poll: make PollCondVar upgradable Alice Ryhl
2026-01-17 15:25 ` [PATCH RFC 2/2] rust_binder: use UpgradePollCondVar Alice Ryhl

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox