Rust for Linux List
 help / color / mirror / Atom feed
* [PATCH 0/1] rust: revocable: Fix racing access during revocation
@ 2026-06-05  5:02 Yuan Tan
  2026-06-05  5:05 ` [PATCH 1/1] " Yuan Tan
  2026-06-05  6:09 ` [PATCH 0/1] " Boqun Feng
  0 siblings, 2 replies; 4+ messages in thread
From: Yuan Tan @ 2026-06-05  5:02 UTC (permalink / raw)
  To: ojeda, boqun, rust-for-linux; +Cc: zhiyunq, ardalan, pgovind2, dzueck, Yuan Tan

Hi Linux kernel maintainers,

We are developing a tool called FerroLens to detect potential unsound
behavior in Rust code in the Linux kernel. FerroLens reported the following
bug in rust/kernel/revocable.rs.

There is a potential race condition between the call to
revoke_internal/no_sync and try_access/try_access_with_guard on the same
object. This occurs due to the use of Ordering::Relaxed to synchronize on
is_available.

The race occurs in the following scenario:

Thread A (Revoker)
1. is_available.swap(false, Relaxed)
2. synchronize_rcu() begins

Thread B (reader)
3. executes rcu_read_lock().
4. is_available.load(Relaxed) returns true (stale value).

Thread A
5. synchronize_rcu() decides it doesn't need to wait for Thread B because
   B's critical section started after A's grace period began
6. synchronize_rcu() returns
7. drop_in_place(data)

Thread B
8. UB: Thread B accesses data via the guard.

The safety comment for revoke says there are "no more concurrent users",
but that does not prevent a new `try_access` from starting concurrently
with `revoke_internal`. And because both accesses to `is_available` are
`Relaxed`, a racing reader can still observe the old `true` value and
create a reference to `T`.

If `SYNC = true`, `synchronize_rcu()` only waits for RCU read-side critical
sections that were already in flight when the grace period started. A
reader that starts after that point is not waited for, even if it still
reads the stale `true` and obtains `&T`. Then `drop_in_place` can run while
that reader still holds the reference, which is a use-after-free.

Here is a poc kernel module which demonstrates the issue:

---
use core::{
    hint::spin_loop,
    sync::atomic::{AtomicBool, AtomicU32, Ordering},
};
use kernel::{
      bindings,
    c_str,
    error::from_err_ptr,
    prelude::*,
    revocable::{
        self, Revocable, TEST_PHASE_AFTER_DROP, TEST_PHASE_AFTER_SWAP, TEST_PHASE_AFTER_SYNC,
        TEST_PHASE_BEFORE_DROP, TEST_PHASE_BEFORE_SYNC, TEST_PHASE_READER_BEFORE_LOAD,
    },
    sync::{rcu, Arc},
};

  module! {
      type: RevocableExample,
      name: "revoke_poc",
      authors: ["Priya Govindasamy"],
      description: "Poc for potential race condition in Revocable<T>",
      license: "GPL",
  }

  struct DeviceState {
      value: u32,
  }

  struct RevocableExample {
      state: Arc<Revocable<DeviceState>>,
      race: Arc<RaceSync>,
  }

struct RaceSync {
    ready: AtomicU32,
    done: AtomicU32,
    go: AtomicBool,
    abort: AtomicBool,
    reader_may_enter: AtomicBool,
    release_reader: AtomicBool,
    reader_holding_ref: AtomicBool,
}

  struct ThreadContext {
      state: Arc<Revocable<DeviceState>>,
      race: Arc<RaceSync>,
      thread_id: u32,
  }

fn spin_until(flag: &AtomicBool) {
    while !flag.load(Ordering::Acquire) {
        spin_loop();
    }
}

  fn wait_to_start(race: &RaceSync) -> bool {
      loop {
          if race.abort.load(Ordering::Acquire) {
              return false;
          }
          if race.go.load(Ordering::Acquire) {
              return true;
          }
          spin_loop();
      }
  }

fn read_state_value(state: &Revocable<DeviceState>, race: &RaceSync) -> Option<u32> {
    spin_until(&race.reader_may_enter);
    pr_info!("reader: coordinator released reader to enter RCU critical section\n");

    let guard = rcu::read_lock();
    pr_info!("reader: entered RCU critical section after revoke started\n");
    pr_info!("reader: about to call try_access_with_guard after revoke started\n");

    let state = state.try_access_with_guard(&guard)?;
    race.reader_holding_ref.store(true, Ordering::Release);
    pr_info!("reader: holding reference while revoke continues\n");
    spin_until(&race.release_reader);
    let value = state.value;
    race.reader_holding_ref.store(false, Ordering::Release);
    Some(value)
}

  unsafe extern "C" fn read_value_thread(data: *mut kernel::ffi::c_void) -> i32 {
      // SAFETY: `data` comes from `Arc::into_raw` in `spawn_read_thread`, and each thread takes
      // ownership of exactly one reference.
      let ctx = unsafe { Arc::from_raw(data.cast::<ThreadContext>()) };

      ctx.race.ready.fetch_add(1, Ordering::AcqRel);
      if !wait_to_start(&ctx.race) {
          ctx.race.done.fetch_add(1, Ordering::AcqRel);
          return 0;
      }

      match read_state_value(&ctx.state, &ctx.race) {
          Some(v) => pr_info!("REVOKE_BUG: thread {} read value = {}\n", ctx.thread_id, v),
          None => pr_info!("thread {} saw revoked state\n", ctx.thread_id),
      }

      ctx.race.done.fetch_add(1, Ordering::AcqRel);
      0
  }

  unsafe extern "C" fn revoke_state_thread(data: *mut kernel::ffi::c_void) -> i32 {
      // SAFETY: `data` comes from `Arc::into_raw` in `spawn_revoke_thread`, and each thread takes
      // ownership of exactly one reference.
      let ctx = unsafe { Arc::from_raw(data.cast::<ThreadContext>()) };

      ctx.race.ready.fetch_add(1, Ordering::AcqRel);
      if !wait_to_start(&ctx.race) {
          ctx.race.done.fetch_add(1, Ordering::AcqRel);
          return 0;
      }

    pr_info!("revoker: about to call revoke\n");
    ctx.state.revoke();
    pr_info!("thread {} revoked state\n", ctx.thread_id);

      ctx.race.done.fetch_add(1, Ordering::AcqRel);
      0
  }

  fn spawn_read_thread(
      state: Arc<Revocable<DeviceState>>,
      race: Arc<RaceSync>,
      thread_id: u32,
  ) -> Result {
      let ctx = Arc::new(ThreadContext { state, race, thread_id }, GFP_KERNEL)?;
      let data = Arc::into_raw(ctx) as *mut kernel::ffi::c_void;

      let task = match from_err_ptr(unsafe {
          bindings::kthread_create_on_node(
              Some(read_value_thread),
              data,
              bindings::NUMA_NO_NODE,
              c_str!("revocable_reader/%u").as_char_ptr(),
              thread_id,
          )
      }) {
          Ok(task) => task,
          Err(err) => {
              // SAFETY: Thread creation failed, so the raw reference was not handed off.
              unsafe { drop(Arc::from_raw(data.cast::<ThreadContext>())) };
              return Err(err);
          }
      };

      // SAFETY: `task` is a valid sleeping kthread returned by `kthread_create_on_node`.
      unsafe { bindings::wake_up_process(task) };
      Ok(())
  }

  fn spawn_revoke_thread(
      state: Arc<Revocable<DeviceState>>,
      race: Arc<RaceSync>,
      thread_id: u32,
  ) -> Result {
      let ctx = Arc::new(ThreadContext { state, race, thread_id }, GFP_KERNEL)?;
      let data = Arc::into_raw(ctx) as *mut kernel::ffi::c_void;

      let task = match from_err_ptr(unsafe {
          bindings::kthread_create_on_node(
              Some(revoke_state_thread),
              data,
              bindings::NUMA_NO_NODE,
              c_str!("revocable_revoke/%u").as_char_ptr(),
              thread_id,
          )
      }) {
          Ok(task) => task,
          Err(err) => {
              // SAFETY: Thread creation failed, so the raw reference was not handed off.
              unsafe { drop(Arc::from_raw(data.cast::<ThreadContext>())) };
              return Err(err);
          }
      };

      // SAFETY: `task` is a valid sleeping kthread returned by `kthread_create_on_node`.
      unsafe { bindings::wake_up_process(task) };
      Ok(())
  }

 impl kernel::Module for RevocableExample {
      fn init(_module: &'static ThisModule) -> Result<Self> {
          let state = Arc::pin_init(Revocable::new(DeviceState { value: 123 }), GFP_KERNEL)?;
          let race = Arc::new(
              RaceSync {
                  ready: AtomicU32::new(0),
                  done: AtomicU32::new(0),
                  go: AtomicBool::new(false),
                  abort: AtomicBool::new(false),
                  reader_may_enter: AtomicBool::new(false),
                  release_reader: AtomicBool::new(false),
                  reader_holding_ref: AtomicBool::new(false),
              },
              GFP_KERNEL,
          )?;

          let m = Self { state, race };
          revocable::test_hooks_reset();

          spawn_revoke_thread(m.state.clone(), m.race.clone(), 1)?;
          if let Err(err) = spawn_read_thread(m.state.clone(), m.race.clone(), 2) {
              m.race.abort.store(true, Ordering::Release);
              revocable::test_hooks_disable();
              while m.race.done.load(Ordering::Acquire) != m.race.ready.load(Ordering::Acquire) {
                  spin_loop();
              }
              return Err(err);
          }

          while m.race.ready.load(Ordering::Acquire) != 2 {
              spin_loop();
          }
          m.race.go.store(true, Ordering::Release);
          pr_info!("main: released reader and revoker together\n");

          revocable::test_hooks_wait_for(TEST_PHASE_AFTER_SWAP);
          pr_info!("main: observed revoke_internal after swap(false)\n");
          revocable::test_hooks_allow_through(TEST_PHASE_AFTER_SWAP);

          revocable::test_hooks_wait_for(TEST_PHASE_BEFORE_SYNC);
          pr_info!("main: releasing revoker into synchronize_rcu\n");
          revocable::test_hooks_allow_through(TEST_PHASE_BEFORE_SYNC);
          m.race.reader_may_enter.store(true, Ordering::Release);

          revocable::test_hooks_wait_for(TEST_PHASE_READER_BEFORE_LOAD);
          pr_info!("main: reader reached try_access_with_guard before load\n");
          revocable::test_hooks_allow_through(TEST_PHASE_READER_BEFORE_LOAD);

          revocable::test_hooks_wait_for(TEST_PHASE_AFTER_SYNC);
          pr_info!("main: revoker completed synchronize_rcu\n");
          if m.race.reader_holding_ref.load(Ordering::Acquire) {
              pr_info!(
                  "BUG_WINDOW: reader still holds reference after synchronize_rcu completed\n"
              );
          }
          revocable::test_hooks_allow_through(TEST_PHASE_AFTER_SYNC);

          revocable::test_hooks_wait_for(TEST_PHASE_BEFORE_DROP);
          pr_info!("main: revoker reached the drop_in_place boundary\n");
          if m.race.reader_holding_ref.load(Ordering::Acquire) {
              pr_info!(
                  "BUG_WINDOW: reader still holds reference immediately before drop_in_place\n"
              );
          }
          m.race.release_reader.store(true, Ordering::Release);
          revocable::test_hooks_allow_through(TEST_PHASE_BEFORE_DROP);
          revocable::test_hooks_allow_through(TEST_PHASE_AFTER_DROP);

          while m.race.done.load(Ordering::Acquire) != 2 {
              spin_loop();
          }
          revocable::test_hooks_disable();

          Ok(m)
      }
  }

  impl Drop for RevocableExample {
      fn drop(&mut self) {
          // Safe to call more than once; only the first revoke does the work.
          self.state.revoke();
      }
  }


Makefile:

obj-m += revoke_poc.o

KDIR := /lib/modules/$(shell uname -r)/build

PWD := $(shell pwd)

all:
	$(MAKE) -C $(KDIR) M=$(PWD) LLVM=1 modules

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean


Note that this bug could not be triggered on x86 architecture because it uses a Total Store Order (TSO) model.

Yuan Tan (1):
  rust: revocable: Fix racing access during revocation

 rust/kernel/revocable.rs | 56 ++++++++++++++++++++++++++--------------
 1 file changed, 37 insertions(+), 19 deletions(-)

-- 
2.43.2


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-06-05  6:35 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-06-05  5:02 [PATCH 0/1] rust: revocable: Fix racing access during revocation Yuan Tan
2026-06-05  5:05 ` [PATCH 1/1] " Yuan Tan
2026-06-05  6:09 ` [PATCH 0/1] " Boqun Feng
2026-06-05  6:34   ` Gary Guo

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox