From: Mitchell Levy <levymitchell0@gmail.com>
To: "Miguel Ojeda" <ojeda@kernel.org>,
"Alex Gaynor" <alex.gaynor@gmail.com>,
"Boqun Feng" <boqun.feng@gmail.com>,
"Gary Guo" <gary@garyguo.net>,
"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
"Andreas Hindborg" <a.hindborg@kernel.org>,
"Alice Ryhl" <aliceryhl@google.com>,
"Trevor Gross" <tmgross@umich.edu>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Dennis Zhou" <dennis@kernel.org>, "Tejun Heo" <tj@kernel.org>,
"Christoph Lameter" <cl@linux.com>,
"Danilo Krummrich" <dakr@kernel.org>,
"Benno Lossin" <lossin@kernel.org>,
"Yury Norov" <yury.norov@gmail.com>,
"Viresh Kumar" <viresh.kumar@linaro.org>
Cc: Tyler Hicks <code@tyhicks.com>,
linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org,
linux-mm@kvack.org, Mitchell Levy <levymitchell0@gmail.com>
Subject: [PATCH v3 6/7] rust: percpu: Add pin-hole optimizations for numerics
Date: Thu, 28 Aug 2025 12:00:13 -0700 [thread overview]
Message-ID: <20250828-rust-percpu-v3-6-4dd92e1e7904@gmail.com> (raw)
In-Reply-To: <20250828-rust-percpu-v3-0-4dd92e1e7904@gmail.com>
The C implementations of `this_cpu_add`, `this_cpu_sub`, etc., are
optimized to save an instruction by avoiding having to compute
`this_cpu_ptr(&x)` for some per-CPU variable `x`. For example, rather
than
u64 *x_ptr = this_cpu_ptr(&x);
*x_ptr += 5;
the implementation of `this_cpu_add` is clever enough to make use of the
fact that per-CPU variables are implemented on x86 via segment
registers, and so we can use only a single instruction (where we assume
`&x` is already in `rax`)
add gs:[rax], 5
Add this optimization via a `PerCpuNumeric` type to enable code-reuse
between `DynamicPerCpu` and `StaticPerCpu`.
Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
---
rust/kernel/percpu.rs | 1 +
rust/kernel/percpu/dynamic.rs | 2 +-
rust/kernel/percpu/numeric.rs | 128 ++++++++++++++++++++++++++++++++++++++++++
samples/rust/rust_percpu.rs | 36 ++++++++++++
4 files changed, 166 insertions(+), 1 deletion(-)
diff --git a/rust/kernel/percpu.rs b/rust/kernel/percpu.rs
index c68c7520b67f..c693d16518d2 100644
--- a/rust/kernel/percpu.rs
+++ b/rust/kernel/percpu.rs
@@ -4,6 +4,7 @@
pub mod cpu_guard;
mod dynamic;
+pub mod numeric;
mod static_;
#[doc(inline)]
diff --git a/rust/kernel/percpu/dynamic.rs b/rust/kernel/percpu/dynamic.rs
index 64f04cef3705..aad08e4b4251 100644
--- a/rust/kernel/percpu/dynamic.rs
+++ b/rust/kernel/percpu/dynamic.rs
@@ -7,7 +7,7 @@
/// Represents a dynamic allocation of a per-CPU variable via alloc_percpu. Calls free_percpu when
/// dropped.
-pub struct PerCpuAllocation<T>(PerCpuPtr<T>);
+pub struct PerCpuAllocation<T>(pub(super) PerCpuPtr<T>);
impl<T: Zeroable> PerCpuAllocation<T> {
/// Dynamically allocates a space in the per-CPU area suitably sized and aligned to hold a `T`,
diff --git a/rust/kernel/percpu/numeric.rs b/rust/kernel/percpu/numeric.rs
new file mode 100644
index 000000000000..4de93f653f0e
--- /dev/null
+++ b/rust/kernel/percpu/numeric.rs
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+//! Pin-hole optimizations for PerCpu<T> where T is a numeric type.
+
+use super::*;
+use core::arch::asm;
+
+/// Represents a per-CPU variable that can be manipulated with machine-intrinsic numeric
+/// operations.
+pub struct PerCpuNumeric<'a, T> {
+ // INVARIANT: `ptr.0` is a valid offset into the per-CPU area and is initialized on all CPUs
+ // (since we don't have a CPU guard, we have to be pessimistic and assume we could be on any
+ // CPU).
+ ptr: &'a PerCpuPtr<T>,
+}
+
+macro_rules! impl_ops {
+ ($ty:ty, $reg:tt) => {
+ impl DynamicPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU variable.
+ pub fn num(&mut self) -> PerCpuNumeric<'_, $ty> {
+ // The invariant is satisfied because `DynamicPerCpu`'s invariant guarantees that
+ // this pointer is valid and initialized on all CPUs.
+ PerCpuNumeric { ptr: &self.alloc.0 }
+ }
+ }
+ impl StaticPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU variable.
+ pub fn num(&mut self) -> PerCpuNumeric<'_, $ty> {
+ // The invariant is satisfied because `StaticPerCpu`'s invariant guarantees that
+ // this pointer is valid and initialized on all CPUs.
+ PerCpuNumeric { ptr: &self.0 }
+ }
+ }
+
+ impl PerCpuNumeric<'_, $ty> {
+ /// Adds `rhs` to the per-CPU variable.
+ pub fn add(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of this type.
+ unsafe {
+ asm!(
+ concat!("add gs:[{off}], {val:", $reg, "}"),
+ off = in(reg) self.ptr.0.cast::<*mut $ty>(),
+ val = in(reg) rhs,
+ );
+ }
+ }
+ }
+ impl PerCpuNumeric<'_, $ty> {
+ /// Subtracts `rhs` from the per-CPU variable.
+ pub fn sub(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of this type.
+ unsafe {
+ asm!(
+ concat!("sub gs:[{off}], {val:", $reg, "}"),
+ off = in(reg) self.ptr.0.cast::<*mut $ty>(),
+ val = in(reg) rhs,
+ );
+ }
+ }
+ }
+ };
+}
+
+macro_rules! impl_ops_byte {
+ ($ty:ty) => {
+ impl DynamicPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU
+ /// variable.
+ pub fn num(&mut self) -> PerCpuNumeric<'_, $ty> {
+ // The invariant is satisfied because `DynamicPerCpu`'s invariant guarantees that
+ // this pointer is valid and initialized on all CPUs.
+ PerCpuNumeric { ptr: &self.alloc.0 }
+ }
+ }
+ impl StaticPerCpu<$ty> {
+ /// Returns a `PerCpuNumeric` that can be used to manipulate the underlying per-CPU
+ /// variable.
+ pub fn num(&mut self) -> PerCpuNumeric<'_, $ty> {
+ // The invariant is satisfied because `DynamicPerCpu`'s invariant guarantees that
+ // this pointer is valid and initialized on all CPUs.
+ PerCpuNumeric { ptr: &self.0 }
+ }
+ }
+
+ impl PerCpuNumeric<'_, $ty> {
+ /// Adds `rhs` to the per-CPU variable.
+ pub fn add(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of this type.
+ unsafe {
+ asm!(
+ concat!("add gs:[{off}], {val}"),
+ off = in(reg) self.ptr.0.cast::<*mut $ty>(),
+ val = in(reg_byte) rhs,
+ );
+ }
+ }
+ }
+ impl PerCpuNumeric<'_, $ty> {
+ /// Subtracts `rhs` from the per-CPU variable.
+ pub fn sub(&mut self, rhs: $ty) {
+ // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+ // pointer relative to the `gs` segment register) by the invariants of this type.
+ unsafe {
+ asm!(
+ concat!("sub gs:[{off}], {val}"),
+ off = in(reg) self.ptr.0.cast::<*mut $ty>(),
+ val = in(reg_byte) rhs,
+ );
+ }
+ }
+ }
+ };
+}
+
+impl_ops_byte!(i8);
+impl_ops!(i16, "x");
+impl_ops!(i32, "e");
+impl_ops!(i64, "r");
+impl_ops!(isize, "r");
+
+impl_ops_byte!(u8);
+impl_ops!(u16, "x");
+impl_ops!(u32, "e");
+impl_ops!(u64, "r");
+impl_ops!(usize, "r");
diff --git a/samples/rust/rust_percpu.rs b/samples/rust/rust_percpu.rs
index 06b322019134..e3a46a053b8d 100644
--- a/samples/rust/rust_percpu.rs
+++ b/samples/rust/rust_percpu.rs
@@ -27,6 +27,26 @@
define_per_cpu!(UPERCPU: u64 = 0);
define_per_cpu!(CHECKED: RefCell<u64> = RefCell::new(0));
+macro_rules! make_optimization_test {
+ ($ty:ty) => {
+ let mut test: DynamicPerCpu<$ty> = DynamicPerCpu::new_zero(GFP_KERNEL).unwrap();
+ {
+ let _ = CpuGuard::new();
+ // SAFETY: No other usage of `test`
+ unsafe { test.get_mut(CpuGuard::new()) }.with(|val: &mut $ty| *val = 10);
+ test.num().add(1);
+ // SAFETY: No other usage of `test`
+ unsafe { test.get_mut(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 11));
+ test.num().add(10);
+ // SAFETY: No other usage of `test`
+ unsafe { test.get_mut(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 21));
+ test.num().sub(5);
+ // SAFETY: No other usage of `test`
+ unsafe { test.get_mut(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 16));
+ }
+ };
+}
+
impl kernel::Module for PerCpuMod {
fn init(_module: &'static ThisModule) -> Result<Self, Error> {
pr_info!("rust percpu test start\n");
@@ -198,6 +218,22 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
pr_info!("rust dynamic percpu test done\n");
+ pr_info!("rust numeric optimizations test start\n");
+
+ make_optimization_test!(u8);
+ make_optimization_test!(u16);
+ make_optimization_test!(u32);
+ make_optimization_test!(u64);
+ make_optimization_test!(usize);
+
+ make_optimization_test!(i8);
+ make_optimization_test!(i16);
+ make_optimization_test!(i32);
+ make_optimization_test!(i64);
+ make_optimization_test!(isize);
+
+ pr_info!("rust numeric optimizations test done\n");
+
// Return Err to unload the module
Result::Err(EINVAL)
}
--
2.34.1
next prev parent reply other threads:[~2025-08-28 19:01 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-28 19:00 [PATCH v3 0/7] rust: Add Per-CPU Variable API Mitchell Levy
2025-08-28 19:00 ` [PATCH v3 1/7] rust: percpu: introduce a rust API for per-CPU variables Mitchell Levy
2025-09-03 21:42 ` Yury Norov
2025-09-04 19:53 ` Mitchell Levy
2025-09-04 20:27 ` Yury Norov
2025-09-04 21:17 ` Mitchell Levy
2025-08-28 19:00 ` [PATCH v3 2/7] rust: percpu: add a rust per-CPU variable sample Mitchell Levy
2025-08-28 19:00 ` [PATCH v3 3/7] rust: cpumask: Add a `Cpumask` iterator Mitchell Levy
2025-08-29 5:19 ` Viresh Kumar
2025-08-28 19:00 ` [PATCH v3 4/7] rust: cpumask: Add getters for globally defined cpumasks Mitchell Levy
2025-08-29 5:20 ` Viresh Kumar
2025-09-03 22:03 ` Yury Norov
2025-09-04 19:55 ` Mitchell Levy
2025-08-28 19:00 ` [PATCH v3 5/7] rust: percpu: Support non-zeroable types for DynamicPerCpu Mitchell Levy
2025-09-03 22:19 ` Yury Norov
2025-09-04 20:26 ` Mitchell Levy
2025-09-04 20:37 ` Yury Norov
2025-09-04 21:05 ` Mitchell Levy
2025-09-04 21:46 ` Yury Norov
2025-09-04 21:57 ` Miguel Ojeda
2025-09-03 23:05 ` Miguel Ojeda
2025-09-04 20:17 ` Mitchell Levy
2025-09-04 20:37 ` Miguel Ojeda
2025-09-04 21:50 ` Mitchell Levy
2025-08-28 19:00 ` Mitchell Levy [this message]
2025-08-28 19:00 ` [PATCH v3 7/7] rust: percpu: cache per-CPU pointers in the dynamic case Mitchell Levy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250828-rust-percpu-v3-6-4dd92e1e7904@gmail.com \
--to=levymitchell0@gmail.com \
--cc=a.hindborg@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=alex.gaynor@gmail.com \
--cc=aliceryhl@google.com \
--cc=bjorn3_gh@protonmail.com \
--cc=boqun.feng@gmail.com \
--cc=cl@linux.com \
--cc=code@tyhicks.com \
--cc=dakr@kernel.org \
--cc=dennis@kernel.org \
--cc=gary@garyguo.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lossin@kernel.org \
--cc=ojeda@kernel.org \
--cc=rust-for-linux@vger.kernel.org \
--cc=tj@kernel.org \
--cc=tmgross@umich.edu \
--cc=viresh.kumar@linaro.org \
--cc=yury.norov@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).