From: Mitchell Levy <levymitchell0@gmail.com>
To: "Miguel Ojeda" <ojeda@kernel.org>,
"Alex Gaynor" <alex.gaynor@gmail.com>,
"Boqun Feng" <boqun.feng@gmail.com>,
"Gary Guo" <gary@garyguo.net>,
"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
"Andreas Hindborg" <a.hindborg@kernel.org>,
"Alice Ryhl" <aliceryhl@google.com>,
"Trevor Gross" <tmgross@umich.edu>,
"Andrew Morton" <akpm@linux-foundation.org>,
"Dennis Zhou" <dennis@kernel.org>, "Tejun Heo" <tj@kernel.org>,
"Christoph Lameter" <cl@linux.com>,
"Danilo Krummrich" <dakr@kernel.org>,
"Benno Lossin" <lossin@kernel.org>
Cc: linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org,
linux-mm@kvack.org, Mitchell Levy <levymitchell0@gmail.com>
Subject: [PATCH v2 4/5] rust: percpu: Add pin-hole optimizations for numerics
Date: Sat, 12 Jul 2025 14:31:15 -0700 [thread overview]
Message-ID: <20250712-rust-percpu-v2-4-826f2567521b@gmail.com> (raw)
In-Reply-To: <20250712-rust-percpu-v2-0-826f2567521b@gmail.com>
The C implementations of `this_cpu_add`, `this_cpu_sub`, etc., are
optimized to save an instruction by avoiding having to compute
`this_cpu_ptr(&x)` for some per-CPU variable `x`. For example, rather
than

    u64 *x_ptr = this_cpu_ptr(&x);
    *x_ptr += 5;

the implementation of `this_cpu_add` is clever enough to make use of the
fact that per-CPU variables are implemented on x86 via segment
registers, and so we can use only a single instruction (where we assume
`&x` is already in `rax`):

    add gs:[rax], 5

Add this optimization via a `PerCpuNumeric` type to enable code-reuse
between `DynamicPerCpu` and `StaticPerCpu`.
Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
---
lib/percpu_test_rust.rs | 36 +++++++++++++
rust/kernel/percpu.rs | 1 +
rust/kernel/percpu/numeric.rs | 117 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 154 insertions(+)
diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
index a9652e6ece08..114015435a85 100644
--- a/lib/percpu_test_rust.rs
+++ b/lib/percpu_test_rust.rs
@@ -25,6 +25,26 @@
define_per_cpu!(PERCPU: i64 = 0);
define_per_cpu!(UPERCPU: u64 = 0);
+macro_rules! make_optimization_test {
+    ($ty:ty) => {
+        let mut test: DynamicPerCpu<$ty> = DynamicPerCpu::new(GFP_KERNEL).unwrap();
+        {
+            let _guard = CpuGuard::new(); // `let _ = ...` would drop the guard at once
+            // SAFETY: No other usage of `test`
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| *val = 10);
+            test.num().add(1);
+            // SAFETY: No other usage of `test`
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 11));
+            test.num().add(10);
+            // SAFETY: No other usage of `test`
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 21));
+            test.num().sub(5);
+            // SAFETY: No other usage of `test`
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 16));
+        }
+    };
+}
+
impl kernel::Module for PerCpuTestModule {
fn init(_module: &'static ThisModule) -> Result<Self, Error> {
pr_info!("rust percpu test start\n");
@@ -94,6 +114,22 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
pr_info!("rust dynamic percpu test done\n");
+ pr_info!("rust numeric optimizations test start\n");
+
+ make_optimization_test!(u8);
+ make_optimization_test!(u16);
+ make_optimization_test!(u32);
+ make_optimization_test!(u64);
+ make_optimization_test!(usize);
+
+ make_optimization_test!(i8);
+ make_optimization_test!(i16);
+ make_optimization_test!(i32);
+ make_optimization_test!(i64);
+ make_optimization_test!(isize);
+
+ pr_info!("rust numeric optimizations test done\n");
+
// Return Err to unload the module
Result::Err(EINVAL)
}
diff --git a/rust/kernel/percpu.rs b/rust/kernel/percpu.rs
index 7dfceb6aefd7..b97d1d07a614 100644
--- a/rust/kernel/percpu.rs
+++ b/rust/kernel/percpu.rs
@@ -2,6 +2,7 @@
//! This module contains abstractions for creating and using per-CPU variables from Rust.
//! See the define_per_cpu! macro and the DynamicPerCpu<T> type, as well as the PerCpu<T> trait.
pub mod cpu_guard;
+pub mod numeric;
use bindings::{alloc_percpu, free_percpu};
diff --git a/rust/kernel/percpu/numeric.rs b/rust/kernel/percpu/numeric.rs
new file mode 100644
index 000000000000..e4008f872af1
--- /dev/null
+++ b/rust/kernel/percpu/numeric.rs
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+//! Peephole optimizations for `PerCpu<T>` where `T` is a numeric type.
+
+use crate::percpu::*;
+use core::arch::asm;
+
+/// A borrowed handle to a per-CPU variable, obtained from `num()` on `DynamicPerCpu` or
+/// `StaticPerCpu`, that can be manipulated with machine-intrinsic numeric operations.
+pub struct PerCpuNumeric<'a, T> {
+ ptr: &'a PerCpuPtr<T>,
+}
+
+macro_rules! impl_ops {
+    ($ty:ty, $reg:tt) => {
+        impl DynamicPerCpu<$ty> {
+            /// Creates a [`PerCpuNumeric`] handle through which the underlying per-CPU variable
+            /// can be manipulated.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.alloc.0 }
+            }
+        }
+        impl StaticPerCpu<$ty> {
+            /// Creates a [`PerCpuNumeric`] handle through which the underlying per-CPU variable
+            /// can be manipulated.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.0 }
+            }
+        }
+
+        impl PerCpuNumeric<'_, $ty> {
+            /// Adds `rhs` to the per-CPU variable with a single `gs`-relative `add`.
+            pub fn add(&mut self, rhs: $ty) {
+                // SAFETY: By the invariants of PerCpu, `self.ptr.0` is a valid offset into the
+                // per-CPU area, i.e. it is valid as a pointer relative to the `gs` register.
+                unsafe {
+                    asm!(
+                        concat!("add gs:[{addr}], {delta:", $reg, "}"),
+                        addr = in(reg) self.ptr.0 as *mut $ty,
+                        delta = in(reg) rhs,
+                    );
+                }
+            }
+            /// Subtracts `rhs` from the per-CPU variable with a single `gs`-relative `sub`.
+            pub fn sub(&mut self, rhs: $ty) {
+                // SAFETY: By the invariants of PerCpu, `self.ptr.0` is a valid offset into the
+                // per-CPU area, i.e. it is valid as a pointer relative to the `gs` register.
+                unsafe {
+                    asm!(
+                        concat!("sub gs:[{addr}], {delta:", $reg, "}"),
+                        addr = in(reg) self.ptr.0 as *mut $ty,
+                        delta = in(reg) rhs,
+                    );
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_ops_byte {
+    ($ty:ty) => {
+        impl DynamicPerCpu<$ty> {
+            /// Creates a [`PerCpuNumeric`] handle through which the underlying per-CPU variable
+            /// can be manipulated.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.alloc.0 }
+            }
+        }
+
+        impl StaticPerCpu<$ty> {
+            /// Creates a [`PerCpuNumeric`] handle through which the underlying per-CPU variable
+            /// can be manipulated.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.0 }
+            }
+        }
+
+        impl PerCpuNumeric<'_, $ty> {
+            /// Adds `rhs` to the per-CPU variable with a single `gs`-relative `add`.
+            pub fn add(&mut self, rhs: $ty) {
+                // SAFETY: By the invariants of PerCpu, `self.ptr.0` is a valid offset into the
+                // per-CPU area, i.e. it is valid as a pointer relative to the `gs` register.
+                unsafe {
+                    asm!(
+                        "add gs:[{addr}], {delta}",
+                        addr = in(reg) self.ptr.0 as *mut $ty,
+                        delta = in(reg_byte) rhs,
+                    );
+                }
+            }
+
+            /// Subtracts `rhs` from the per-CPU variable with a single `gs`-relative `sub`.
+            pub fn sub(&mut self, rhs: $ty) {
+                // SAFETY: By the invariants of PerCpu, `self.ptr.0` is a valid offset into the
+                // per-CPU area, i.e. it is valid as a pointer relative to the `gs` register.
+                unsafe {
+                    asm!(
+                        "sub gs:[{addr}], {delta}",
+                        addr = in(reg) self.ptr.0 as *mut $ty,
+                        delta = in(reg_byte) rhs,
+                    );
+                }
+            }
+        }
+    };
+}
+
+impl_ops_byte!(i8); // byte-sized operands go through the `reg_byte` register class
+impl_ops!(i16, "x"); // "x" modifier: 16-bit register name
+impl_ops!(i32, "e"); // "e" modifier: 32-bit register name
+impl_ops!(i64, "r"); // "r" modifier: 64-bit register name
+impl_ops!(isize, "r"); // NOTE(review): assumes `isize` is 64 bits wide -- confirm
+
+impl_ops_byte!(u8); // unsigned types mirror the signed ones above
+impl_ops!(u16, "x");
+impl_ops!(u32, "e");
+impl_ops!(u64, "r");
+impl_ops!(usize, "r"); // NOTE(review): assumes `usize` is 64 bits wide -- confirm
--
2.34.1
next prev parent reply other threads:[~2025-07-12 21:32 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-12 21:31 [PATCH v2 0/5] rust: Add Per-CPU Variable API Mitchell Levy
2025-07-12 21:31 ` [PATCH v2 1/5] rust: percpu: introduce a rust API for per-CPU variables Mitchell Levy
2025-07-12 21:31 ` [PATCH v2 2/5] rust: rust-analyzer: add lib to dirs searched for crates Mitchell Levy
2025-07-12 21:31 ` [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test Mitchell Levy
2025-07-13 9:30 ` Benno Lossin
2025-07-15 10:31 ` Mitchell Levy
2025-07-15 11:31 ` Benno Lossin
2025-07-15 14:10 ` Boqun Feng
2025-07-15 15:55 ` Benno Lossin
2025-07-15 16:31 ` Boqun Feng
2025-07-15 17:44 ` Benno Lossin
2025-07-15 21:34 ` Boqun Feng
2025-07-16 10:32 ` Benno Lossin
2025-07-16 15:33 ` Boqun Feng
2025-07-16 17:21 ` Benno Lossin
2025-07-16 17:52 ` Boqun Feng
2025-07-16 18:22 ` Benno Lossin
2025-07-16 15:35 ` Boqun Feng
2025-07-12 21:31 ` Mitchell Levy [this message]
2025-07-12 21:31 ` [PATCH v2 5/5] rust: percpu: cache per-CPU pointers in the dynamic case Mitchell Levy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250712-rust-percpu-v2-4-826f2567521b@gmail.com \
--to=levymitchell0@gmail.com \
--cc=a.hindborg@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=alex.gaynor@gmail.com \
--cc=aliceryhl@google.com \
--cc=bjorn3_gh@protonmail.com \
--cc=boqun.feng@gmail.com \
--cc=cl@linux.com \
--cc=dakr@kernel.org \
--cc=dennis@kernel.org \
--cc=gary@garyguo.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lossin@kernel.org \
--cc=ojeda@kernel.org \
--cc=rust-for-linux@vger.kernel.org \
--cc=tj@kernel.org \
--cc=tmgross@umich.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.