linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mitchell Levy <levymitchell0@gmail.com>
To: "Miguel Ojeda" <ojeda@kernel.org>,
	"Alex Gaynor" <alex.gaynor@gmail.com>,
	"Boqun Feng" <boqun.feng@gmail.com>,
	"Gary Guo" <gary@garyguo.net>,
	"Björn Roy Baron" <bjorn3_gh@protonmail.com>,
	"Andreas Hindborg" <a.hindborg@kernel.org>,
	"Alice Ryhl" <aliceryhl@google.com>,
	"Trevor Gross" <tmgross@umich.edu>,
	"Andrew Morton" <akpm@linux-foundation.org>,
	"Dennis Zhou" <dennis@kernel.org>, "Tejun Heo" <tj@kernel.org>,
	"Christoph Lameter" <cl@linux.com>,
	"Danilo Krummrich" <dakr@kernel.org>,
	"Benno Lossin" <lossin@kernel.org>
Cc: linux-kernel@vger.kernel.org, rust-for-linux@vger.kernel.org,
	 linux-mm@kvack.org, Mitchell Levy <levymitchell0@gmail.com>
Subject: [PATCH v2 4/5] rust: percpu: Add pin-hole optimizations for numerics
Date: Sat, 12 Jul 2025 14:31:15 -0700	[thread overview]
Message-ID: <20250712-rust-percpu-v2-4-826f2567521b@gmail.com> (raw)
In-Reply-To: <20250712-rust-percpu-v2-0-826f2567521b@gmail.com>

The C implementations of `this_cpu_add`, `this_cpu_sub`, etc., are
optimized to save an instruction by avoiding having to compute
`this_cpu_ptr(&x)` for some per-CPU variable `x`. For example, rather
than

    u64 *x_ptr = this_cpu_ptr(&x);
    *x_ptr += 5;

the implementation of `this_cpu_add` makes use of the fact that, on x86,
per-CPU variables are addressed relative to a segment register (`gs`),
and so we can use only a single instruction (where we assume `&x` is
already in `rax`):

    add gs:[rax], 5

Add this optimization via a `PerCpuNumeric` type to enable code-reuse
between `DynamicPerCpu` and `StaticPerCpu`.

Signed-off-by: Mitchell Levy <levymitchell0@gmail.com>
---
 lib/percpu_test_rust.rs       |  36 +++++++++++++
 rust/kernel/percpu.rs         |   1 +
 rust/kernel/percpu/numeric.rs | 117 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+)

diff --git a/lib/percpu_test_rust.rs b/lib/percpu_test_rust.rs
index a9652e6ece08..114015435a85 100644
--- a/lib/percpu_test_rust.rs
+++ b/lib/percpu_test_rust.rs
@@ -25,6 +25,26 @@
 define_per_cpu!(PERCPU: i64 = 0);
 define_per_cpu!(UPERCPU: u64 = 0);
 
+macro_rules! make_optimization_test {
+    ($ty:ty) => {
+        let mut test: DynamicPerCpu<$ty> = DynamicPerCpu::new(GFP_KERNEL).unwrap();
+        {
+            let _guard = CpuGuard::new(); // named binding: `let _` would drop the guard at once
+            // SAFETY: `test` is a fresh local; nothing else accesses its per-CPU storage.
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| *val = 10);
+            test.num().add(1);
+            // SAFETY: `test` is a fresh local; nothing else accesses its per-CPU storage.
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 11));
+            test.num().add(10);
+            // SAFETY: `test` is a fresh local; nothing else accesses its per-CPU storage.
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 21));
+            test.num().sub(5);
+            // SAFETY: `test` is a fresh local; nothing else accesses its per-CPU storage.
+            unsafe { test.get(CpuGuard::new()) }.with(|val: &mut $ty| assert_eq!(*val, 16));
+        }
+    };
+}
+
 impl kernel::Module for PerCpuTestModule {
     fn init(_module: &'static ThisModule) -> Result<Self, Error> {
         pr_info!("rust percpu test start\n");
@@ -94,6 +114,22 @@ fn init(_module: &'static ThisModule) -> Result<Self, Error> {
 
         pr_info!("rust dynamic percpu test done\n");
 
+        pr_info!("rust numeric optimizations test start\n");
+
+        make_optimization_test!(u8);
+        make_optimization_test!(u16);
+        make_optimization_test!(u32);
+        make_optimization_test!(u64);
+        make_optimization_test!(usize);
+
+        make_optimization_test!(i8);
+        make_optimization_test!(i16);
+        make_optimization_test!(i32);
+        make_optimization_test!(i64);
+        make_optimization_test!(isize);
+
+        pr_info!("rust numeric optimizations test done\n");
+
         // Return Err to unload the module
         Result::Err(EINVAL)
     }
diff --git a/rust/kernel/percpu.rs b/rust/kernel/percpu.rs
index 7dfceb6aefd7..b97d1d07a614 100644
--- a/rust/kernel/percpu.rs
+++ b/rust/kernel/percpu.rs
@@ -2,6 +2,7 @@
 //! This module contains abstractions for creating and using per-CPU variables from Rust.
 //! See the define_per_cpu! macro and the DynamicPerCpu<T> type, as well as the PerCpu<T> trait.
 pub mod cpu_guard;
+pub mod numeric;
 
 use bindings::{alloc_percpu, free_percpu};
 
diff --git a/rust/kernel/percpu/numeric.rs b/rust/kernel/percpu/numeric.rs
new file mode 100644
index 000000000000..e4008f872af1
--- /dev/null
+++ b/rust/kernel/percpu/numeric.rs
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+//! Peephole optimizations for `PerCpu<T>` where `T` is a numeric type.
+
+use crate::percpu::*;
+use core::arch::asm;
+
+/// A borrowed handle to a per-CPU variable of numeric type `T`, on which arithmetic is performed
+/// directly with inline assembly (see the `add`/`sub` methods generated by the macros below).
+pub struct PerCpuNumeric<'a, T> {
+    ptr: &'a PerCpuPtr<T>,
+}
+
+macro_rules! impl_ops {
+    ($ty:ty, $reg:tt) => {
+        impl DynamicPerCpu<$ty> {
+            /// Returns a `PerCpuNumeric` borrowing this `DynamicPerCpu`'s underlying per-CPU pointer.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.alloc.0 }
+            }
+        }
+        impl StaticPerCpu<$ty> {
+            /// Returns a `PerCpuNumeric` borrowing this `StaticPerCpu`'s underlying per-CPU pointer.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.0 }
+            }
+        }
+
+        impl PerCpuNumeric<'_, $ty> {
+            /// Adds `rhs` in place with one `gs`-relative `add` instruction (wraps on overflow).
+            pub fn add(&mut self, rhs: $ty) {
+                // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+                // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+                unsafe {
+                    asm!(
+                        concat!("add gs:[{off}], {val:", $reg, "}"),
+                        off = in(reg) self.ptr.0 as *mut $ty,
+                        val = in(reg) rhs,
+                    );
+                }
+            }
+        }
+        impl PerCpuNumeric<'_, $ty> {
+            /// Subtracts `rhs` in place with one `gs`-relative `sub` instruction (wraps on overflow).
+            pub fn sub(&mut self, rhs: $ty) {
+                // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+                // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+                unsafe {
+                    asm!(
+                        concat!("sub gs:[{off}], {val:", $reg, "}"),
+                        off = in(reg) self.ptr.0 as *mut $ty,
+                        val = in(reg) rhs,
+                    );
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_ops_byte {
+    ($ty:ty) => {
+        impl DynamicPerCpu<$ty> {
+            /// Returns a `PerCpuNumeric` borrowing this `DynamicPerCpu`'s underlying per-CPU
+            /// pointer.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.alloc.0 }
+            }
+        }
+        impl StaticPerCpu<$ty> {
+            /// Returns a `PerCpuNumeric` borrowing this `StaticPerCpu`'s underlying per-CPU
+            /// pointer.
+            pub fn num(&self) -> PerCpuNumeric<'_, $ty> {
+                PerCpuNumeric { ptr: &self.0 }
+            }
+        }
+
+        impl PerCpuNumeric<'_, $ty> {
+            /// Adds `rhs` in place with one `gs`-relative byte `add` (wraps on overflow).
+            pub fn add(&mut self, rhs: $ty) {
+                // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+                // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+                unsafe {
+                    asm!(
+                        concat!("add gs:[{off}], {val}"),
+                        off = in(reg) self.ptr.0 as *mut $ty,
+                        val = in(reg_byte) rhs, // 8-bit operand, hence the `reg_byte` class
+                    );
+                }
+            }
+        }
+        impl PerCpuNumeric<'_, $ty> {
+            /// Subtracts `rhs` in place with one `gs`-relative byte `sub` (wraps on overflow).
+            pub fn sub(&mut self, rhs: $ty) {
+                // SAFETY: `self.ptr.0` is a valid offset into the per-CPU area (i.e., valid as a
+                // pointer relative to the `gs` segment register) by the invariants of PerCpu.
+                unsafe {
+                    asm!(
+                        concat!("sub gs:[{off}], {val}"),
+                        off = in(reg) self.ptr.0 as *mut $ty,
+                        val = in(reg_byte) rhs, // 8-bit operand, hence the `reg_byte` class
+                    );
+                }
+            }
+        }
+    };
+}
+
+impl_ops_byte!(i8); // 8-bit: handled by the `reg_byte` variant of the macro
+impl_ops!(i16, "x"); // `x` modifier: 16-bit register name (e.g. `ax`)
+impl_ops!(i32, "e"); // `e` modifier: 32-bit register name (e.g. `eax`)
+impl_ops!(i64, "r"); // `r` modifier: 64-bit register name (e.g. `rax`)
+impl_ops!(isize, "r"); // NOTE(review): assumes `isize` is 64 bits wide -- confirm per target
+
+impl_ops_byte!(u8); // 8-bit: handled by the `reg_byte` variant of the macro
+impl_ops!(u16, "x"); // `x` modifier: 16-bit register name (e.g. `ax`)
+impl_ops!(u32, "e"); // `e` modifier: 32-bit register name (e.g. `eax`)
+impl_ops!(u64, "r"); // `r` modifier: 64-bit register name (e.g. `rax`)
+impl_ops!(usize, "r"); // NOTE(review): assumes `usize` is 64 bits wide -- confirm per target

-- 
2.34.1


  parent reply	other threads:[~2025-07-12 21:32 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-07-12 21:31 [PATCH v2 0/5] rust: Add Per-CPU Variable API Mitchell Levy
2025-07-12 21:31 ` [PATCH v2 1/5] rust: percpu: introduce a rust API for per-CPU variables Mitchell Levy
2025-07-12 21:31 ` [PATCH v2 2/5] rust: rust-analyzer: add lib to dirs searched for crates Mitchell Levy
2025-07-12 21:31 ` [PATCH v2 3/5] rust: percpu: add a rust per-CPU variable test Mitchell Levy
2025-07-13  9:30   ` Benno Lossin
2025-07-15 10:31     ` Mitchell Levy
2025-07-15 11:31       ` Benno Lossin
2025-07-15 14:10         ` Boqun Feng
2025-07-15 15:55           ` Benno Lossin
2025-07-15 16:31             ` Boqun Feng
2025-07-15 17:44               ` Benno Lossin
2025-07-15 21:34                 ` Boqun Feng
2025-07-16 10:32                   ` Benno Lossin
2025-07-16 15:33                     ` Boqun Feng
2025-07-16 17:21                       ` Benno Lossin
2025-07-16 17:52                         ` Boqun Feng
2025-07-16 18:22                           ` Benno Lossin
2025-07-16 15:35                 ` Boqun Feng
2025-07-12 21:31 ` Mitchell Levy [this message]
2025-07-12 21:31 ` [PATCH v2 5/5] rust: percpu: cache per-CPU pointers in the dynamic case Mitchell Levy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250712-rust-percpu-v2-4-826f2567521b@gmail.com \
    --to=levymitchell0@gmail.com \
    --cc=a.hindborg@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=alex.gaynor@gmail.com \
    --cc=aliceryhl@google.com \
    --cc=bjorn3_gh@protonmail.com \
    --cc=boqun.feng@gmail.com \
    --cc=cl@linux.com \
    --cc=dakr@kernel.org \
    --cc=dennis@kernel.org \
    --cc=gary@garyguo.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lossin@kernel.org \
    --cc=ojeda@kernel.org \
    --cc=rust-for-linux@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=tmgross@umich.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).