Linux Documentation
 help / color / mirror / Atom feed
* [PATCH RESEND v10 6/8] perf: arm_cspmu: Switch to acpi_get_cpu_uid() from get_acpi_id_for_cpu()
From: Chengwen Feng @ 2026-04-01  8:16 UTC (permalink / raw)
  To: Bjorn Helgaas, Catalin Marinas, Will Deacon, Rafael J . Wysocki
  Cc: Jonathan Corbet, WANG Xuerui, Thomas Gleixner, Dave Hansen,
	H . Peter Anvin, Juergen Gross, Boris Ostrovsky, Len Brown,
	Sunil V L, Mark Rutland, Jonathan Cameron, Kees Cook, Yanteng Si,
	Sean Christopherson, Kai Huang, Tom Lendacky, Thomas Huth,
	Thorsten Blum, Kevin Loughlin, Zheyun Shen, Peter Zijlstra,
	Pawan Gupta, Xin Li, Ahmed S . Darwish, Sohil Mehta,
	Ilkka Koskinen, Robin Murphy, James Clark, Besar Wicaksono, Ma Ke,
	Wei Huang, Andy Gospodarek, Somnath Kotur, punit.agrawal,
	guohanjun, suzuki.poulose, ryan.roberts, chenl311, masahiroy,
	wangyuquan1236, anshuman.khandual, heinrich.schuchardt,
	Eric.VanTassell, wangzhou1, wanghuiqiang, liuyonglong,
	fengchengwen, linux-pci, linux-doc, linux-kernel,
	linux-arm-kernel, loongarch, linux-riscv, xen-devel, linux-acpi,
	linux-perf-users, stable, x86
In-Reply-To: <20260401081640.26875-1-fengchengwen@huawei.com>

Update arm_cspmu to use acpi_get_cpu_uid() instead of
get_acpi_id_for_cpu(), aligning with the unified ACPI CPU UID interface.

No functional changes are introduced by this switch (valid inputs retain
original behavior).

Cc: stable@vger.kernel.org
Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
---
 drivers/perf/arm_cspmu/arm_cspmu.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/perf/arm_cspmu/arm_cspmu.c b/drivers/perf/arm_cspmu/arm_cspmu.c
index 34430b68f602..ed72c3d1f796 100644
--- a/drivers/perf/arm_cspmu/arm_cspmu.c
+++ b/drivers/perf/arm_cspmu/arm_cspmu.c
@@ -1107,15 +1107,17 @@ static int arm_cspmu_acpi_get_cpus(struct arm_cspmu *cspmu)
 {
 	struct acpi_apmt_node *apmt_node;
 	int affinity_flag;
+	u32 cpu_uid;
 	int cpu;
+	int ret;
 
 	apmt_node = arm_cspmu_apmt_node(cspmu->dev);
 	affinity_flag = apmt_node->flags & ACPI_APMT_FLAGS_AFFINITY;
 
 	if (affinity_flag == ACPI_APMT_FLAGS_AFFINITY_PROC) {
 		for_each_possible_cpu(cpu) {
-			if (apmt_node->proc_affinity ==
-			    get_acpi_id_for_cpu(cpu)) {
+			ret = acpi_get_cpu_uid(cpu, &cpu_uid);
+			if (ret == 0 && apmt_node->proc_affinity == cpu_uid) {
 				cpumask_set_cpu(cpu, &cspmu->associated_cpus);
 				break;
 			}
-- 
2.17.1


^ permalink raw reply related

* Re: [PATCH v10 0/8] ACPI: Unify CPU UID interface and fix ARM64 TPH steer-tag issue
From: fengchengwen @ 2026-04-01  8:22 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Bjorn Helgaas, Catalin Marinas, Will Deacon, Jonathan Corbet,
	Palmer Dabbelt, Borislav Petkov, H . Peter Anvin, Juergen Gross,
	Boris Ostrovsky, Sunil V L, Mark Rutland, Jonathan Cameron,
	Kees Cook, Yanteng Si, Sean Christopherson, Kai Huang,
	Tom Lendacky, Thomas Huth, Thorsten Blum, Kevin Loughlin,
	Zheyun Shen, Peter Zijlstra, Pawan Gupta, Xin Li,
	Ahmed S . Darwish, Sohil Mehta, Ilkka Koskinen, Robin Murphy,
	James Clark, Besar Wicaksono, Ma Ke, Wei Huang, Andy Gospodarek,
	Somnath Kotur, punit.agrawal, guohanjun, suzuki.poulose,
	ryan.roberts, chenl311, masahiroy, wangyuquan1236,
	anshuman.khandual, heinrich.schuchardt, Eric.VanTassell,
	wangzhou1, wanghuiqiang, liuyonglong, linux-pci, linux-doc,
	linux-kernel, linux-arm-kernel, loongarch, linux-riscv, xen-devel,
	linux-acpi, linux-perf-users
In-Reply-To: <CAJZ5v0gO1VSPs58JeupaiTZBkP3i_-H2bEQpfU-6k2TXjdiaOQ@mail.gmail.com>

On 3/31/2026 8:24 PM, Rafael J. Wysocki wrote:
> On Fri, Mar 20, 2026 at 4:17 AM Chengwen Feng <fengchengwen@huawei.com> wrote:
>>
>> This patchset unifies ACPI Processor UID retrieval across
>> arm64/loongarch/riscv/x86 via acpi_get_cpu_uid() (with input validation)
>> and fixes ARM64 CPU steer-tag retrieval failure in PCI/TPH:
>>
>> 1-4: Add acpi_get_cpu_uid() for arm64/loongarch/riscv/x86 (update
>>      respective users)
>> 5: Centralize acpi_get_cpu_uid() declaration in include/linux/acpi.h
>> 6: Clean up perf/arm_cspmu
>> 7: Clean up ACPI/PPTT and remove unused get_acpi_id_for_cpu()
>> 8: Pass ACPI Processor UID to Cache Locality _DSM
>>
>> The interface refactor ensures consistent CPU UID retrieval across
>> architectures (no functional changes for valid inputs) and provides the
>> unified interface required for the ARM64 TPH fix.
>>
>> ---
>> Changes in v10:
>> - Refine commit header&log according to Punit's and Bjorn's review
>> - Split perf/arm_cspmu into a separate commit, which addresses Punit's
>>   review
>>
>> Changes in v9:
>> - Address Bjorn's review: split the commits per platform to make
>>   them easier to review
>>
>> Changes in v8:
>> - Move arm64's get_cpu_for_acpi_id() to kernel/acpi.c, which addresses
>>   Jeremy's review
>>
>> Chengwen Feng (8):
>>   arm64: acpi: Add acpi_get_cpu_uid() for unified ACPI CPU UID retrieval
>>   LoongArch: Add acpi_get_cpu_uid() for unified ACPI CPU UID retrieval
>>   RISC-V: ACPI: Add acpi_get_cpu_uid() for unified ACPI CPU UID
>>     retrieval
>>   x86/acpi: Add acpi_get_cpu_uid() for unified ACPI CPU UID retrieval
>>   ACPI: Centralize acpi_get_cpu_uid() declaration in
>>     include/linux/acpi.h
>>   perf: arm_cspmu: Switch to acpi_get_cpu_uid() from
>>     get_acpi_id_for_cpu()
>>   ACPI: PPTT: Use acpi_get_cpu_uid() and remove get_acpi_id_for_cpu()
>>   PCI/TPH: Pass ACPI Processor UID to Cache Locality _DSM
>>
>>  Documentation/PCI/tph.rst          |  4 +--
>>  arch/arm64/include/asm/acpi.h      | 17 +---------
>>  arch/arm64/kernel/acpi.c           | 30 ++++++++++++++++++
>>  arch/loongarch/include/asm/acpi.h  |  5 ---
>>  arch/loongarch/kernel/acpi.c       |  9 ++++++
>>  arch/riscv/include/asm/acpi.h      |  4 ---
>>  arch/riscv/kernel/acpi.c           | 16 ++++++++++
>>  arch/riscv/kernel/acpi_numa.c      |  9 ++++--
>>  arch/x86/include/asm/cpu.h         |  1 -
>>  arch/x86/include/asm/smp.h         |  1 -
>>  arch/x86/kernel/acpi/boot.c        | 20 ++++++++++++
>>  arch/x86/xen/enlighten_hvm.c       |  5 +--
>>  drivers/acpi/pptt.c                | 50 ++++++++++++++++++++++--------
>>  drivers/acpi/riscv/rhct.c          |  7 ++++-
>>  drivers/pci/tph.c                  | 16 +++++++---
>>  drivers/perf/arm_cspmu/arm_cspmu.c |  6 ++--
>>  include/linux/acpi.h               | 11 +++++++
>>  include/linux/pci-tph.h            |  4 +--
>>  18 files changed, 158 insertions(+), 57 deletions(-)
>>
>> --
> 
> It doesn't look like anyone has a particular heartburn related to this
> series, so I could apply it in principle, but I'd appreciate some ACKs
> from arch maintainers.
> 
> Why don't you resend it with all of the tags collected so far (and
> please add x86@kernel.org to the CC list)?

Done: I have resent v10 with all the review/ack tags collected, and added x86@kernel.org to the CC list.

Thanks

^ permalink raw reply

* Re: [PATCH v10 01/21] gpu: nova-core: gsp: Return GspStaticInfo from boot()
From: Eliot Courtney @ 2026-04-01  8:25 UTC (permalink / raw)
  To: Joel Fernandes, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-2-joelagnelf@nvidia.com>

On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
> Refactor the GSP boot function to return only the GspStaticInfo,
> removing the FbLayout from the return tuple.
>
> This enables access required for memory management initialization to:
> - bar1_pde_base: BAR1 page directory base.
> - bar2_pde_base: BAR2 page directory base.
> - usable memory regions in vidmem.
>
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---

Please see my feedback from v9[1] which still applies.

[1]: https://lore.kernel.org/all/DH0LH3D38CZ1.2DK8BN4CMU4FW@nvidia.com/

^ permalink raw reply

* Re: [PATCH v10 02/21] gpu: nova-core: gsp: Extract usable FB region from GSP
From: Eliot Courtney @ 2026-04-01  8:27 UTC (permalink / raw)
  To: Joel Fernandes, linux-kernel
  Cc: Miguel Ojeda, Boqun Feng, Gary Guo, Bjorn Roy Baron, Benno Lossin,
	Andreas Hindborg, Alice Ryhl, Trevor Gross, Danilo Krummrich,
	Dave Airlie, Daniel Almeida, Koen Koning, dri-devel,
	rust-for-linux, Nikola Djukic, Maarten Lankhorst, Maxime Ripard,
	Thomas Zimmermann, David Airlie, Simona Vetter, Jonathan Corbet,
	Alex Deucher, Christian Koenig, Jani Nikula, Joonas Lahtinen,
	Rodrigo Vivi, Tvrtko Ursulin, Huang Rui, Matthew Auld,
	Matthew Brost, Lucas De Marchi, Thomas Hellstrom, Helge Deller,
	Alex Gaynor, Boqun Feng, John Hubbard, Alistair Popple,
	Timur Tabi, Edwin Peer, Alexandre Courbot, Andrea Righi,
	Andy Ritger, Zhi Wang, Balbir Singh, Philipp Stanner,
	Elle Rhumsaa, alexeyi, Eliot Courtney, joel, linux-doc, amd-gfx,
	intel-gfx, intel-xe, linux-fbdev
In-Reply-To: <20260331212048.2229260-3-joelagnelf@nvidia.com>

On Wed Apr 1, 2026 at 6:20 AM JST, Joel Fernandes wrote:
> Add first_usable_fb_region() to GspStaticConfigInfo to extract the first
> usable FB region from GSP's fbRegionInfoParams. Usable regions are those
> that are not reserved or protected.
>
> The extracted region is stored in GetGspStaticInfoReply and exposed as
> usable_fb_region field for use by the memory subsystem.
>
> Cc: Nikola Djukic <ndjukic@nvidia.com>
> Signed-off-by: Joel Fernandes <joelagnelf@nvidia.com>
> ---

Please see my feedback from v9[1] which still applies.

[1]: https://lore.kernel.org/all/DH1GK30TUB4V.2GR6ANXIZDFFQ@nvidia.com/

thanks

^ permalink raw reply

* Re: [PATCH v11 03/22] drm: Add new general DRM property "color format"
From: Michel Dänzer @ 2026-04-01  8:27 UTC (permalink / raw)
  To: Nicolas Frattaroli, Ville Syrjälä, Dave Stevenson
  Cc: Harry Wentland, Leo Li, Rodrigo Siqueira, Alex Deucher,
	Christian König, David Airlie, Simona Vetter,
	Maarten Lankhorst, Maxime Ripard, Thomas Zimmermann,
	Andrzej Hajda, Neil Armstrong, Robert Foss, Laurent Pinchart,
	Jonas Karlman, Jernej Skrabec, Sandy Huang, Heiko Stübner,
	Andy Yan, Jani Nikula, Rodrigo Vivi, Joonas Lahtinen,
	Tvrtko Ursulin, Dmitry Baryshkov, Sascha Hauer, Rob Herring,
	Jonathan Corbet, Shuah Khan, kernel, amd-gfx, dri-devel,
	linux-kernel, linux-arm-kernel, linux-rockchip, intel-gfx,
	intel-xe, linux-doc, Werner Sembach, Andri Yngvason, Marius Vlad
In-Reply-To: <7991520.DvuYhMxLoT@workhorse>

On 3/26/26 13:02, Nicolas Frattaroli wrote:
> On Thursday, 26 March 2026 12:16:12 Central European Standard Time Dave Stevenson wrote:
>> On Wed, 25 Mar 2026 at 13:43, Ville Syrjälä
>> <ville.syrjala@linux.intel.com> wrote:
>>> On Wed, Mar 25, 2026 at 12:49:19PM +0000, Dave Stevenson wrote:
>>>> On Tue, 24 Mar 2026 at 16:02, Nicolas Frattaroli
>>>> <nicolas.frattaroli@collabora.com> wrote:
>>>>>
>>>>> +/**
>>>>> + * enum drm_connector_color_format - Connector Color Format Request
>>>>> + *
>>>>> + * This enum, unlike &enum drm_output_color_format, is used to specify requests
>>>>> + * for a specific color format on a connector through the DRM "color format"
>>>>> + * property. The difference is that it has an "AUTO" value to specify that
>>>>> + * no specific choice has been made.
>>>>> + */
>>>>> +enum drm_connector_color_format {
>>>>> +       /**
>>>>> +        * @DRM_CONNECTOR_COLOR_FORMAT_AUTO: The driver or display protocol
>>>>> +        * helpers should pick a suitable color format. All implementations of a
>>>>> +        * specific display protocol must behave the same way with "AUTO", but
>>>>> +        * different display protocols do not necessarily have the same "AUTO"
>>>>> +        * semantics.
>>>>> +        *
>>>>> +        * For HDMI, "AUTO" picks RGB, but falls back to YCbCr 4:2:0 if the
>>>>> +        * bandwidth required for full-scale RGB is not available, or the mode
>>>>> +        * is YCbCr 4:2:0-only, as long as the mode and output both support
>>>>> +        * YCbCr 4:2:0.
>>>>
>>>> Is there a reason you propose dropping back to YCbCr 4:2:0 without
>>>> trying YCbCr 4:2:2 first? Minimising the subsampling is surely
>>>> beneficial, and vc4 for one can do 4:2:2 but not 4:2:0.
>>>
>>> On HDMI 4:2:2 is always 12bpc, so it doesn't save any bandwidth
>>> compared to 8bpc 4:4:4.
>>
>> It does save bandwidth against 10 or 12bpc RGB 4:4:4.
>>
>> Or is the implication that max_bpc = 12 and
>> DRM_CONNECTOR_COLOR_FORMAT_AUTO should drop bpc down to 8 and select
>> RGB in preference to selecting 4:2:2?
> 
> Yes. Some people consider max-bpc to not be a legitimate way of requesting
> an actual bpc, and don't think drivers will choose the highest bpc <= max-bpc,
> and instead may negotiate a fantasy number anywhere below or equal to max-bpc.

Ridiculing others like this for disagreeing with you is uncalled for.

Is there any evidence for your claim that the driver must always use the 
highest possible bpc <= max-bpc?


> Of course this logic could be done in userspace which knows whether the
> less chroma for more bit depth trade-off is worth it, but userspace does
> not know the negotiated link bpc, and my attempts at adding a property for
> it are being blocked.

Assuming you're referring to the concerns I raised there, I don't have the power or intent to block it.


-- 
Earthling Michel Dänzer       \        GNOME / Xwayland / Mesa developer
https://redhat.com             \               Libre software enthusiast

^ permalink raw reply

* Re: [PATCH 5/5] types: Add standard __ob_trap and __ob_wrap scalar types
From: Peter Zijlstra @ 2026-04-01  8:31 UTC (permalink / raw)
  To: Kees Cook
  Cc: Linus Torvalds, Justin Stitt, Miguel Ojeda, Nathan Chancellor,
	Andrew Morton, Andy Shevchenko, Arnd Bergmann, Mark Rutland,
	Matthew Wilcox (Oracle), Suren Baghdasaryan, Thomas Gleixner,
	Finn Thain, Geert Uytterhoeven, Thomas Weißschuh, llvm,
	Marco Elver, Jonathan Corbet, Nicolas Schier, Greg Kroah-Hartman,
	linux-kernel, kasan-dev, linux-hardening, linux-doc, linux-kbuild
In-Reply-To: <202603311321.4EE9FEA@keescook>

On Tue, Mar 31, 2026 at 01:31:16PM -0700, Kees Cook wrote:

(still slowly digesting the thread)

> Yeah, as you mentioned earlier, I'd agree that nesting is rarely
> useful. The only thing I'd want to be careful about is ordering/scope. I
> *think* it would just operate as a "goto" and things like the cleanup.h
> handlers wouldn't be involved: they operate when a scope is crossed
> like before. And I think the overflow result wouldn't be represented
> anywhere. i.e. the wrapped/truncated value wouldn't be stored:
> 
> int func()
> {
> 	...
> 	u8 __ob_trap product = 5;
> 	...
> 	product = a * b; // if store is truncated, goto __overflow
> 	...
> 	return product;
> 
> __overflow:
> 	pr_info("%u\n", product); // shows "5"
> 	return -1;
> }

Note that there is a 'fun' problem with this in combination with
cleanup.h.

Something like:

int func()
{
	u8 __ob_trap prod = 0;

	scoped_guard (mutex, &my_lock) {
		prod = a * b;
	}

	return prod;

__overflow:
	// whatever
	return -1;
}

is fine. *HOWEVER*, something like:

int func()
{
	int __ob_trap size = base + count * extra;
	int err;

	struct my_obj *obj __cleanup(kfree) = kzalloc(size, GFP_KERNEL);

	err = my_obj_init(obj);
	if (err)
		return ERR_PTR(err);

	return_ptr(obj);

__overflow:
	// what now..
	return NULL;
}

is most terribly broken. Specifically, the goto will jump into the scope
of obj -- and that is not allowed.



^ permalink raw reply

* Re: [PATCH 5/5] types: Add standard __ob_trap and __ob_wrap scalar types
From: Peter Zijlstra @ 2026-04-01  8:57 UTC (permalink / raw)
  To: Kees Cook
  Cc: Linus Torvalds, Justin Stitt, Miguel Ojeda, Nathan Chancellor,
	Andrew Morton, Andy Shevchenko, Arnd Bergmann, Mark Rutland,
	Matthew Wilcox (Oracle), Suren Baghdasaryan, Thomas Gleixner,
	Finn Thain, Geert Uytterhoeven, Thomas Weißschuh, llvm,
	Marco Elver, Jonathan Corbet, Nicolas Schier, Greg Kroah-Hartman,
	linux-kernel, kasan-dev, linux-hardening, linux-doc, linux-kbuild
In-Reply-To: <202603311321.4EE9FEA@keescook>

On Tue, Mar 31, 2026 at 01:31:16PM -0700, Kees Cook wrote:

> int func()
> {
> 	...
> 	u8 __ob_trap product = 5;
> 	...
> 	product = a * b; // if store is truncated, goto __overflow
> 	...
> 	return product;
> 
> __overflow:
> 	pr_info("%u\n", product); // shows "5"

I'm confused by this 'product is still 5' thing. It seems to me that
making this happen will, in general, require more instructions/registers
than allowing the old value to be clobbered and have product be the
truncated result of whatever overflow.

Specifically, what is the value of preserving the old value?

> 	return -1;
> }

^ permalink raw reply

* Re: [PATCH v8 02/10] x86/bhi: Make clear_bhb_loop() effective on newer CPUs
From: David Laight @ 2026-04-01  9:02 UTC (permalink / raw)
  To: Pawan Gupta
  Cc: Borislav Petkov, x86, Jon Kohler, Nikolay Borisov, H. Peter Anvin,
	Josh Poimboeuf, David Kaplan, Sean Christopherson, Dave Hansen,
	Peter Zijlstra, Alexei Starovoitov, Daniel Borkmann,
	Andrii Nakryiko, KP Singh, Jiri Olsa, David S. Miller,
	Andy Lutomirski, Thomas Gleixner, Ingo Molnar, David Ahern,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, Stanislav Fomichev, Hao Luo, Paolo Bonzini,
	Jonathan Corbet, linux-kernel, kvm, Asit Mallick, Tao Zhang, bpf,
	netdev, linux-doc
In-Reply-To: <20260401081236.3rjp2wigkr6w3nym@desk>

On Wed, 1 Apr 2026 01:12:36 -0700
Pawan Gupta <pawan.kumar.gupta@linux.intel.com> wrote:

> On Sat, Mar 28, 2026 at 10:08:37AM +0000, David Laight wrote:
> > On Fri, 27 Mar 2026 17:42:56 -0700
> > Pawan Gupta <pawan.kumar.gupta@linux.intel.com> wrote:
> >   
> > > On Thu, Mar 26, 2026 at 01:29:31PM -0700, Pawan Gupta wrote:  
> > > > On Thu, Mar 26, 2026 at 10:45:57AM +0000, David Laight wrote:    
> > > > > On Thu, 26 Mar 2026 11:01:20 +0100
> > > > > Borislav Petkov <bp@alien8.de> wrote:
> > > > >     
> > > > > > On Thu, Mar 26, 2026 at 01:39:34AM -0700, Pawan Gupta wrote:    
> > > > > > > I believe the equivalent for cpu_feature_enabled() in asm is the
> > > > > > > ALTERNATIVE. Please let me know if I am missing something.      
> > > > > > 
> > > > > > Yes, you are.
> > > > > > 
> > > > > > The point is that you don't want to stick those alternative calls inside some
> > > > > > magic bhb_loop function but hand them in from the outside, as function
> > > > > > arguments.
> > > > > > 
> > > > > > Basically what I did.
> > > > > > 
> > > > > > Then you were worried about this being C code and it had to be noinstr... So
> > > > > > that outer function can be rewritten in asm, I think, and still keep it well
> > > > > > separate.
> > > > > > 
> > > > > > I'll try to rewrite it once I get a free minute, and see how it looks.
> > > > > >     
> > > > > 
> > > > > I think someone tried getting C code to write the values to global data
> > > > > and getting the asm to read them.
> > > > > That got discounted because it split things between two largely unrelated files.
> > > > 
> > > > 
> > > > The implementation with global variables wasn't that bad, let me revive it.
> > > > 
> > > > This part ties the sequence to the BHI mitigation, which is not ideal
> > > > (because VMSCAPE also uses it), but it does seem a cleaner option.
> > > > 
> > > > --- a/arch/x86/kernel/cpu/bugs.c
> > > > +++ b/arch/x86/kernel/cpu/bugs.c
> > > > @@ -2095,6 +2095,11 @@ static void __init bhi_select_mitigation(void)
> > > > 
> > > >  static void __init bhi_update_mitigation(void)
> > > >  {
> > > > +   if (!cpu_feature_enabled(X86_FEATURE_BHI_CTRL)) {
> > > > +       bhi_seq_outer_loop = 5;
> > > > +       bhi_seq_inner_loop = 5;
> > > > +   }
> > > > +
> > > > 
> > > > I believe this can be moved to somewhere common to all mitigations.
> > > >     
> > > > > I think the BPF code would need significant refactoring to call a C function.    
> > > > 
> > > > Ya, true. Will use globals and keep clear_bhb_loop() in asm.    
> > > 
> > > While testing this approach, I noticed that syscalls were suffering an 8%
> > > regression on ICX for Native BHI mitigation:
> > > 
> > >   $ perf bench syscall basic -l 100000000
> > > 
> > > Bisection pointed to the change for using 8-bit registers (al/ah replacing
> > > eax/ecx) as the main contributor to the regression. (Global variables added
> > > a bit, but within noise).
> > > 
> > > Further digging revealed a strange behavior, using %ah for the inner loop
> > > was causing the regression, interchanging %al and %ah in the loops
> > > (for movb and sub) eliminated the regression.
> > > 
> > > <clear_bhb_loop_nofence>:
> > > 
> > > 	movb	bhb_seq_outer_loop(%rip), %al
> > > 
> > > 	call	1f
> > > 	jmp	5f
> > > 1:	call	2f
> > > .Lret1:	RET
> > > 2:	movb	bhb_seq_inner_loop(%rip), %ah
> > > 3:	jmp	4f
> > > 	nop
> > > 4:	sub	$1, %ah <---- No regression with %al here
> > > 	jnz	3b
> > > 	sub	$1, %al
> > > 	jnz	1b
> > > 
> > > My guess is, "sub $1, %al" is faster than "sub $1, %ah". Using %al in the
> > > inner loop, which is executed more times, is likely making the
> > > difference. A perf profile is needed to confirm this.  
> > 
> > I bet it is also CPU dependent - it is quite likely that there isn't
> > any special hardware to support partial writes of %ah so it ends up taking
> > a slow path (possibly even a microcoded one to get an 8% regression).  
> 
> Strangely, %ah in the inner loop incurs fewer uops and has fewer branch
> misses, yet takes more cycles. Below is the perf data for the sequence on a
> Rocket Lake (similar observation on ICX and EMR):
> 
>   Event                     %al inner      %ah inner       Delta
>   ----------------------  -------------  -------------  ----------
>   cycles                    776,775,020    972,322,384    +25.2%
>   instructions/cycle               1.23           0.98    -20.3%
>   branch-misses               4,792,502        560,449    -88.3%
>   uops_issued.any           768,019,010    696,888,357     -9.3%
>   time elapsed                 0.1627s        0.2048s     +25.9%
> 
> Time elapsed directly correlates with the increase in cycles.

That might be consistent with the %ah accesses (probably writes)
being very slow/synchronising.
So you are getting a full cpu stall instead of speculative execution
of the following instructions - which must include a lot of mis-predicted
branches.

> > As well as swapping %al <-> %ah try changing the outer loop decrement to
> > 	sub $0x100, %ax
> > since %al is zero that will set the z flag the same.  
> 
> Unfortunately, using "sub $0x100, %ax"(with %al as inner loop) isn't better
> than just using "sub $1, %ah" in the outer loop:
> 
>   Event                     %al inner      + sub %ax       Delta
>   ----------------------  -------------  -------------  ----------
>   cycles                    776,775,020    813,372,036     +4.7%
>   instructions/cycle               1.23           1.17     -4.5%
>   branch-misses               4,792,502      7,610,323    +58.8%
>   uops_issued.any           768,019,010    827,465,137     +7.7%
>   time elapsed                 0.1627s        0.1707s      +4.9%

That is even more interesting.
The 'sub %ax' version has more uops and more branch-misses.
Looks like the extra cost of the %ah access is less than the cost
of the extra mis-predicted branches.

Makes me wonder where a version that uses %cl fits?
(Or use a zero-extending read and %eax/%ecx - likely to be the same.)
I'll bet 'one beer' that is nearest the 'sub %ax' version.

	David

> 
> > I've just hacked a test into some test code I've got.
> > I'm not seeing any unexpected costs on either zen-5 or haswell.
> > So it may be more subtle.  
> 
> This is puzzling, but at least it is evident that using %al for the inner
> loop seems to be the best option. In summary:
> 
>   Variant   Cycles     Uops Issued  Branch Misses
>   -------  ----------  -----------  -------------
>   %al       776M        768M           4.8M         (fastest)
>   %ah       972M (+25%) 697M (-9%)     560K (-88%)  (fewer uops + misses, yet slowest)
>   sub %ax   813M (+5%)  827M (+8%)     7.6M (+59%)  (most uops + misses)


^ permalink raw reply

* Re: [PATCH 3/5] compiler_attributes: Add overflow_behavior macros __ob_trap and __ob_wrap
From: Peter Zijlstra @ 2026-04-01  9:08 UTC (permalink / raw)
  To: Kees Cook
  Cc: Justin Stitt, Miguel Ojeda, Marco Elver, Andrey Konovalov,
	Andrey Ryabinin, Jonathan Corbet, Shuah Khan, Miguel Ojeda,
	Nathan Chancellor, kasan-dev, linux-doc, llvm, Linus Torvalds,
	Nicolas Schier, Arnd Bergmann, Greg Kroah-Hartman, Andrew Morton,
	linux-kernel, linux-hardening, linux-kbuild
In-Reply-To: <202603311249.30B44C66@keescook>

On Tue, Mar 31, 2026 at 12:52:10PM -0700, Kees Cook wrote:

> I think for this series, __ob_trap/__ob_wrap is what should be used.
> 
> And for other folks, the background here is that we originally wanted
> to use macros for "__trap" and "__wrap", but the powerpc C compilers
> (both Clang and GCC) have a builtin macro named "__trap" already. So
> I switched to just using the Clang-native type qualifier. We can use
> the attribute style too, but there was a lot of confusion during the
> Clang development phases where people kept forgetting this was a type
> qualifier, not an attribute (i.e. the attribute is an internal alias
> for the qualifier, and the qualifier is a new type).

Since you mention qualifiers...

What is the result of __typeof_unqual__(int __ob_trap) ?

^ permalink raw reply

* [PATCH net-next v3 1/3] dpll: add frequency monitoring to netlink spec
From: Ivan Vecera @ 2026-04-01  9:12 UTC (permalink / raw)
  To: netdev
  Cc: Vadim Fedorenko, Arkadiusz Kubalewski, David S. Miller,
	Donald Hunter, Eric Dumazet, Jakub Kicinski, Jiri Pirko,
	Jonathan Corbet, Michal Schmidt, Paolo Abeni, Petr Oros,
	Prathosh Satish, Shuah Khan, Simon Horman, linux-doc,
	linux-kernel
In-Reply-To: <20260401091237.1071995-1-ivecera@redhat.com>

Add DPLL_A_FREQUENCY_MONITOR device attribute to allow control over
the frequency monitor feature. The attribute uses the existing
dpll_feature_state enum (enable/disable) and is present in both
device-get reply and device-set request.

Add DPLL_A_PIN_MEASURED_FREQUENCY pin attribute to expose the measured
input frequency in millihertz (mHz). The attribute is present in the
pin-get reply. Add DPLL_PIN_MEASURED_FREQUENCY_DIVIDER constant to
allow userspace to extract integer and fractional parts.

Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
---
Changes v2 -> v3:
- Improved frequency-monitor doc wording (Jakub)
- Changed measured-frequency to mHz with divider constant (Jakub)

Changes v1 -> v2:
- Renamed actual-frequency to measured-frequency (Vadim)
---
 Documentation/driver-api/dpll.rst     | 20 +++++++++++++++
 Documentation/netlink/specs/dpll.yaml | 35 +++++++++++++++++++++++++++
 drivers/dpll/dpll_nl.c                |  5 ++--
 include/uapi/linux/dpll.h             |  5 +++-
 4 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/Documentation/driver-api/dpll.rst b/Documentation/driver-api/dpll.rst
index 83118c728ed90..93c191b2d0898 100644
--- a/Documentation/driver-api/dpll.rst
+++ b/Documentation/driver-api/dpll.rst
@@ -250,6 +250,24 @@ in the ``DPLL_A_PIN_PHASE_OFFSET`` attribute.
   ``DPLL_A_PHASE_OFFSET_MONITOR`` attr state of a feature
   =============================== ========================
 
+Frequency monitor
+=================
+
+Some DPLL devices may offer the capability to measure the actual
+frequency of all available input pins. The attribute and current feature state
+shall be included in the response message of the ``DPLL_CMD_DEVICE_GET``
+command for supported DPLL devices. In such cases, users can also control
+the feature using the ``DPLL_CMD_DEVICE_SET`` command by setting the
+``enum dpll_feature_state`` values for the attribute.
+Once enabled the measured input frequency for each input pin shall be
+returned in the ``DPLL_A_PIN_MEASURED_FREQUENCY`` attribute. The value
+is in millihertz (mHz), using ``DPLL_PIN_MEASURED_FREQUENCY_DIVIDER``
+as the divider.
+
+  =============================== ========================
+  ``DPLL_A_FREQUENCY_MONITOR``    attr state of a feature
+  =============================== ========================
+
 Embedded SYNC
 =============
 
@@ -411,6 +429,8 @@ according to attribute purpose.
       ``DPLL_A_PIN_STATE``             attr state of pin on the parent
                                        pin
     ``DPLL_A_PIN_CAPABILITIES``        attr bitmask of pin capabilities
+    ``DPLL_A_PIN_MEASURED_FREQUENCY``  attr measured frequency of
+                                       an input pin in mHz
   ==================================== ==================================
 
   ==================================== =================================
diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml
index 3dd48a32f7837..40465a3d7fc20 100644
--- a/Documentation/netlink/specs/dpll.yaml
+++ b/Documentation/netlink/specs/dpll.yaml
@@ -240,6 +240,20 @@ definitions:
       integer part of a measured phase offset value.
       Value of (DPLL_A_PHASE_OFFSET % DPLL_PHASE_OFFSET_DIVIDER) is a
       fractional part of a measured phase offset value.
+  -
+    type: const
+    name: pin-measured-frequency-divider
+    value: 1000
+    doc: |
+      pin measured frequency divider allows userspace to calculate
+      a value of measured input frequency as a fractional value with
+      three digit decimal precision (millihertz).
+      Value of (DPLL_A_PIN_MEASURED_FREQUENCY /
+      DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is an integer part of
+      a measured frequency value.
+      Value of (DPLL_A_PIN_MEASURED_FREQUENCY %
+      DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is a fractional part of
+      a measured frequency value.
   -
     type: enum
     name: feature-state
@@ -319,6 +333,13 @@ attribute-sets:
         name: phase-offset-avg-factor
         type: u32
         doc: Averaging factor applied to calculation of reported phase offset.
+      -
+        name: frequency-monitor
+        type: u32
+        enum: feature-state
+        doc: Current or desired state of the frequency monitor feature.
+          If enabled, dpll device shall measure all currently available
+          inputs for their actual input frequency.
   -
     name: pin
     enum-name: dpll_a_pin
@@ -456,6 +477,17 @@ attribute-sets:
           Value is in PPT (parts per trillion, 10^-12).
           Note: This attribute provides higher resolution than the standard
           fractional-frequency-offset (which is in PPM).
+      -
+        name: measured-frequency
+        type: u64
+        doc: |
+          The measured frequency of the input pin in millihertz (mHz).
+          Value of (DPLL_A_PIN_MEASURED_FREQUENCY /
+          DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is an integer part (Hz)
+          of a measured frequency value.
+          Value of (DPLL_A_PIN_MEASURED_FREQUENCY %
+          DPLL_PIN_MEASURED_FREQUENCY_DIVIDER) is a fractional part
+          of a measured frequency value.
 
   -
     name: pin-parent-device
@@ -544,6 +576,7 @@ operations:
             - type
             - phase-offset-monitor
             - phase-offset-avg-factor
+            - frequency-monitor
 
       dump:
         reply: *dev-attrs
@@ -563,6 +596,7 @@ operations:
             - mode
             - phase-offset-monitor
             - phase-offset-avg-factor
+            - frequency-monitor
     -
       name: device-create-ntf
       doc: Notification about device appearing
@@ -643,6 +677,7 @@ operations:
             - esync-frequency-supported
             - esync-pulse
             - reference-sync
+            - measured-frequency
 
       dump:
         request:
diff --git a/drivers/dpll/dpll_nl.c b/drivers/dpll/dpll_nl.c
index a2b22d4921142..1e652340a5d73 100644
--- a/drivers/dpll/dpll_nl.c
+++ b/drivers/dpll/dpll_nl.c
@@ -43,11 +43,12 @@ static const struct nla_policy dpll_device_get_nl_policy[DPLL_A_ID + 1] = {
 };
 
 /* DPLL_CMD_DEVICE_SET - do */
-static const struct nla_policy dpll_device_set_nl_policy[DPLL_A_PHASE_OFFSET_AVG_FACTOR + 1] = {
+static const struct nla_policy dpll_device_set_nl_policy[DPLL_A_FREQUENCY_MONITOR + 1] = {
 	[DPLL_A_ID] = { .type = NLA_U32, },
 	[DPLL_A_MODE] = NLA_POLICY_RANGE(NLA_U32, 1, 2),
 	[DPLL_A_PHASE_OFFSET_MONITOR] = NLA_POLICY_MAX(NLA_U32, 1),
 	[DPLL_A_PHASE_OFFSET_AVG_FACTOR] = { .type = NLA_U32, },
+	[DPLL_A_FREQUENCY_MONITOR] = NLA_POLICY_MAX(NLA_U32, 1),
 };
 
 /* DPLL_CMD_PIN_ID_GET - do */
@@ -115,7 +116,7 @@ static const struct genl_split_ops dpll_nl_ops[] = {
 		.doit		= dpll_nl_device_set_doit,
 		.post_doit	= dpll_post_doit,
 		.policy		= dpll_device_set_nl_policy,
-		.maxattr	= DPLL_A_PHASE_OFFSET_AVG_FACTOR,
+		.maxattr	= DPLL_A_FREQUENCY_MONITOR,
 		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
 	},
 	{
diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h
index de0005f28e5c5..871685f7c353b 100644
--- a/include/uapi/linux/dpll.h
+++ b/include/uapi/linux/dpll.h
@@ -191,7 +191,8 @@ enum dpll_pin_capabilities {
 	DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE = 4,
 };
 
-#define DPLL_PHASE_OFFSET_DIVIDER	1000
+#define DPLL_PHASE_OFFSET_DIVIDER		1000
+#define DPLL_PIN_MEASURED_FREQUENCY_DIVIDER	1000
 
 /**
  * enum dpll_feature_state - Allow control (enable/disable) and status checking
@@ -218,6 +219,7 @@ enum dpll_a {
 	DPLL_A_CLOCK_QUALITY_LEVEL,
 	DPLL_A_PHASE_OFFSET_MONITOR,
 	DPLL_A_PHASE_OFFSET_AVG_FACTOR,
+	DPLL_A_FREQUENCY_MONITOR,
 
 	__DPLL_A_MAX,
 	DPLL_A_MAX = (__DPLL_A_MAX - 1)
@@ -254,6 +256,7 @@ enum dpll_a_pin {
 	DPLL_A_PIN_REFERENCE_SYNC,
 	DPLL_A_PIN_PHASE_ADJUST_GRAN,
 	DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET_PPT,
+	DPLL_A_PIN_MEASURED_FREQUENCY,
 
 	__DPLL_A_PIN_MAX,
 	DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1)
-- 
2.52.0


^ permalink raw reply related

* [PATCH net-next v3 0/3] dpll: add frequency monitoring feature
From: Ivan Vecera @ 2026-04-01  9:12 UTC (permalink / raw)
  To: netdev
  Cc: Arkadiusz Kubalewski, David S. Miller, Donald Hunter,
	Eric Dumazet, Jakub Kicinski, Jiri Pirko, Jonathan Corbet,
	Michal Schmidt, Paolo Abeni, Petr Oros, Prathosh Satish,
	Shuah Khan, Simon Horman, Vadim Fedorenko, linux-doc,
	linux-kernel

This series adds support for monitoring the measured input frequency
of DPLL input pins via the DPLL netlink interface.

Some DPLL devices can measure the actual frequency being received on
input pins. The approach mirrors the existing phase-offset-monitor
feature: a device-level attribute (DPLL_A_FREQUENCY_MONITOR) enables
or disables monitoring, and a per-pin attribute
(DPLL_A_PIN_MEASURED_FREQUENCY) exposes the measured frequency in
millihertz (mHz) when monitoring is enabled.

Patch 1 adds the new attributes to the DPLL netlink spec (dpll.yaml),
the DPLL_PIN_MEASURED_FREQUENCY_DIVIDER constant, regenerates the
auto-generated UAPI header and netlink policy, and updates
Documentation/driver-api/dpll.rst.

Patch 2 adds the callback operations (freq_monitor_get/set for
devices, measured_freq_get for pins) and the corresponding netlink
GET/SET handlers in the DPLL core. The core only invokes
measured_freq_get when the frequency monitor is enabled on the parent
device. The freq_monitor_get callback is required when measured_freq_get
is provided.

Patch 3 implements the feature in the ZL3073x driver by extracting
a common measurement latch helper from the existing FFO update path,
adding a frequency measurement function, and wiring up the new
callbacks.

Changes v2 -> v3:
- Improved frequency-monitor doc wording (Jakub)
- Changed measured-frequency to mHz with divider constant (Jakub)
- Made freq_monitor_get required when measured_freq_get is present (Jakub)

Changes v1 -> v2:
- Renamed actual-frequency to measured-frequency (Vadim)

Signed-off-by: Ivan Vecera <ivecera@redhat.com>

Ivan Vecera (3):
  dpll: add frequency monitoring to netlink spec
  dpll: add frequency monitoring callback ops
  dpll: zl3073x: implement frequency monitoring

 Documentation/driver-api/dpll.rst     |  20 ++++++
 Documentation/netlink/specs/dpll.yaml |  35 +++++++++
 drivers/dpll/dpll_netlink.c           |  92 ++++++++++++++++++++++++
 drivers/dpll/dpll_nl.c                |   5 +-
 drivers/dpll/zl3073x/core.c           |  88 +++++++++++++++++++----
 drivers/dpll/zl3073x/dpll.c           | 100 ++++++++++++++++++++++++--
 drivers/dpll/zl3073x/dpll.h           |   2 +
 drivers/dpll/zl3073x/ref.h            |  14 ++++
 include/linux/dpll.h                  |  10 +++
 include/uapi/linux/dpll.h             |   5 +-
 10 files changed, 351 insertions(+), 20 deletions(-)

-- 
2.52.0


^ permalink raw reply

* [PATCH net-next v3 2/3] dpll: add frequency monitoring callback ops
From: Ivan Vecera @ 2026-04-01  9:12 UTC (permalink / raw)
  To: netdev
  Cc: Vadim Fedorenko, Arkadiusz Kubalewski, David S. Miller,
	Donald Hunter, Eric Dumazet, Jakub Kicinski, Jiri Pirko,
	Jonathan Corbet, Michal Schmidt, Paolo Abeni, Petr Oros,
	Prathosh Satish, Shuah Khan, Simon Horman, linux-doc,
	linux-kernel
In-Reply-To: <20260401091237.1071995-1-ivecera@redhat.com>

Add new callback operations for a dpll device:
- freq_monitor_get(..) - to obtain current state of frequency monitor
  feature from dpll device,
- freq_monitor_set(..) - to allow feature configuration.

Add new callback operation for a dpll pin:
- measured_freq_get(..) - to obtain the measured frequency in mHz.

Obtain the feature state value using the get callback and provide it to
the user if the device driver implements both the freq_monitor_get and
freq_monitor_set callbacks. The measured_freq_get pin callback is only
invoked when the frequency monitor is enabled.
The freq_monitor_get device callback is required when measured_freq_get
is provided by the driver.

Execute the set callback upon user requests.

Reviewed-by: Vadim Fedorenko <vadim.fedorenko@linux.dev>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
---
Changes v2 -> v3:
- Made freq_monitor_get required when measured_freq_get is present (Jakub)

Changes v1 -> v2:
- Renamed actual-frequency to measured-frequency (Vadim)
---
 drivers/dpll/dpll_netlink.c | 92 +++++++++++++++++++++++++++++++++++++
 include/linux/dpll.h        | 10 ++++
 2 files changed, 102 insertions(+)

diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
index 83cbd64abf5a4..576d0cd074bd4 100644
--- a/drivers/dpll/dpll_netlink.c
+++ b/drivers/dpll/dpll_netlink.c
@@ -175,6 +175,26 @@ dpll_msg_add_phase_offset_monitor(struct sk_buff *msg, struct dpll_device *dpll,
 	return 0;
 }
 
+static int
+dpll_msg_add_freq_monitor(struct sk_buff *msg, struct dpll_device *dpll,
+			  struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	enum dpll_feature_state state;
+	int ret;
+
+	if (ops->freq_monitor_set && ops->freq_monitor_get) {
+		ret = ops->freq_monitor_get(dpll, dpll_priv(dpll),
+					    &state, extack);
+		if (ret)
+			return ret;
+		if (nla_put_u32(msg, DPLL_A_FREQUENCY_MONITOR, state))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
 static int
 dpll_msg_add_phase_offset_avg_factor(struct sk_buff *msg,
 				     struct dpll_device *dpll,
@@ -400,6 +420,40 @@ static int dpll_msg_add_ffo(struct sk_buff *msg, struct dpll_pin *pin,
 			    ffo);
 }
 
+static int dpll_msg_add_measured_freq(struct sk_buff *msg, struct dpll_pin *pin,
+				      struct dpll_pin_ref *ref,
+				      struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *dev_ops = dpll_device_ops(ref->dpll);
+	const struct dpll_pin_ops *ops = dpll_pin_ops(ref);
+	struct dpll_device *dpll = ref->dpll;
+	enum dpll_feature_state state;
+	u64 measured_freq;
+	int ret;
+
+	if (!ops->measured_freq_get)
+		return 0;
+	if (WARN_ON(!dev_ops->freq_monitor_get))
+		return -EINVAL;
+	ret = dev_ops->freq_monitor_get(dpll, dpll_priv(dpll),
+					&state, extack);
+	if (ret)
+		return ret;
+	if (state == DPLL_FEATURE_STATE_DISABLE)
+		return 0;
+	ret = ops->measured_freq_get(pin, dpll_pin_on_dpll_priv(dpll, pin),
+				    dpll, dpll_priv(dpll), &measured_freq,
+				    extack);
+	if (ret)
+		return ret;
+	if (nla_put_64bit(msg, DPLL_A_PIN_MEASURED_FREQUENCY,
+			  sizeof(measured_freq), &measured_freq,
+			  DPLL_A_PIN_PAD))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int
 dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin,
 		      struct dpll_pin_ref *ref, struct netlink_ext_ack *extack)
@@ -670,6 +724,9 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
 	if (ret)
 		return ret;
 	ret = dpll_msg_add_ffo(msg, pin, ref, extack);
+	if (ret)
+		return ret;
+	ret = dpll_msg_add_measured_freq(msg, pin, ref, extack);
 	if (ret)
 		return ret;
 	ret = dpll_msg_add_pin_esync(msg, pin, ref, extack);
@@ -722,6 +779,9 @@ dpll_device_get_one(struct dpll_device *dpll, struct sk_buff *msg,
 	if (ret)
 		return ret;
 	ret = dpll_msg_add_phase_offset_avg_factor(msg, dpll, extack);
+	if (ret)
+		return ret;
+	ret = dpll_msg_add_freq_monitor(msg, dpll, extack);
 	if (ret)
 		return ret;
 
@@ -948,6 +1008,32 @@ dpll_phase_offset_avg_factor_set(struct dpll_device *dpll, struct nlattr *a,
 						extack);
 }
 
+static int
+dpll_freq_monitor_set(struct dpll_device *dpll, struct nlattr *a,
+		      struct netlink_ext_ack *extack)
+{
+	const struct dpll_device_ops *ops = dpll_device_ops(dpll);
+	enum dpll_feature_state state = nla_get_u32(a), old_state;
+	int ret;
+
+	if (!(ops->freq_monitor_set && ops->freq_monitor_get)) {
+		NL_SET_ERR_MSG_ATTR(extack, a,
+				    "dpll device not capable of frequency monitor");
+		return -EOPNOTSUPP;
+	}
+	ret = ops->freq_monitor_get(dpll, dpll_priv(dpll), &old_state,
+				    extack);
+	if (ret) {
+		NL_SET_ERR_MSG(extack,
+			       "unable to get current state of frequency monitor");
+		return ret;
+	}
+	if (state == old_state)
+		return 0;
+
+	return ops->freq_monitor_set(dpll, dpll_priv(dpll), state, extack);
+}
+
 static int
 dpll_pin_freq_set(struct dpll_pin *pin, struct nlattr *a,
 		  struct netlink_ext_ack *extack)
@@ -1878,6 +1964,12 @@ dpll_set_from_nlattr(struct dpll_device *dpll, struct genl_info *info)
 			if (ret)
 				return ret;
 			break;
+		case DPLL_A_FREQUENCY_MONITOR:
+			ret = dpll_freq_monitor_set(dpll, a,
+						    info->extack);
+			if (ret)
+				return ret;
+			break;
 		}
 	}
 
diff --git a/include/linux/dpll.h b/include/linux/dpll.h
index 2ce295b46b8cd..b7277a8b484d2 100644
--- a/include/linux/dpll.h
+++ b/include/linux/dpll.h
@@ -52,6 +52,12 @@ struct dpll_device_ops {
 	int (*phase_offset_avg_factor_get)(const struct dpll_device *dpll,
 					   void *dpll_priv, u32 *factor,
 					   struct netlink_ext_ack *extack);
+	int (*freq_monitor_set)(const struct dpll_device *dpll, void *dpll_priv,
+				enum dpll_feature_state state,
+				struct netlink_ext_ack *extack);
+	int (*freq_monitor_get)(const struct dpll_device *dpll, void *dpll_priv,
+				enum dpll_feature_state *state,
+				struct netlink_ext_ack *extack);
 };
 
 struct dpll_pin_ops {
@@ -110,6 +116,10 @@ struct dpll_pin_ops {
 	int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv,
 		       const struct dpll_device *dpll, void *dpll_priv,
 		       s64 *ffo, struct netlink_ext_ack *extack);
+	int (*measured_freq_get)(const struct dpll_pin *pin, void *pin_priv,
+				 const struct dpll_device *dpll,
+				 void *dpll_priv, u64 *measured_freq,
+				 struct netlink_ext_ack *extack);
 	int (*esync_set)(const struct dpll_pin *pin, void *pin_priv,
 			 const struct dpll_device *dpll, void *dpll_priv,
 			 u64 freq, struct netlink_ext_ack *extack);
-- 
2.52.0


^ permalink raw reply related

* [PATCH net-next v3 3/3] dpll: zl3073x: implement frequency monitoring
From: Ivan Vecera @ 2026-04-01  9:12 UTC (permalink / raw)
  To: netdev
  Cc: Petr Oros, Arkadiusz Kubalewski, David S. Miller, Donald Hunter,
	Eric Dumazet, Jakub Kicinski, Jiri Pirko, Jonathan Corbet,
	Michal Schmidt, Paolo Abeni, Prathosh Satish, Shuah Khan,
	Simon Horman, Vadim Fedorenko, linux-doc, linux-kernel
In-Reply-To: <20260401091237.1071995-1-ivecera@redhat.com>

Extract common measurement latch logic from zl3073x_ref_ffo_update()
into a new zl3073x_ref_freq_meas_latch() helper and add
zl3073x_ref_freq_meas_update() that uses it to latch and read absolute
input reference frequencies in Hz.

Add meas_freq field to struct zl3073x_ref and the corresponding
zl3073x_ref_meas_freq_get() accessor. The measured frequencies are
updated periodically alongside the existing FFO measurements.

Add freq_monitor boolean to struct zl3073x_dpll and implement the
freq_monitor_set/get device callbacks to enable/disable frequency
monitoring via the DPLL netlink interface.

Implement measured_freq_get pin callback for input pins that returns the
measured input frequency in mHz.

Reviewed-by: Petr Oros <poros@redhat.com>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
---
Changes v2 -> v3:
- Changed measured_freq_get to return value in mHz

Changes v1 -> v2:
- Renamed actual-frequency to measured-frequency (Vadim)
---
 drivers/dpll/zl3073x/core.c |  88 ++++++++++++++++++++++++++-----
 drivers/dpll/zl3073x/dpll.c | 100 ++++++++++++++++++++++++++++++++++--
 drivers/dpll/zl3073x/dpll.h |   2 +
 drivers/dpll/zl3073x/ref.h  |  14 +++++
 4 files changed, 187 insertions(+), 17 deletions(-)

diff --git a/drivers/dpll/zl3073x/core.c b/drivers/dpll/zl3073x/core.c
index 6363002d48d46..cb47a5db061aa 100644
--- a/drivers/dpll/zl3073x/core.c
+++ b/drivers/dpll/zl3073x/core.c
@@ -632,22 +632,21 @@ int zl3073x_ref_phase_offsets_update(struct zl3073x_dev *zldev, int channel)
 }
 
 /**
- * zl3073x_ref_ffo_update - update reference fractional frequency offsets
+ * zl3073x_ref_freq_meas_latch - latch reference frequency measurements
  * @zldev: pointer to zl3073x_dev structure
+ * @type: measurement type (ZL_REF_FREQ_MEAS_CTRL_*)
  *
- * The function asks device to update fractional frequency offsets latch
- * registers the latest measured values, reads and stores them into
+ * The function waits for the previous measurement to finish, selects all
+ * references and requests a new measurement of the given type.
  *
  * Return: 0 on success, <0 on error
  */
 static int
-zl3073x_ref_ffo_update(struct zl3073x_dev *zldev)
+zl3073x_ref_freq_meas_latch(struct zl3073x_dev *zldev, u8 type)
 {
-	int i, rc;
+	int rc;
 
-	/* Per datasheet we have to wait for 'ref_freq_meas_ctrl' to be zero
-	 * to ensure that the measured data are coherent.
-	 */
+	/* Wait for previous measurement to finish */
 	rc = zl3073x_poll_zero_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL,
 				  ZL_REF_FREQ_MEAS_CTRL);
 	if (rc)
@@ -663,15 +662,64 @@ zl3073x_ref_ffo_update(struct zl3073x_dev *zldev)
 	if (rc)
 		return rc;
 
-	/* Request frequency offset measurement */
-	rc = zl3073x_write_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL,
-			      ZL_REF_FREQ_MEAS_CTRL_REF_FREQ_OFF);
+	/* Request measurement */
+	rc = zl3073x_write_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL, type);
 	if (rc)
 		return rc;
 
 	/* Wait for finish */
-	rc = zl3073x_poll_zero_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL,
-				  ZL_REF_FREQ_MEAS_CTRL);
+	return zl3073x_poll_zero_u8(zldev, ZL_REG_REF_FREQ_MEAS_CTRL,
+				    ZL_REF_FREQ_MEAS_CTRL);
+}
+
+/**
+ * zl3073x_ref_freq_meas_update - update measured input reference frequencies
+ * @zldev: pointer to zl3073x_dev structure
+ *
+ * The function asks device to latch measured input reference frequencies
+ * and stores the results in the ref state.
+ *
+ * Return: 0 on success, <0 on error
+ */
+static int
+zl3073x_ref_freq_meas_update(struct zl3073x_dev *zldev)
+{
+	int i, rc;
+
+	rc = zl3073x_ref_freq_meas_latch(zldev, ZL_REF_FREQ_MEAS_CTRL_REF_FREQ);
+	if (rc)
+		return rc;
+
+	/* Read measured frequencies in Hz (unsigned 32-bit, LSB = 1 Hz) */
+	for (i = 0; i < ZL3073X_NUM_REFS; i++) {
+		u32 value;
+
+		rc = zl3073x_read_u32(zldev, ZL_REG_REF_FREQ(i), &value);
+		if (rc)
+			return rc;
+
+		zldev->ref[i].meas_freq = value;
+	}
+
+	return 0;
+}
+
+/**
+ * zl3073x_ref_ffo_update - update reference fractional frequency offsets
+ * @zldev: pointer to zl3073x_dev structure
+ *
+ * The function asks device to latch the latest measured fractional
+ * frequency offset values, reads and stores them into the ref state.
+ *
+ * Return: 0 on success, <0 on error
+ */
+static int
+zl3073x_ref_ffo_update(struct zl3073x_dev *zldev)
+{
+	int i, rc;
+
+	rc = zl3073x_ref_freq_meas_latch(zldev,
+					 ZL_REF_FREQ_MEAS_CTRL_REF_FREQ_OFF);
 	if (rc)
 		return rc;
 
@@ -714,6 +762,20 @@ zl3073x_dev_periodic_work(struct kthread_work *work)
 		dev_warn(zldev->dev, "Failed to update phase offsets: %pe\n",
 			 ERR_PTR(rc));
 
+	/* Update measured input reference frequencies if any DPLL has
+	 * frequency monitoring enabled.
+	 */
+	list_for_each_entry(zldpll, &zldev->dplls, list) {
+		if (zldpll->freq_monitor) {
+			rc = zl3073x_ref_freq_meas_update(zldev);
+			if (rc)
+				dev_warn(zldev->dev,
+					 "Failed to update measured frequencies: %pe\n",
+					 ERR_PTR(rc));
+			break;
+		}
+	}
+
 	/* Update references' fractional frequency offsets */
 	rc = zl3073x_ref_ffo_update(zldev);
 	if (rc)
diff --git a/drivers/dpll/zl3073x/dpll.c b/drivers/dpll/zl3073x/dpll.c
index a29f606318f6d..d788ca45a17e5 100644
--- a/drivers/dpll/zl3073x/dpll.c
+++ b/drivers/dpll/zl3073x/dpll.c
@@ -39,6 +39,7 @@
  * @pin_state: last saved pin state
  * @phase_offset: last saved pin phase offset
  * @freq_offset: last saved fractional frequency offset
+ * @measured_freq: last saved measured frequency
  */
 struct zl3073x_dpll_pin {
 	struct list_head	list;
@@ -54,6 +55,7 @@ struct zl3073x_dpll_pin {
 	enum dpll_pin_state	pin_state;
 	s64			phase_offset;
 	s64			freq_offset;
+	u32			measured_freq;
 };
 
 /*
@@ -202,6 +204,21 @@ zl3073x_dpll_input_pin_ffo_get(const struct dpll_pin *dpll_pin, void *pin_priv,
 	return 0;
 }
 
+static int
+zl3073x_dpll_input_pin_measured_freq_get(const struct dpll_pin *dpll_pin,
+					 void *pin_priv,
+					 const struct dpll_device *dpll,
+					 void *dpll_priv, u64 *measured_freq,
+					 struct netlink_ext_ack *extack)
+{
+	struct zl3073x_dpll_pin *pin = pin_priv;
+
+	*measured_freq = pin->measured_freq;
+	*measured_freq *= DPLL_PIN_MEASURED_FREQUENCY_DIVIDER;
+
+	return 0;
+}
+
 static int
 zl3073x_dpll_input_pin_frequency_get(const struct dpll_pin *dpll_pin,
 				     void *pin_priv,
@@ -1116,6 +1133,35 @@ zl3073x_dpll_phase_offset_monitor_set(const struct dpll_device *dpll,
 	return 0;
 }
 
+static int
+zl3073x_dpll_freq_monitor_get(const struct dpll_device *dpll,
+			      void *dpll_priv,
+			      enum dpll_feature_state *state,
+			      struct netlink_ext_ack *extack)
+{
+	struct zl3073x_dpll *zldpll = dpll_priv;
+
+	if (zldpll->freq_monitor)
+		*state = DPLL_FEATURE_STATE_ENABLE;
+	else
+		*state = DPLL_FEATURE_STATE_DISABLE;
+
+	return 0;
+}
+
+static int
+zl3073x_dpll_freq_monitor_set(const struct dpll_device *dpll,
+			      void *dpll_priv,
+			      enum dpll_feature_state state,
+			      struct netlink_ext_ack *extack)
+{
+	struct zl3073x_dpll *zldpll = dpll_priv;
+
+	zldpll->freq_monitor = (state == DPLL_FEATURE_STATE_ENABLE);
+
+	return 0;
+}
+
 static const struct dpll_pin_ops zl3073x_dpll_input_pin_ops = {
 	.direction_get = zl3073x_dpll_pin_direction_get,
 	.esync_get = zl3073x_dpll_input_pin_esync_get,
@@ -1123,6 +1169,7 @@ static const struct dpll_pin_ops zl3073x_dpll_input_pin_ops = {
 	.ffo_get = zl3073x_dpll_input_pin_ffo_get,
 	.frequency_get = zl3073x_dpll_input_pin_frequency_get,
 	.frequency_set = zl3073x_dpll_input_pin_frequency_set,
+	.measured_freq_get = zl3073x_dpll_input_pin_measured_freq_get,
 	.phase_offset_get = zl3073x_dpll_input_pin_phase_offset_get,
 	.phase_adjust_get = zl3073x_dpll_input_pin_phase_adjust_get,
 	.phase_adjust_set = zl3073x_dpll_input_pin_phase_adjust_set,
@@ -1151,6 +1198,8 @@ static const struct dpll_device_ops zl3073x_dpll_device_ops = {
 	.phase_offset_avg_factor_set = zl3073x_dpll_phase_offset_avg_factor_set,
 	.phase_offset_monitor_get = zl3073x_dpll_phase_offset_monitor_get,
 	.phase_offset_monitor_set = zl3073x_dpll_phase_offset_monitor_set,
+	.freq_monitor_get = zl3073x_dpll_freq_monitor_get,
+	.freq_monitor_set = zl3073x_dpll_freq_monitor_set,
 	.supported_modes_get = zl3073x_dpll_supported_modes_get,
 };
 
@@ -1572,6 +1621,7 @@ zl3073x_dpll_pin_ffo_check(struct zl3073x_dpll_pin *pin)
 	struct zl3073x_dev *zldev = zldpll->dev;
 	const struct zl3073x_ref *ref;
 	u8 ref_id;
+	s64 ffo;
 
 	/* Get reference monitor status */
 	ref_id = zl3073x_input_pin_ref_get(pin->id);
@@ -1582,10 +1632,47 @@ zl3073x_dpll_pin_ffo_check(struct zl3073x_dpll_pin *pin)
 		return false;
 
 	/* Compare with previous value */
-	if (pin->freq_offset != ref->ffo) {
+	ffo = zl3073x_ref_ffo_get(ref);
+	if (pin->freq_offset != ffo) {
 		dev_dbg(zldev->dev, "%s freq offset changed: %lld -> %lld\n",
-			pin->label, pin->freq_offset, ref->ffo);
-		pin->freq_offset = ref->ffo;
+			pin->label, pin->freq_offset, ffo);
+		pin->freq_offset = ffo;
+
+		return true;
+	}
+
+	return false;
+}
+
+/**
+ * zl3073x_dpll_pin_measured_freq_check - check for pin measured frequency
+ * change
+ * @pin: pin to check
+ *
+ * Check for the given pin's measured frequency change.
+ *
+ * Return: true on measured frequency change, false otherwise
+ */
+static bool
+zl3073x_dpll_pin_measured_freq_check(struct zl3073x_dpll_pin *pin)
+{
+	struct zl3073x_dpll *zldpll = pin->dpll;
+	struct zl3073x_dev *zldev = zldpll->dev;
+	const struct zl3073x_ref *ref;
+	u8 ref_id;
+	u32 freq;
+
+	if (!zldpll->freq_monitor)
+		return false;
+
+	ref_id = zl3073x_input_pin_ref_get(pin->id);
+	ref = zl3073x_ref_state_get(zldev, ref_id);
+
+	freq = zl3073x_ref_meas_freq_get(ref);
+	if (pin->measured_freq != freq) {
+		dev_dbg(zldev->dev, "%s measured freq changed: %u -> %u\n",
+			pin->label, pin->measured_freq, freq);
+		pin->measured_freq = freq;
 
 		return true;
 	}
@@ -1677,13 +1764,18 @@ zl3073x_dpll_changes_check(struct zl3073x_dpll *zldpll)
 			pin_changed = true;
 		}
 
-		/* Check for phase offset and ffo change once per second */
+		/* Check for phase offset, ffo, and measured freq change
+		 * once per second.
+		 */
 		if (zldpll->check_count % 2 == 0) {
 			if (zl3073x_dpll_pin_phase_offset_check(pin))
 				pin_changed = true;
 
 			if (zl3073x_dpll_pin_ffo_check(pin))
 				pin_changed = true;
+
+			if (zl3073x_dpll_pin_measured_freq_check(pin))
+				pin_changed = true;
 		}
 
 		if (pin_changed)
diff --git a/drivers/dpll/zl3073x/dpll.h b/drivers/dpll/zl3073x/dpll.h
index 115ee4f67e7ab..434c32a7db123 100644
--- a/drivers/dpll/zl3073x/dpll.h
+++ b/drivers/dpll/zl3073x/dpll.h
@@ -15,6 +15,7 @@
  * @id: DPLL index
  * @check_count: periodic check counter
  * @phase_monitor: is phase offset monitor enabled
+ * @freq_monitor: is frequency monitor enabled
  * @ops: DPLL device operations for this instance
  * @dpll_dev: pointer to registered DPLL device
  * @tracker: tracking object for the acquired reference
@@ -28,6 +29,7 @@ struct zl3073x_dpll {
 	u8				id;
 	u8				check_count;
 	bool				phase_monitor;
+	bool				freq_monitor;
 	struct dpll_device_ops		ops;
 	struct dpll_device		*dpll_dev;
 	dpll_tracker			tracker;
diff --git a/drivers/dpll/zl3073x/ref.h b/drivers/dpll/zl3073x/ref.h
index 06d8d4d97ea26..be16be20dbc7e 100644
--- a/drivers/dpll/zl3073x/ref.h
+++ b/drivers/dpll/zl3073x/ref.h
@@ -23,6 +23,7 @@ struct zl3073x_dev;
  * @sync_ctrl: reference sync control
  * @config: reference config
  * @ffo: current fractional frequency offset
+ * @meas_freq: measured input frequency in Hz
  * @mon_status: reference monitor status
  */
 struct zl3073x_ref {
@@ -40,6 +41,7 @@ struct zl3073x_ref {
 	);
 	struct_group(stat, /* Status */
 		s64	ffo;
+		u32	meas_freq;
 		u8	mon_status;
 	);
 };
@@ -68,6 +70,18 @@ zl3073x_ref_ffo_get(const struct zl3073x_ref *ref)
 	return ref->ffo;
 }
 
+/**
+ * zl3073x_ref_meas_freq_get - get measured input frequency
+ * @ref: pointer to ref state
+ *
+ * Return: measured input frequency in Hz
+ */
+static inline u32
+zl3073x_ref_meas_freq_get(const struct zl3073x_ref *ref)
+{
+	return ref->meas_freq;
+}
+
 /**
  * zl3073x_ref_freq_get - get given input reference frequency
  * @ref: pointer to ref state
-- 
2.52.0


^ permalink raw reply related

* Re: [PATCH 3/5] compiler_attributes: Add overflow_behavior macros __ob_trap and __ob_wrap
From: Peter Zijlstra @ 2026-04-01  9:20 UTC (permalink / raw)
  To: Vincent Mailhol
  Cc: Kees Cook, Justin Stitt, Marco Elver, Andrey Konovalov,
	Andrey Ryabinin, Jonathan Corbet, Shuah Khan, Miguel Ojeda,
	Nathan Chancellor, kasan-dev, linux-doc, llvm, Linus Torvalds,
	Nicolas Schier, Arnd Bergmann, Greg Kroah-Hartman, Andrew Morton,
	linux-kernel, linux-hardening, linux-kbuild
In-Reply-To: <bd0a4235-a7f0-4624-802c-aa49a9d13f29@kernel.org>

On Wed, Apr 01, 2026 at 09:19:51AM +0200, Vincent Mailhol wrote:
> Le 31/03/2026 à 18:37, Kees Cook a écrit :

> > +  - Saturate (explicitly hold the maximum or minimum representable value)
> 
> I just wanted to ask how much consideration was put into this last
> "saturate" option.
> 
> When speaking of "safe" as in "functional safety" this seems a good
> option to me. The best option is of course proper handling, but as
> discussed, we are speaking of the scenario in which the code is already
> buggy and which is the fallout option doing the least damage.
> 
> What I have in mind is a new __ob_saturate type qualifier. Something like:
> 
> 	void foo(int num)
> 	{
> 		int __ob_saturate saturate_var = num;
> 	
> 		saturate_var += 42;
> 	}
> 
> would just print a warning and continue execution, thus solving the
> trapping issue. The above code would generate something equivalent to that:
> 
> 	void foo(int num)
> 	{
> 		int __ob_saturate saturate_var = num;
> 	
> 		if (check_add_overflow(saturate_var, increment,
> 				       &saturate_var) {
> 			WARN(true, "saturation occurred");
> 			saturate_var = type_max(saturate_var);
> 	}

So I would like to second this option as being interesting.

But while pondering it, I did want to note that all of the options, with
the exception of __ob_wrap (which is effectively what we have today for
*everything*), will be 'interesting' to compose with _Atomic, another
one of these qualifiers.

Now, in the kernel we don't use _Atomic, so strictly speaking I don't
care ;-) But here goes...

Something like _Atomic int __ob_wrap, is trivial and good.

_Atomic int __ob_trap is either doable or impossible depending on how
you define the result to be on 'trap'. Specifically, the proposed
semantics, where it keeps the old value, make it impossible.

And _Atomic int __ob_saturate is equally 'challenging', since the
fundamental thing of 'reset to min/max on under/over-flow' is rather
a non-atomic kind of thing. Look at the trouble we went through with
refcount_t to sort of make this work.


^ permalink raw reply

* Re: [PATCH 5/5] types: Add standard __ob_trap and __ob_wrap scalar types
From: Peter Zijlstra @ 2026-04-01  9:38 UTC (permalink / raw)
  To: Kees Cook
  Cc: Linus Torvalds, Justin Stitt, Miguel Ojeda, Nathan Chancellor,
	Andrew Morton, Andy Shevchenko, Arnd Bergmann, Mark Rutland,
	Matthew Wilcox (Oracle), Suren Baghdasaryan, Thomas Gleixner,
	Finn Thain, Geert Uytterhoeven, Thomas Weißschuh, llvm,
	Marco Elver, Jonathan Corbet, Nicolas Schier, Greg Kroah-Hartman,
	linux-kernel, kasan-dev, linux-hardening, linux-doc, linux-kbuild
In-Reply-To: <202603311321.4EE9FEA@keescook>

On Tue, Mar 31, 2026 at 01:31:16PM -0700, Kees Cook wrote:

> int func()
> {
> 	...
> 	u8 __ob_trap product = 5;
> 	...
> 	product = a * b; // if store is truncated, goto __overflow
> 	...
> 	return product;
> 
> __overflow:
> 	pr_info("%u\n", product); // shows "5"
> 	return -1;
> }
> 
> (Isn't this just an implicit "try"?)

So I like this implicit try with a default label, and mostly I expect
this will be fine.

But as Linus already mentioned, sometimes you might want more. Could we
perhaps also have an explicit version, something along the lines of:

int func()
{
	int __ob_trap size;

	size = try(count * flex_size, __mul_overflow);
	size = try(size + base_size, __add_overflow);

	obj = kzalloc(size,...);

}

where we have something like:

#define try(stmt, _label) ({		\
	__label __overflow; 		\
	if (0) {			\
__overflow:				\
		goto _label;		\
	}				\
	stmt; })

That is, have the overflow trapped and confined in the
statement-expression by using the overflow label as a local label and
use this little trampoline to re-direct to a custom label.



^ permalink raw reply

* Re: [PATCH v2] bootconfig: Apply early options from embedded config
From: Kiryl Shutsemau @ 2026-04-01  9:40 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Breno Leitao, Jonathan Corbet, Shuah Khan, linux-kernel,
	linux-trace-kernel, linux-doc, oss, paulmck, rostedt, kernel-team
In-Reply-To: <20260401100237.ff9a37505d041a00e4d8658a@kernel.org>

On Wed, Apr 01, 2026 at 10:02:37AM +0900, Masami Hiramatsu wrote:
> On Tue, 31 Mar 2026 11:18:33 +0100
> Kiryl Shutsemau <kirill@shutemov.name> wrote:
> 
> > On Wed, Mar 25, 2026 at 11:22:04PM +0900, Masami Hiramatsu wrote:
> > > > diff --git a/init/main.c b/init/main.c
> > > > index 453ac9dff2da0..14a04c283fa48 100644
> > > > --- a/init/main.c
> > > > +++ b/init/main.c
> > > > @@ -416,9 +416,64 @@ static int __init warn_bootconfig(char *str)
> > > >  	return 0;
> > > >  }
> > > >  
> > > > +/*
> > > > + * do_early_param() is defined later in this file but called from
> > > > + * bootconfig_apply_early_params() below, so we need a forward declaration.
> > > > + */
> > > > +static int __init do_early_param(char *param, char *val,
> > > > +				 const char *unused, void *arg);
> > > > +
> > > > +/*
> > > > + * bootconfig_apply_early_params - dispatch kernel.* keys from the embedded
> > > > + * bootconfig as early_param() calls.
> > > > + *
> > > > + * early_param() handlers must run before most of the kernel initialises
> > > > + * (e.g. before the GIC driver reads irqchip.gicv3_pseudo_nmi).  A bootconfig
> > > > + * attached to the initrd arrives too late for this because the initrd is not
> > > > + * mapped yet when early params are processed.  The embedded bootconfig lives
> > > > + * in the kernel image itself (.init.data), so it is always reachable.
> > > > + *
> > > > + * This function is called from setup_boot_config() which runs in
> > > > + * start_kernel() before parse_early_param(), making the timing correct.
> > > > + */
> > > > +static void __init bootconfig_apply_early_params(void)
> > > 
> > > [sashiko comment]
> > > | Does this run early enough for architectural parameters?
> > > | While setup_boot_config() runs before parse_early_param() in start_kernel(),
> > > | it runs after setup_arch(). setup_boot_config() relies on xbc_init() which
> > > | uses the memblock allocator, requiring setup_arch() to have already
> > > | initialized it.
> > > | However, the kernel expects many early parameters (like mem=, earlycon,
> > > | noapic, and iommu) to be parsed during setup_arch() via the architecture's
> > > | call to parse_early_param(). Since setup_arch() completes before
> > > | setup_boot_config() runs, will these architectural early parameters be
> > > | silently ignored because the decisions they influence were already
> > > | finalized?
> > > 
> > > This is the major reason that I did not support early parameter
> > > in bootconfig. Some archs initialize kernel_cmdline in setup_arch()
> > > and setup early parameters in it.
> > > To fix this, we need to change setup_arch() for each architecture so
> > > that it calls this bootconfig_apply_early_params().
> > 
> > Hi Masami,
> > 
> > I have a question about bootconfig design. Is it necessary to parse the
> > bootconfig at boot time?
> > 
> > We could avoid a lot of complexity if we flattened the bootconfig into a
> > simple key-value list before embedding it in the kernel image or
> > attaching it to the initrd. That would eliminate the need for memory
> > allocations and allow the config to be used earlier during boot.
> 
> Hi Kiryl,
> 
> Thanks for a good question.
> 
> If it is embedded in the kernel, yes, we can compile it. But if it is
> attached to initrd, the kernel needs to verify it. So anyway we have to
> parse the key-value. Of course we can make it a binary data structure,
> but it still needs verification. Moreover, memory allocation is not
> required by design (the first design uses a statically allocated data).
> 
> Even if it is normalized as key-value, we can not trust the contents
> without outer verification system, like verified boot. Therefore, our
> basic strategy is to parse the text.

Hm. I am not sure what issues verification aims to solve. Could you
elaborate?

With normalized key-value structure, we should be able to extract the
needed value in the same way as with normal kernel command line, no?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

^ permalink raw reply

* [PATCH RESEND] checkpatch: Suppress warnings when Reported-by: is followed by Link:
From: Cryolitia PukNgae via B4 Relay @ 2026-04-01  9:47 UTC (permalink / raw)
  To: Andy Whitcroft, Joe Perches, Dwaipayan Ray, Lukas Bulwahn
  Cc: Andrew Morton, linux-doc, linux-mm, linux-kernel, zhanjun,
	niecheng1, kernel, Petr Vorel, Cryolitia PukNgae

From: Cryolitia PukNgae <cryolitia@uniontech.com>

> The tag should be followed by a Closes: tag pointing to the report,
> unless the report is not available on the web. The Link: tag can be
> used instead of Closes: if the patch fixes a part of the issue(s)
> being reported.

According to Documentation/process/submitting-patches.rst, Link: is
also acceptable to follow a Reported-by:, if the patch fixes a part
of the issue(s) being reported.

Reviewed-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Cryolitia PukNgae <cryolitia@uniontech.com>
---
 scripts/checkpatch.pl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index e56374662ff7..b8a098061181 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3223,10 +3223,10 @@ sub process {
 			if ($sign_off =~ /^reported(?:|-and-tested)-by:$/i) {
 				if (!defined $lines[$linenr]) {
 					WARN("BAD_REPORTED_BY_LINK",
-					     "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . "\n");
-				} elsif ($rawlines[$linenr] !~ /^closes:\s*/i) {
+					     "Reported-by: should be immediately followed by Closes: or Link: with a URL to the report\n" . $herecurr . "\n");
+				} elsif ($rawlines[$linenr] !~ /^(closes|link):\s*/i) {
 					WARN("BAD_REPORTED_BY_LINK",
-					     "Reported-by: should be immediately followed by Closes: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . "\n");
+					     "Reported-by: should be immediately followed by Closes: or Link: with a URL to the report\n" . $herecurr . $rawlines[$linenr] . "\n");
 				}
 			}
 		}

---
base-commit: 9147566d801602c9e7fc7f85e989735735bf38ba
change-id: 20260401-checkpatch-b97d0cbe7ee9

Best regards,
--  
Cryolitia PukNgae <cryolitia@uniontech.com>



^ permalink raw reply related

* Re: [PATCH net-next V9 02/14] devlink: Add helpers to lock nested-in instances
From: Cosmin Ratiu @ 2026-04-01 10:22 UTC (permalink / raw)
  To: jacob.e.keller@intel.com, Tariq Toukan, kuba@kernel.org
  Cc: allison.henderson@oracle.com, jiri@resnulli.us, Moshe Shemesh,
	davem@davemloft.net, daniel.zahka@gmail.com,
	donald.hunter@gmail.com, netdev@vger.kernel.org,
	matttbe@kernel.org, pabeni@redhat.com, horms@kernel.org,
	Parav Pandit, corbet@lwn.net, razor@blackwall.org, Dragos Tatulea,
	linux-kernel@vger.kernel.org, willemb@google.com, Jiri Pirko,
	Adithya Jayachandran, Dan Jurgens, leon@kernel.org,
	kees@kernel.org, vadim.fedorenko@linux.dev, Saeed Mahameed,
	shuah@kernel.org, andrew+netdev@lunn.ch, Mark Bloch,
	Shahar Shitrit, Carolina Jubran, Nimrod Oren,
	daniel@iogearbox.net, minhquangbui99@gmail.com, dw@davidwei.uk,
	skhan@linuxfoundation.org, Petr Machata, edumazet@google.com,
	antonio@openvpn.net, mst@redhat.com,
	linux-kselftest@vger.kernel.org, linux-rdma@vger.kernel.org,
	Shay Drori, sdf@fomichev.me, chuck.lever@oracle.com, Gal Pressman,
	joe@dama.to, linux-doc@vger.kernel.org
In-Reply-To: <c547be19-adaf-4442-be2b-debcbafa4191@intel.com>

On Tue, 2026-03-31 at 16:55 -0700, Jacob Keller wrote:
> On 3/31/2026 5:20 AM, Cosmin Ratiu wrote:
> > On Mon, 2026-03-30 at 19:08 -0700, Jakub Kicinski wrote:
> > > This is an AI-generated review of your patch. The human sending
> > > this
> > > email has considered the AI review valid, or at least plausible.
> > > ---
> > > devlink: Add helpers to lock nested-in instances
> > > 
> > > Upcoming code will need to obtain a reference to locked nested-in
> > > devlink instances. Add helpers to lock, obtain an already locked
> > > reference and unlock/unref the nested-in instance.
> > > 
> > > > diff --git a/net/devlink/core.c b/net/devlink/core.c
> > > > index eeb6a71f5f56e..db11248df7120 100644
> > > > --- a/net/devlink/core.c
> > > > +++ b/net/devlink/core.c
> > > 
> > > [ ... ]
> > > 
> > > > +/* Returns the nested in devlink object and validates its lock
> > > > is
> > > > held. */
> > > > +struct devlink *devlink_nested_in_get_locked(struct
> > > > devlink_rel
> > > > *rel)
> > > > +{
> > > > +	struct devlink *devlink;
> > > > +	unsigned long index;
> > > > +
> > > > +	if (!rel)
> > > > +		return NULL;
> > > > +	index = rel->nested_in.devlink_index;
> > > > +	devlink = xa_find(&devlinks, &index, index,
> > > > DEVLINK_REGISTERED);
> > > > +	if (devlink)
> > > > +		devl_assert_locked(devlink);
> > > 
> > > Can this dereference a freed pointer if the lock is not actually
> > > held?
> > > 
> > > xa_find() acquires and releases the RCU read lock internally.
> > > When it
> > > returns,
> > > the devlink pointer is no longer protected by RCU or a reference
> > > count.
> > > 
> > > If a caller invokes this helper without holding the devlink lock
> > > (which is the
> > > exact scenario the assertion is designed to detect), another
> > > thread
> > > could
> > > concurrently unregister and free the devlink.
> > > 
> > > If the RCU grace period expires before the assertion executes,
> > > devl_assert_locked() would dereference freed memory. Should the
> > > lookup and
> > > assertion be enclosed within an explicit rcu_read_lock() block to
> > > safely
> > > assert the lock?
> > 
> > This function is meant to be invoked by callers already holding all
> > locks from a given instance up the nesting tree. Calling it outside
> > this context could indeed lead to a race as described, where
> > another
> > entity unregisters a devlink about-to-be-asserted on.
> > 
> 
> Hmm. I'm struggling to follow this. If you already expect the parent
> to
> hold the nested devlink's lock, it must have a pointer to this
> devlink
> instance. In that case, why would you even need
> devlink_nested_in_get_locked in the first place?

After some more intense staring, I realized that intermediate instances
don't actually need to be locked, only the ancestor needs to. With that
in mind, the code gets simplified:
- devlink_nested_in_get_locked and devlink_nested_in_put_unlock can be
removed.
- recursive unlocking in devl_rate_unlock is gone.

> 
> > All current callers hold the locks, but to make it more robust, I
> > will
> > add the rcu_read_lock/unlock section around the find+assert.
> > 
> 
> You should at least do that. It is the correct pattern when dealing
> with
> reference counting data structures from the xarray. That's already
> how
> devlinks_xa_get() is implemented.
> 
> > > 
> > > > +	return devlink;
> > > > +}
> > > > +
> > > > +void devlink_nested_in_put_unlock(struct devlink_rel *rel)
> > > > +{
> > > > +	struct devlink *devlink =
> > > > devlink_nested_in_get_locked(rel);
> > > 
> > > Could this corrupt the mutex state and cause a refcount
> > > underflow?
> > > 
> > > This helper takes a devlink_rel pointer instead of the devlink
> > > pointer
> > > acquired by devlink_nested_in_get_lock(), and performs a
> > > secondary
> > > global
> > > lookup to find the devlink.
> > > 
> > > If a caller mistakenly calls this in an error cleanup path where
> > > they
> > > did not
> > > actually acquire the lock, the global xa_find() will still locate
> > > the
> > > registered devlink. This would execute devl_unlock() and
> > > devlink_put() on a
> > > devlink the current thread does not own.
> > > 
> 
> If the caller already held the lock, why is
> devlink_nested_in_put_unlock
> calling the devl_unlock instead of the caller anyways? That seems
> confusing. Wouldn't the normal pattern be to
> devlink_nested_in_get_lock()? Oh, that is a separate function. Ok I
> see.
> 
> > > Would it be safer for unlock/put helpers to take the exact
> > > pointer
> > > returned by
> > > the lock/get helper to ensure safe resource cleanup?
> > 
> > 2 issues here:
> > 1) Mistakenly calling this without having acquired the lock. This
> > is
> > akin to saying mutex_unlock is dangerous if the lock isn't held.
> > Technically true, but moot.
> > 2) The rel argument: It is intentional, so that all 3 functions are
> > symmetrical.
> > 
> 
> IMO it would make more sense for the put version to be a put on the
> returned devlink pointer. I guess its not symmetrical, but it removes
> the need to perform the second lookup and it makes it easier to
> reason
> about the pointer you're releasing being the same one.
> 
> Having put take different arguments from get is the usual pattern for
> such a behavior.
> 
> Also devlink_nested_in_get_locked() doesn't increase the ref count so
> it
> is sort of "relying" on the caller already having a reference to it,
> which makes me think its not very useful. The only valid way to call
> this function as it exists now safely is to already hold a reference
> to
> the object, which also already requires you to have a valid pointer
> making me wonder why you'd ever need to call it in the first place.
> 
> The only example you have is to make devlink_nested_in_put_unlock()
> take
> a devlink_rel pointer as its argument instead of just calling it on
> the
> pointer returned by devlink_nested_in_get_lock().
> 
> This implementation seems confusing and likely to lead to errors.

I hope the next version will be more suitable.
Thank you for the comments and suggestions.

> 
> Thanks,
> Jake


^ permalink raw reply

* [RFC PATCH] Documentation: Add managed interrupts
From: Sebastian Andrzej Siewior @ 2026-04-01 11:02 UTC (permalink / raw)
  To: linux-doc, linux-kernel
  Cc: Aaron Tomlin, Christoph Hellwig, Frederic Weisbecker, Jens Axboe,
	Jonathan Corbet, Ming Lei, Thomas Gleixner, Valentin Schneider,
	Waiman Long, Peter Zijlstra, John Ogness

I stumbled upon "isolcpus=managed_irq" which is the last piece which
can only be handled by isolcpus= and has no runtime knob. I knew roughly
what managed interrupts should do but I lacked some details how it is
used and what the managed_irq sub parameter means in practice.

This documents what we have as of today and how it works. I added some
examples how the parameter affects the configuration. Did I miss
something?

Given that the spreading as computed by group_cpus_evenly() does not take
the mask of isolated CPUs into account I'm not sure how relevant the
managed_irq argument is. The virtio_scsi driver has no way to limit the
interrupts and I don't see this for the nvme. Even if the number of
queues can be reduced to two (as in the example) it is still spread
evenly in the system instead and the isolated CPUs are not taken into
account.
To make this worse, you can even argue further whether or not the
application on the isolated CPU wants to receive the interrupt directly
or would prefer not to.

Given all this, I am not sure if it makes sense to add 'io_queue' to the
mix or if it could be incorporated into 'managed_irq'.

One more point: Given that isolcpus= is marked deprecated as of commit
   b0d40d2b22fe4 ("sched/isolation: Document isolcpus= boot parameter flags, mark it deprecated")

and the 'managed_irq' is evaluated at device's probe time it would
require additional callbacks to re-evaluate the situation. Probably for
'io_queue', too. Does it make sense or should we simply drop the
"deprecation" notice and allow using it long term?
Dynamic partitions work with cpusets; there is this (managed_irq)
limitation, but is it really? And if static partition is the use case, why
bother?

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 Documentation/core-api/irq/index.rst       |   1 +
 Documentation/core-api/irq/managed_irq.rst | 116 +++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 Documentation/core-api/irq/managed_irq.rst

diff --git a/Documentation/core-api/irq/index.rst b/Documentation/core-api/irq/index.rst
index 0d65d11e54200..13bd24dd2b1cc 100644
--- a/Documentation/core-api/irq/index.rst
+++ b/Documentation/core-api/irq/index.rst
@@ -9,3 +9,4 @@ IRQs
    irq-affinity
    irq-domain
    irqflags-tracing
+   managed_irq
diff --git a/Documentation/core-api/irq/managed_irq.rst b/Documentation/core-api/irq/managed_irq.rst
new file mode 100644
index 0000000000000..05e295f3c289d
--- /dev/null
+++ b/Documentation/core-api/irq/managed_irq.rst
@@ -0,0 +1,116 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+Affinity managed interrupts
+===========================
+
+The IRQ core provides support for managing interrupts according to a specified
+CPU affinity. Under normal operation, an interrupt is associated with a
+particular CPU. If that CPU is taken offline, the interrupt is migrated to
+another online CPU.
+
+Devices with large numbers of interrupt vectors can stress the available vector
+space. For example, an NVMe device with 128 I/O queues typically requests one
+interrupt per queue on systems with at least 128 CPUs. Two such devices
+therefore request 256 interrupts. On x86, the interrupt vector space is
+notoriously low, providing only 256 vectors per CPU, and the kernel reserves a
+subset of these, further reducing the number available for device interrupts.
+In practice this is not an issue because the interrupts are distributed across
+many CPUs, so each CPU only receives a small number of vectors.
+
+During system suspend, however, all secondary CPUs are taken offline and all
+interrupts are migrated to the single CPU that remains online. This can exhaust
+the available interrupt vectors on that CPU and cause the suspend operation to
+fail.
+
+Affinity‑managed interrupts address this limitation. Each interrupt is assigned
+a CPU affinity mask that specifies the set of CPUs on which the interrupt may
+be targeted. When a CPU in the mask goes offline, the interrupt is moved to the
+next CPU in the mask. If the last CPU in the mask goes offline, the interrupt
+is shut down. Drivers using affinity‑managed interrupts must ensure that the
+associated queue is quiesced before the interrupt is disabled so that no
+further interrupts are generated. When a CPU in the affinity mask comes back
+online, the interrupt is re‑enabled.
+
+Implementation
+--------------
+
+Devices must provide per‑instance interrupts, such as per‑I/O‑queue interrupts
+for storage devices like NVMe. The driver allocates interrupt vectors with the
+required affinity settings using struct irq_affinity. For MSI‑X devices, this
+is done via pci_alloc_irq_vectors_affinity() with the PCI_IRQ_AFFINITY flag
+set.
+
+Based on the provided affinity information, the IRQ core attempts to spread the
+interrupts evenly across the system. The affinity masks are computed during
+this allocation step, but the final IRQ assignment is performed when
+request_irq() is invoked.
+
+Isolated CPUs
+-------------
+
+The affinity of managed interrupts is handled entirely in the kernel and cannot
+be modified from user space through the /proc interfaces. The managed_irq
+sub‑parameter of the isolcpus boot option specifies a CPU mask that managed
+interrupts should attempt to avoid. This isolation is best‑effort and only
+applies if the automatically assigned interrupt mask also contains online CPUs
+outside the avoided mask. If the requested mask contains only isolated CPUs,
+the setting has no effect.
+
+CPUs listed in the avoided mask remain part of the interrupt’s affinity mask.
+This means that if all non‑isolated CPUs go offline while isolated CPUs remain
+online, the interrupt will be assigned to one of the isolated CPUs.
+
+The following examples assume a system with 8 CPUs.
+
+- A QEMU instance is booted with "-device virtio-scsi-pci".
+  The MSI‑X device exposes 11 interrupts: 3 "management" interrupts and 8
+  "queue" interrupts. The driver requests the 8 queue interrupts, each of which
+  is affine to exactly one CPU. If that CPU goes offline, the interrupt is shut
+  down.
+
+  Assuming interrupt 48 is one of the queue interrupts, the following appears::
+
+    /proc/irq/48/effective_affinity_list:7
+    /proc/irq/48/smp_affinity_list:7
+
+  This indicates that the interrupt is served only by CPU7. Shutting down CPU7
+  does not migrate the interrupt to another CPU::
+
+    /proc/irq/48/effective_affinity_list:0
+    /proc/irq/48/smp_affinity_list:7
+
+  This can be verified via the debugfs interface
+  (/sys/kernel/debug/irq/irqs/48). The dstate field will include
+  IRQD_IRQ_DISABLED, IRQD_IRQ_MASKED and IRQD_MANAGED_SHUTDOWN.
+
+- A QEMU instance is booted with "-device virtio-scsi-pci,num_queues=2"
+  and the kernel command line includes:
+  "irqaffinity=0,1 isolcpus=domain,2-7 isolcpus=managed_irq,1-3,5-7".
+  The MSI‑X device exposes 5 interrupts: 3 management interrupts and 2 queue
+  interrupts. The management interrupts follow the irqaffinity= setting. The
+  queue interrupts are spread across available CPUs::
+
+    /proc/irq/47/effective_affinity_list:0
+    /proc/irq/47/smp_affinity_list:0-3
+    /proc/irq/48/effective_affinity_list:4
+    /proc/irq/48/smp_affinity_list:4-7
+
+  The two queue interrupts are evenly distributed. Interrupt 48 is placed on CPU4
+  because the managed_irq mask avoids CPUs 5–7 when possible.
+
+  Replacing the managed_irq argument with "isolcpus=managed_irq,1-3,4-5,7"
+  results in::
+
+    /proc/irq/48/effective_affinity_list:6
+    /proc/irq/48/smp_affinity_list:4-7
+
+  Interrupt 48 is now served on CPU6 because the system avoids CPUs 4, 5 and
+  7. If CPU6 is taken offline, the interrupt migrates to one of the "isolated"
+  CPUs::
+
+    /proc/irq/48/effective_affinity_list:7
+    /proc/irq/48/smp_affinity_list:4-7
+
+  The interrupt is shut down once all CPUs listed in its smp_affinity mask are
+  offline.
-- 
2.53.0


^ permalink raw reply related

* [PATCH v6] hwmon: add driver for ARCTIC Fan Controller
From: Aureo Serrano de Souza @ 2026-04-01 11:25 UTC (permalink / raw)
  To: linux-hwmon
  Cc: linux, linux, corbet, skhan, linux-doc, linux-kernel,
	Aureo Serrano de Souza

Add hwmon driver for the ARCTIC Fan Controller, a USB HID device
(VID 0x3904, PID 0xF001) with 10 fan channels. Exposes fan speed in
RPM (read-only) and PWM duty cycle (0-255, read/write) via sysfs.

The device pushes IN reports at ~1 Hz containing RPM readings. PWM is
set via OUT reports; the device applies the new duty cycle and sends
back a 2-byte ACK (Report ID 0x02). The driver waits up to 1 s for
the ACK using a completion. Measured device latency: max ~563 ms over
500 iterations. PWM control is manual-only: the device never changes
duty cycle autonomously.

raw_event() may run in hardirq context, so fan_rpm[] is protected by
a spinlock with irq-save. pwm_duty[] is also protected by this spinlock
because reset_resume() clears it outside the hwmon core lock. The OUT
report buffer is built and write_pending is armed under the same lock so
that no reset_resume() can race with the pwm_duty[] snapshot. priv->buf
is exclusively accessed by write(), which the hwmon core serializes.

Signed-off-by: Aureo Serrano de Souza <aureo.serrano@arctic.de>
---
Thanks to Guenter Roeck and Thomas Weißschuh for the reviews.

Changes since v5:
- arctic_fan_probe(): switch from devm_hwmon_device_register_with_info()
  to hwmon_device_register_with_info(); store the returned pointer in
  priv->hwmon_dev for explicit teardown in remove()
- arctic_fan_remove(): call hwmon_device_unregister(priv->hwmon_dev)
  before hid_device_io_stop/hid_hw_close/hid_hw_stop; this closes the
  use-after-free window where a concurrent sysfs write could call
  hid_hw_output_report() on an already-stopped device; matches the
  removal pattern used by nzxt-smart2 and aquacomputer_d5next
- arctic_fan_write(): expand write_pending comment to document the
  residual theoretical late-ACK race (unfixable without a correlation
  ID in the device ACK report) and its practical impossibility (observed
  max ACK latency ~563 ms, timeout 1 s; a delay > 1 s indicates a
  non-functional device)
- arctic_fan_reset_resume(), arctic_fan_read(), arctic_fan_write():
  extend in_report_lock coverage to pwm_duty[]; reset_resume() clears
  pwm_duty[] outside the hwmon core lock, so all paths that read or
  write pwm_duty[] now hold in_report_lock to prevent a data race
  during resume
- arctic_fan_write(): build the OUT report buffer inside in_report_lock
  so reset_resume() cannot clear pwm_duty[] between the pwm_duty[]
  snapshot and the buffer write; this makes the lock coverage complete

Changes since v4:
- arctic_fan_write(): switch to wait_for_completion_timeout() (non-
  interruptible); eliminates the signal-interrupted write case of the
  late-ACK race that write_pending could not fully prevent
- arctic_fan_write(): guard pwm_duty[channel] commit with
  ack_status == 0 check; a device error ACK (status 0x01) no longer
  silently poisons the cached duty used in future OUT reports
- arctic_fan_probe()/remove(): replace devm_add_action_or_reset() +
  no-op remove() with explicit hid_device_io_stop/hid_hw_close/
  hid_hw_stop in remove(); devm_add_action_or_reset() was called after
  hdev->driver = NULL, causing a NULL deref in hid_hw_close() on unbind
- add reset_resume callback: device resets PWM to hardware defaults on
  power loss during suspend; driver now clears cached pwm_duty[] on
  reset-resume so stale pre-suspend values are not re-sent as if valid
- Documentation/hwmon/arctic_fan_controller.rst: document suspend/
  resume behaviour and the updated pwm[1-10] read semantics

Changes since v3:
- buf[]: upgrade from __aligned(8) to ____cacheline_aligned so the
  DMA buffer occupies its own cache line, preventing false sharing with
  adjacent fan_rpm[]/pwm_duty[] fields on non-coherent architectures
- arctic_fan_write(): add write_pending flag (protected by
  in_report_lock) so raw_event() delivers ACKs only while a write is
  in flight
- arctic_fan_write(): commit pwm_duty[channel] only after the device
  ACKs the command; a failed or timed-out write no longer leaves a
  stale value in the cached duty state
- arctic_fan_probe(): start IO (hid_device_io_start) before registering
  with hwmon; previously a sysfs write arriving between hwmon
  registration and io_start could send an OUT report whose ACK would be
  discarded by the HID core, causing a spurious timeout
- Documentation/hwmon/arctic_fan_controller.rst: document that cached
  PWM values start at 0 (hardware state unknown at probe) and that each
  OUT report carries all 10 channel values

Changes since v2:
- buf[]: add __aligned(8) for DMA safety
- ARCTIC_ACK_TIMEOUT_MS: restore 1000 ms; note observed max ~563 ms
- arctic_fan_parse_report(): replace hwmon_lock/hwmon_unlock with
  spin_lock_irqsave; hwmon_lock() may sleep and is unsafe when
  raw_event() runs in hardirq/softirq context
- arctic_fan_raw_event(): use spin_lock_irqsave for ACK path
- arctic_fan_write(): use spin_lock_irqsave for completion reinit
- arctic_fan_write(): clamp val to [0, 255] before u8 cast
- remove priv->hwmon_dev (no longer needed)

Changes since v1:
- Use hid_dbg() instead of module_param debug flag
- Move hid_device_id table adjacent to hid_driver struct
- Use get_unaligned_le16() for RPM parsing
- Remove impossible bounds/NULL checks; remove retry loop
- Add hid_is_usb() guard
- Do not update pwm_duty from IN reports (device is manual-only)
- Add completion/ACK mechanism for OUT report acknowledgement
- Add Documentation/hwmon/arctic_fan_controller.rst and MAINTAINERS

diff --git a/Documentation/hwmon/arctic_fan_controller.rst b/Documentation/hwmon/arctic_fan_controller.rst
new file mode 100644
index 0000000000..b5be88ae46
--- /dev/null
+++ b/Documentation/hwmon/arctic_fan_controller.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0-or-later
+
+Kernel driver arctic_fan_controller
+=====================================
+
+Supported devices:
+
+* ARCTIC Fan Controller (USB HID, VID 0x3904, PID 0xF001)
+
+Author: Aureo Serrano de Souza <aureo.serrano@arctic.de>
+
+Description
+-----------
+
+This driver provides hwmon support for the ARCTIC Fan Controller, a USB
+Custom HID device with 10 fan channels. The device sends IN reports about
+once per second containing current RPM values (bytes 11-30, 10 x uint16 LE).
+Fan speed control is manual-only: the device does not change PWM
+autonomously; it only applies a new duty cycle when it receives an OUT
+report from the host.
+
+After the device applies an OUT report, it sends back a 2-byte ACK IN
+report (Report ID 0x02, byte 1 = 0x00 on success) confirming the command
+was applied.
+
+Usage notes
+-----------
+
+Since it is a USB device, hotplug is supported. The device is autodetected.
+
+The device does not support GET_REPORT, so the driver cannot read back the
+current hardware PWM state at probe time. The cached PWM values (readable
+via pwm[1-10]) start at 0 and reflect only values that have been
+successfully written. Because each OUT report carries all 10 channel values,
+writing a single channel also sends the cached values for all other channels.
+Users should set all channels to the desired values before relying on the
+cached state.
+
+On system suspend, the device may lose power and reset its PWM channels to
+hardware defaults. The driver clears its cached duty values on resume so
+that reads reflect the unknown hardware state rather than stale pre-suspend
+values. Userspace is responsible for re-applying the desired duty cycles
+after resume.
+
+Sysfs entries
+-------------
+
+================ ==============================================================
+fan[1-10]_input  Fan speed in RPM (read-only). Updated from IN reports at ~1 Hz.
+pwm[1-10]        PWM duty cycle (0-255). Write: sends an OUT report setting the
+                 duty cycle (scaled from 0-255 to 0-100% for the device);
+                 the cached value is updated only after the device ACKs the
+                 command with a success status. Read: returns the last
+                 successfully written value; initialized to 0 at driver load
+                 and after resume (hardware state unknown).
+================ ==============================================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index b2ca8513cf..c34713040e 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -42,6 +42,7 @@ Hardware Monitoring Kernel Drivers
    aht10
    amc6821
    aquacomputer_d5next
+   arctic_fan_controller
    asb100
    asc7621
    aspeed-g6-pwm-tach
diff --git a/MAINTAINERS b/MAINTAINERS
index 96ea84948d..ec3112bd41 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2053,6 +2053,13 @@ S:	Maintained
 F:	drivers/net/arcnet/
 F:	include/uapi/linux/if_arcnet.h

+ARCTIC FAN CONTROLLER DRIVER
+M:	Aureo Serrano de Souza <aureo.serrano@arctic.de>
+L:	linux-hwmon@vger.kernel.org
+S:	Maintained
+F:	Documentation/hwmon/arctic_fan_controller.rst
+F:	drivers/hwmon/arctic_fan_controller.c
+
 ARM AND ARM64 SoC SUB-ARCHITECTURES (COMMON PARTS)
 M:	Arnd Bergmann <arnd@arndb.de>
 M:	Krzysztof Kozlowski <krzk@kernel.org>
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 328867242c..6c90a8dd40 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -388,6 +388,18 @@ config SENSORS_APPLESMC
 	  Say Y here if you have an applicable laptop and want to experience
 	  the awesome power of applesmc.

+config SENSORS_ARCTIC_FAN_CONTROLLER
+	tristate "ARCTIC Fan Controller"
+	depends on USB_HID
+	help
+	  If you say yes here you get support for the ARCTIC Fan Controller,
+	  a USB HID device (VID 0x3904, PID 0xF001) with 10 fan channels.
+	  The driver exposes fan speed (RPM) and PWM control via the hwmon
+	  sysfs interface.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called arctic_fan_controller.
+
 config SENSORS_ARM_SCMI
 	tristate "ARM SCMI Sensors"
 	depends on ARM_SCMI_PROTOCOL
diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile
index 5833c807c6..ef831c3375 100644
--- a/drivers/hwmon/Makefile
+++ b/drivers/hwmon/Makefile
@@ -49,6 +49,7 @@ obj-$(CONFIG_SENSORS_ADT7475)	+= adt7475.o
 obj-$(CONFIG_SENSORS_AHT10)	+= aht10.o
 obj-$(CONFIG_SENSORS_APPLESMC)	+= applesmc.o
 obj-$(CONFIG_SENSORS_AQUACOMPUTER_D5NEXT) += aquacomputer_d5next.o
+obj-$(CONFIG_SENSORS_ARCTIC_FAN_CONTROLLER)	+= arctic_fan_controller.o
 obj-$(CONFIG_SENSORS_ARM_SCMI)	+= scmi-hwmon.o
 obj-$(CONFIG_SENSORS_ARM_SCPI)	+= scpi-hwmon.o
 obj-$(CONFIG_SENSORS_AS370)	+= as370-hwmon.o
diff --git a/drivers/hwmon/arctic_fan_controller.c b/drivers/hwmon/arctic_fan_controller.c
new file mode 100644
index 0000000000..2bfb003f01
--- /dev/null
+++ b/drivers/hwmon/arctic_fan_controller.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Linux hwmon driver for ARCTIC Fan Controller
+ *
+ * USB Custom HID device with 10 fan channels.
+ * Exposes fan RPM (input) and PWM (0-255) via hwmon. Device pushes IN reports
+ * at ~1 Hz; no GET_REPORT. OUT reports set PWM duty (bytes 1-10, 0-100%).
+ * PWM is manual-only: the device does not change duty autonomously, only
+ * when it receives an OUT report from the host.
+ */
+
+#include <linux/cache.h>
+#include <linux/completion.h>
+#include <linux/err.h>
+#include <linux/hid.h>
+#include <linux/hwmon.h>
+#include <linux/jiffies.h>
+#include <linux/minmax.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/unaligned.h>
+
+#define ARCTIC_VID			0x3904
+#define ARCTIC_PID			0xF001
+#define ARCTIC_NUM_FANS			10
+#define ARCTIC_OUTPUT_REPORT_ID		0x01
+#define ARCTIC_REPORT_LEN		32
+#define ARCTIC_RPM_OFFSET		11	/* bytes 11-30: 10 x uint16 LE */
+/* ACK report: device sends Report ID 0x02, 2 bytes (ID + status) after applying OUT report */
+#define ARCTIC_ACK_REPORT_ID		0x02
+#define ARCTIC_ACK_REPORT_LEN		2
+/*
+ * Time to wait for ACK report after send.
+ * Measured over 500 iterations: max ~563 ms. Keep 1 s as margin.
+ */
+#define ARCTIC_ACK_TIMEOUT_MS		1000
+
+struct arctic_fan_data {
+	struct hid_device *hdev;
+	struct device *hwmon_dev;	/* stored for explicit unregister in remove() */
+	spinlock_t in_report_lock;	/* protects fan_rpm, ack_status, write_pending, pwm_duty */
+	struct completion in_report_received; /* ACK (ID 0x02) received in raw_event */
+	int ack_status;			/* 0 = OK, negative errno on device error */
+	bool write_pending;		/* true while an OUT report ACK is in flight */
+	u32 fan_rpm[ARCTIC_NUM_FANS];
+	u8 pwm_duty[ARCTIC_NUM_FANS];	/* 0-255 matching sysfs range; converted to 0-100 on send */
+	/*
+	 * OUT report buffer. Cache-line aligned so it occupies its own cache
+	 * line, preventing DMA cache-coherency issues with adjacent fields
+	 * (fan_rpm[], pwm_duty[]) on non-coherent architectures.
+	 * Embedded in the devm_kzalloc'd struct so it is heap-allocated and
+	 * passes usb_hcd_map_urb_for_dma(). Serialized by the hwmon core.
+	 */
+	u8 buf[ARCTIC_REPORT_LEN] ____cacheline_aligned;
+};
+
+/*
+ * Parse RPM values from the periodic status report (10 x uint16 LE at rpm_off).
+ * pwm_duty is not updated from the report: the device is manual-only, so the
+ * host cache is the authoritative source for PWM.
+ * Called from raw_event which may run in IRQ context; must not sleep.
+ */
+static void arctic_fan_parse_report(struct arctic_fan_data *priv, u8 *buf,
+				    int len, int rpm_off)
+{
+	unsigned long flags;
+	int i;
+
+	if (len < rpm_off + 20)
+		return;
+
+	spin_lock_irqsave(&priv->in_report_lock, flags);
+	for (i = 0; i < ARCTIC_NUM_FANS; i++)
+		priv->fan_rpm[i] = get_unaligned_le16(&buf[rpm_off + i * 2]);
+	spin_unlock_irqrestore(&priv->in_report_lock, flags);
+}
+
+/*
+ * raw_event: IN reports.
+ *
+ * Status report: Report ID 0x01, 32 bytes:
+ *   byte 0 = report ID, bytes 1-10 = PWM 0-100%, bytes 11-30 = 10 x RPM uint16 LE.
+ *   Device pushes these at ~1 Hz; no GET_REPORT.
+ *
+ * ACK report: Report ID 0x02, 2 bytes:
+ *   byte 0 = 0x02, byte 1 = status (0x00 = OK, 0x01 = ERROR).
+ *   Sent once after accepting and applying an OUT report (ID 0x01).
+ */
+static int arctic_fan_raw_event(struct hid_device *hdev,
+				struct hid_report *report, u8 *data, int size)
+{
+	struct arctic_fan_data *priv = hid_get_drvdata(hdev);
+	unsigned long flags;
+
+	hid_dbg(hdev, "arctic_fan: raw_event id=%u size=%d\n", report->id, size);
+
+	if (report->id == ARCTIC_ACK_REPORT_ID && size == ARCTIC_ACK_REPORT_LEN) {
+		spin_lock_irqsave(&priv->in_report_lock, flags);
+		/*
+		 * Only deliver if a write is in flight. An ACK arriving while
+		 * no write is pending (e.g. late, after a timeout) is dropped;
+		 * see arctic_fan_write() for the race this cannot close.
+		 */
+		if (priv->write_pending) {
+			priv->ack_status = data[1] == 0x00 ? 0 : -EIO;
+			complete(&priv->in_report_received);
+		}
+		spin_unlock_irqrestore(&priv->in_report_lock, flags);
+		return 0;
+	}
+
+	if (report->id != ARCTIC_OUTPUT_REPORT_ID || size != ARCTIC_REPORT_LEN) {
+		hid_dbg(hdev, "arctic_fan: raw_event id=%u size=%d ignored\n",
+			report->id, size);
+		return 0;
+	}
+
+	arctic_fan_parse_report(priv, data, size, ARCTIC_RPM_OFFSET);
+	return 0;
+}
+
+static umode_t arctic_fan_is_visible(const void *data,
+				     enum hwmon_sensor_types type,
+				     u32 attr, int channel)
+{
+	if (type == hwmon_fan && attr == hwmon_fan_input)
+		return 0444;
+	if (type == hwmon_pwm && attr == hwmon_pwm_input)
+		return 0644;
+	return 0;
+}
+
+static int arctic_fan_read(struct device *dev, enum hwmon_sensor_types type,
+			   u32 attr, int channel, long *val)
+{
+	struct arctic_fan_data *priv = dev_get_drvdata(dev);
+	unsigned long flags;
+
+	if (type == hwmon_fan && attr == hwmon_fan_input) {
+		spin_lock_irqsave(&priv->in_report_lock, flags);
+		*val = priv->fan_rpm[channel];
+		spin_unlock_irqrestore(&priv->in_report_lock, flags);
+		return 0;
+	}
+	if (type == hwmon_pwm && attr == hwmon_pwm_input) {
+		spin_lock_irqsave(&priv->in_report_lock, flags);
+		*val = priv->pwm_duty[channel];
+		spin_unlock_irqrestore(&priv->in_report_lock, flags);
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static int arctic_fan_write(struct device *dev, enum hwmon_sensor_types type,
+			    u32 attr, int channel, long val)
+{
+	struct arctic_fan_data *priv = dev_get_drvdata(dev);
+	u8 new_duty = (u8)clamp_val(val, 0, 255);
+	unsigned long flags;
+	unsigned long t;
+	int i, ret;
+
+	/*
+	 * Build the buffer and arm write_pending under in_report_lock so that
+	 * reset_resume() cannot clear pwm_duty[] between the pwm_duty[] read
+	 * and the buffer write, and raw_event() cannot deliver a stale ACK
+	 * from a previous write into this write's completion.
+	 *
+	 * priv->buf is heap-allocated (embedded in the devm_kzalloc'd struct),
+	 * satisfying usb_hcd_map_urb_for_dma(). Exclusively accessed by
+	 * write() which the hwmon core serializes.
+	 *
+	 * pwm_duty[channel] is committed only after a positive device ACK so a
+	 * failed or timed-out write does not corrupt the cached state.
+	 *
+	 * Residual theoretical race: if write A times out (write_pending
+	 * cleared), write B sets write_pending = true, and a late ACK from
+	 * write A—delayed beyond ARCTIC_ACK_TIMEOUT_MS—arrives during write
+	 * B's pending window, it would falsely satisfy write B's completion.
+	 * This cannot be prevented in driver code without protocol support
+	 * (for example, a correlation ID echoed in the device ACK report).
+	 * In testing, observed ACK latency stayed below the 1 s timeout
+	 * (maximum ~563 ms over 500 iterations).
+	 *
+	 * The wait is non-interruptible so that a signal cannot cause write()
+	 * to return early while the OUT report is already in flight; an
+	 * interruptible early return would create the same late-ACK window
+	 * without even the timeout guard.
+	 * Serialized by the hwmon core: only one arctic_fan_write() at a time.
+	 * Use irqsave to match the IRQ context in which raw_event may run.
+	 */
+	spin_lock_irqsave(&priv->in_report_lock, flags);
+	priv->buf[0] = ARCTIC_OUTPUT_REPORT_ID;
+	for (i = 0; i < ARCTIC_NUM_FANS; i++) {
+		u8 d = i == channel ? new_duty : priv->pwm_duty[i];
+
+		priv->buf[1 + i] = DIV_ROUND_CLOSEST((unsigned int)d * 100, 255);
+	}
+	priv->ack_status = -ETIMEDOUT;
+	priv->write_pending = true;
+	reinit_completion(&priv->in_report_received);
+	spin_unlock_irqrestore(&priv->in_report_lock, flags);
+
+	ret = hid_hw_output_report(priv->hdev, priv->buf, ARCTIC_REPORT_LEN);
+	if (ret < 0) {
+		spin_lock_irqsave(&priv->in_report_lock, flags);
+		priv->write_pending = false;
+		spin_unlock_irqrestore(&priv->in_report_lock, flags);
+		return ret;
+	}
+
+	t = wait_for_completion_timeout(&priv->in_report_received,
+					msecs_to_jiffies(ARCTIC_ACK_TIMEOUT_MS));
+	spin_lock_irqsave(&priv->in_report_lock, flags);
+	priv->write_pending = false;
+	/* Commit inside the lock so reset_resume() cannot race with this write */
+	if (t && priv->ack_status == 0)
+		priv->pwm_duty[channel] = new_duty;
+	spin_unlock_irqrestore(&priv->in_report_lock, flags);
+
+	if (!t)
+		return -ETIMEDOUT;
+	return priv->ack_status; /* 0=OK, -EIO=device error */
+}
+
+static const struct hwmon_ops arctic_fan_ops = {
+	.is_visible = arctic_fan_is_visible,
+	.read = arctic_fan_read,
+	.write = arctic_fan_write,
+};
+
+static const struct hwmon_channel_info *arctic_fan_info[] = {
+	HWMON_CHANNEL_INFO(fan,
+			   HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT,
+			   HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT,
+			   HWMON_F_INPUT, HWMON_F_INPUT, HWMON_F_INPUT,
+			   HWMON_F_INPUT),
+	HWMON_CHANNEL_INFO(pwm,
+			   HWMON_PWM_INPUT, HWMON_PWM_INPUT, HWMON_PWM_INPUT,
+			   HWMON_PWM_INPUT, HWMON_PWM_INPUT, HWMON_PWM_INPUT,
+			   HWMON_PWM_INPUT, HWMON_PWM_INPUT, HWMON_PWM_INPUT,
+			   HWMON_PWM_INPUT),
+	NULL
+};
+
+static const struct hwmon_chip_info arctic_fan_chip_info = {
+	.ops = &arctic_fan_ops,
+	.info = arctic_fan_info,
+};
+
+static int arctic_fan_reset_resume(struct hid_device *hdev)
+{
+	struct arctic_fan_data *priv = hid_get_drvdata(hdev);
+	unsigned long flags;
+
+	/*
+	 * The device resets its PWM channels to hardware defaults on power
+	 * loss during suspend. Clear the cached duty values so they reflect
+	 * the unknown hardware state, consistent with probe-time behaviour
+	 * (the device has no GET_REPORT support). Hold in_report_lock so
+	 * this does not race with a concurrent pwm read or write callback.
+	 */
+	spin_lock_irqsave(&priv->in_report_lock, flags);
+	memset(priv->pwm_duty, 0, sizeof(priv->pwm_duty));
+	spin_unlock_irqrestore(&priv->in_report_lock, flags);
+	return 0;
+}
+
+static int arctic_fan_probe(struct hid_device *hdev,
+			    const struct hid_device_id *id)
+{
+	struct arctic_fan_data *priv;
+	int ret;
+
+	if (!hid_is_usb(hdev))
+		return -ENODEV;
+
+	ret = hid_parse(hdev);
+	if (ret)
+		return ret;
+
+	priv = devm_kzalloc(&hdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->hdev = hdev;
+	spin_lock_init(&priv->in_report_lock);
+	init_completion(&priv->in_report_received);
+	hid_set_drvdata(hdev, priv);
+
+	ret = hid_hw_start(hdev, HID_CONNECT_DRIVER);
+	if (ret)
+		return ret;
+
+	ret = hid_hw_open(hdev);
+	if (ret)
+		goto out_stop;
+
+	/*
+	 * Start IO before registering with hwmon. If IO were started after
+	 * hwmon registration, a sysfs write arriving in that narrow window
+	 * would send an OUT report but the ACK could not be delivered (the HID
+	 * core discards events until io_started), causing a spurious timeout.
+	 */
+	hid_device_io_start(hdev);
+
+	/*
+	 * Use the non-devm variant and store the pointer so remove() can
+	 * call hwmon_device_unregister() before tearing down the HID
+	 * transport. devm_hwmon_device_register_with_info() would defer
+	 * unregistration until after remove() returns, leaving a window
+	 * where a concurrent sysfs write could call hid_hw_output_report()
+	 * on an already-stopped device (use-after-free).
+	 */
+	priv->hwmon_dev = hwmon_device_register_with_info(&hdev->dev, "arctic_fan",
+							  priv, &arctic_fan_chip_info,
+							  NULL);
+	if (IS_ERR(priv->hwmon_dev)) {
+		ret = PTR_ERR(priv->hwmon_dev);
+		goto out_close;
+	}
+
+	return 0;
+
+out_close:
+	hid_device_io_stop(hdev);
+	hid_hw_close(hdev);
+out_stop:
+	hid_hw_stop(hdev);
+	return ret;
+}
+
+static void arctic_fan_remove(struct hid_device *hdev)
+{
+	struct arctic_fan_data *priv = hid_get_drvdata(hdev);
+
+	/*
+	 * Unregister hwmon before stopping the HID transport. This removes
+	 * the sysfs files and waits for any in-progress write() callback to
+	 * return, so no hwmon op can call hid_hw_output_report() after
+	 * hid_hw_stop() frees the underlying USB resources.
+	 * Matches the pattern used by nzxt-smart2 and aquacomputer_d5next.
+	 */
+	hwmon_device_unregister(priv->hwmon_dev);
+	hid_device_io_stop(hdev);
+	hid_hw_close(hdev);
+	hid_hw_stop(hdev);
+}
+
+static const struct hid_device_id arctic_fan_id_table[] = {
+	{ HID_USB_DEVICE(ARCTIC_VID, ARCTIC_PID) },
+	{ }
+};
+MODULE_DEVICE_TABLE(hid, arctic_fan_id_table);
+
+static struct hid_driver arctic_fan_driver = {
+	.name = "arctic_fan",
+	.id_table = arctic_fan_id_table,
+	.probe = arctic_fan_probe,
+	.remove = arctic_fan_remove,
+	.raw_event = arctic_fan_raw_event,
+	.reset_resume = arctic_fan_reset_resume,
+};
+
+module_hid_driver(arctic_fan_driver);
+
+MODULE_AUTHOR("Aureo Serrano de Souza <aureo.serrano@arctic.de>");
+MODULE_DESCRIPTION("HID hwmon driver for ARCTIC Fan Controller");
+MODULE_LICENSE("GPL");

^ permalink raw reply related

* Re: [PATCH v2 1/2] drm: Rename drm_atomic_state
From: Luca Ceresoli @ 2026-04-01 11:42 UTC (permalink / raw)
  To: Maxime Ripard, David Airlie, Simona Vetter, Maarten Lankhorst,
	Thomas Zimmermann, Jonathan Corbet
  Cc: Jani Nikula, Joonas Lahtinen, Rodrigo Vivi, Tvrtko Ursulin,
	Alex Deucher, Christian König, Rob Clark, Dmitry Baryshkov,
	Andrzej Hajda, Neil Armstrong, Robert Foss, Dave Stevenson,
	Laurent Pinchart, dri-devel, linux-doc, Simona Vetter
In-Reply-To: <20260331-drm-drm-atomic-update-v2-1-7e8fe6ddcd32@kernel.org>

Hi Maxime,

On Tue Mar 31, 2026 at 4:41 PM CEST, Maxime Ripard wrote:
> The KMS framework uses two slightly different definitions for the state
> concept. For a given object (plane, CRTC, encoder, etc., so
> drm_$OBJECT_state), the state is the entire state of that object.
> However, at the device level, drm_atomic_state refers to a state update
> for a limited number of objects.
>
> Thus, drm_atomic_state isn't the entire device state, but only the full
> state of some objects in that device. This has been an endless source of
> confusion and thus bugs.
>
> We can rename drm_atomic_state to drm_atomic_commit to make it less
> confusing.
>
> This patch was created using:
>
> rg -l drm_atomic_state | \
> 	xargs sed -i 's/drm_atomic_state/drm_atomic_commit/g; s/drm_atomic_commit_helper/drm_atomic_state_helper/g'
> mv drivers/gpu/drm/tests/drm_atomic_state_test.c drivers/gpu/drm/tests/drm_atomic_commit_test.c
>
> Acked-by: Simona Vetter <simona.vetter@ffwll.ch>
> Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
> Signed-off-by: Maxime Ripard <mripard@kernel.org>

Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>

And, with a couple of minor tweaks needed to apply it on my work branch
based on v7.0-rc6:
Tested-by: Luca Ceresoli <luca.ceresoli@bootlin.com>

Luca

--
Luca Ceresoli, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com

^ permalink raw reply

* Re: [PATCH v2 2/2] Documentation: drm: Remove drm_atomic_state rename entry
From: Luca Ceresoli @ 2026-04-01 11:42 UTC (permalink / raw)
  To: Maxime Ripard, David Airlie, Simona Vetter, Maarten Lankhorst,
	Thomas Zimmermann, Jonathan Corbet
  Cc: Jani Nikula, Joonas Lahtinen, Rodrigo Vivi, Tvrtko Ursulin,
	Alex Deucher, Christian König, Rob Clark, Dmitry Baryshkov,
	Andrzej Hajda, Neil Armstrong, Robert Foss, Dave Stevenson,
	Laurent Pinchart, dri-devel, linux-doc
In-Reply-To: <20260331-drm-drm-atomic-update-v2-2-7e8fe6ddcd32@kernel.org>

On Tue Mar 31, 2026 at 4:41 PM CEST, Maxime Ripard wrote:
> We just did that item so we can remove it.
>
> Signed-off-by: Maxime Ripard <mripard@kernel.org>

Reviewed-by: Luca Ceresoli <luca.ceresoli@bootlin.com>

--
Luca Ceresoli, Bootlin
Embedded Linux and Kernel engineering
https://bootlin.com

^ permalink raw reply

* [PATCH 00/33] rust: bump minimum Rust and `bindgen` versions
From: Miguel Ojeda @ 2026-04-01 11:45 UTC (permalink / raw)
  To: Miguel Ojeda, Nathan Chancellor, Nicolas Schier, Danilo Krummrich,
	Andreas Hindborg, Catalin Marinas, Will Deacon, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Courbot, David Airlie,
	Simona Vetter, Brendan Higgins, David Gow, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Alice Ryhl, Jonathan Corbet
  Cc: Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Trevor Gross, rust-for-linux, linux-kbuild, Lorenzo Stoakes,
	Vlastimil Babka, Liam R . Howlett, Uladzislau Rezki, linux-block,
	moderated for non-subscribers, Alexandre Ghiti, linux-riscv,
	nouveau, dri-devel, Rae Moar, linux-kselftest, kunit-dev,
	Nick Desaulniers, Bill Wendling, Justin Stitt, llvm, linux-kernel,
	Shuah Khan, linux-doc

As proposed in the past in e.g. LPC 2025 and the Maintainers Summit [1],
we are going to follow Debian Stable's Rust versions as our minimum
supported version.

Debian Trixie was released with a Rust 1.85.0 toolchain [2], which it
still uses to this day [3] (i.e. no update to Rust 1.85.1).

Debian Trixie was released with `bindgen` 0.71.1, which it also still
uses to this day [4].

Debian Trixie's release happened on 2025-08-09 [5], which means that a
fair amount of time has passed since its release for kernel developers
to upgrade.

Thus bump the minimum to the new versions, i.e.

  - Rust: 1.78.0 -> 1.85.0
  - bindgen: 0.65.1 -> 0.71.1

There are a few main parts to the series, in this order:

  - The Rust bump (and cleanups).
  - The bindgen bump (and cleanups).
  - Documentation updates.
  - The `cfi_encoding` patch, added here, which needs the bump.
  - The per-version flags support and a Clippy cleanup on top.

Link: https://lwn.net/Articles/1050174/ [1]
Link: https://www.debian.org/releases/trixie/release-notes/whats-new.en.html#desktops-and-well-known-packages [2]
Link: https://packages.debian.org/trixie/rustc [3]
Link: https://packages.debian.org/trixie/bindgen [4]
Link: https://www.debian.org/releases/trixie/ [5]
---
The cleanups should cover most of it -- there may be more we can do
later e.g. in linux-next.

Most patches are optional, so if there are concerns with any, they can
be dropped or be done later. Most are straightforward, though, and e.g.
a couple of them update TODO comments to keep the series even simpler.

The patches have been split as much as possible to be able to add as
much context as possible and to make it easier to review and to drop any
if needed.

All in all, it is a nice `--stat` of deletions I think :)

Alice Ryhl (1):
  rust: declare cfi_encoding for lru_status

Miguel Ojeda (32):
  rust: bump Rust minimum supported version to 1.85.0 (Debian Trixie)
  rust: bump Clippy's MSRV and clean `incompatible_msrv` allows
  rust: simplify `RUSTC_VERSION` Kconfig conditions
  rust: remove `RUSTC_HAS_SLICE_AS_FLATTENED` and simplify code
  rust: remove `RUSTC_HAS_COERCE_POINTEE` and simplify code
  rust: kbuild: remove skipping of `-Wrustdoc::unescaped_backticks`
  rust: kbuild: remove `feature(...)`s that are now stable
  rust: kbuild: simplify `--remap-path-prefix` workaround
  rust: kbuild: make `--remap-path-prefix` workaround conditional
  rust: transmute: simplify code with Rust 1.80.0 `split_at_*checked()`
  rust: alloc: simplify with `NonNull::add()` now that it is stable
  rust: macros: update `extract_if` MSRV TODO comment
  rust: block: update `const_refs_to_static` MSRV TODO comment
  rust: bump `bindgen` minimum supported version to 0.71.1 (Debian
    Trixie)
  rust: rust_is_available: remove warning for 0.66.[01] buggy versions
  rust: rust_is_available: remove warning for < 0.69.5 && libclang >=
    19.1
  rust: kbuild: update `bindgen --rust-target` version and replace
    comment
  rust: kbuild: remove "dummy parameter" workaround for `bindgen` <
    0.71.1
  rust: kbuild: remove "`try` keyword" workaround for `bindgen` < 0.59.2
  rust: kbuild: remove unneeded old `allow`s for generated layout tests
  gpu: nova-core: bindings: remove unneeded `cfg_attr`
  docs: rust: quick-start: openSUSE provides `rust-src` package nowadays
  docs: rust: quick-start: update Ubuntu versioned packages
  docs: rust: quick-start: update minimum Ubuntu version
  docs: rust: quick-start: add Ubuntu 26.04 LTS and remove subsection
    title
  docs: rust: quick-start: remove Gentoo "testing" note
  docs: rust: quick-start: remove Nix "unstable channel" note
  docs: rust: quick-start: remove GDB/Binutils mention
  docs: rust: general-information: simplify Kconfig example
  docs: rust: general-information: use real example
  rust: kbuild: support global per-version flags
  rust: kbuild: allow `clippy::precedence` for Rust < 1.86.0

 .clippy.toml                                  |  2 +-
 Documentation/process/changes.rst             |  4 +-
 Documentation/rust/general-information.rst    |  4 +-
 Documentation/rust/quick-start.rst            | 52 +++++++----------
 Makefile                                      |  9 +++
 arch/Kconfig                                  |  3 +-
 arch/arm64/Kconfig                            |  8 ---
 arch/riscv/Kconfig                            |  3 -
 drivers/android/binder/Makefile               |  3 +-
 drivers/android/binder/page_range.rs          |  6 +-
 drivers/android/binder/page_range_helper.c    | 24 --------
 drivers/android/binder/page_range_helper.h    | 15 -----
 drivers/gpu/nova-core/gsp/cmdq.rs             |  6 +-
 drivers/gpu/nova-core/gsp/fw/r570_144.rs      |  3 -
 init/Kconfig                                  | 15 +----
 rust/Makefile                                 | 36 ++++--------
 rust/bindgen_parameters                       |  8 +--
 rust/bindings/bindings_helper.h               |  1 -
 rust/bindings/lib.rs                          |  5 +-
 rust/kernel/alloc/allocator/iter.rs           |  8 +--
 rust/kernel/alloc/kbox.rs                     | 29 +---------
 rust/kernel/block/mq/gen_disk.rs              |  4 +-
 rust/kernel/lib.rs                            | 30 +---------
 rust/kernel/list/arc.rs                       | 22 +------
 rust/kernel/prelude.rs                        |  3 -
 rust/kernel/ptr.rs                            |  1 -
 rust/kernel/slice.rs                          | 49 ----------------
 rust/kernel/sync/arc.rs                       | 21 +------
 rust/kernel/transmute.rs                      | 35 ++---------
 rust/macros/kunit.rs                          |  2 +-
 rust/uapi/lib.rs                              |  5 +-
 scripts/Makefile.build                        |  6 +-
 scripts/min-tool-version.sh                   |  4 +-
 scripts/rust_is_available.sh                  | 36 +-----------
 scripts/rust_is_available_bindgen_0_66.h      |  2 -
 ...ust_is_available_bindgen_libclang_concat.h |  3 -
 scripts/rust_is_available_test.py             | 58 +------------------
 37 files changed, 79 insertions(+), 446 deletions(-)
 delete mode 100644 drivers/android/binder/page_range_helper.c
 delete mode 100644 drivers/android/binder/page_range_helper.h
 delete mode 100644 rust/kernel/slice.rs
 delete mode 100644 scripts/rust_is_available_bindgen_0_66.h
 delete mode 100644 scripts/rust_is_available_bindgen_libclang_concat.h

--
2.53.0

^ permalink raw reply

* [PATCH 01/33] rust: bump Rust minimum supported version to 1.85.0 (Debian Trixie)
From: Miguel Ojeda @ 2026-04-01 11:45 UTC (permalink / raw)
  To: Miguel Ojeda, Nathan Chancellor, Nicolas Schier, Danilo Krummrich,
	Andreas Hindborg, Catalin Marinas, Will Deacon, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Courbot, David Airlie,
	Simona Vetter, Brendan Higgins, David Gow, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Alice Ryhl, Jonathan Corbet
  Cc: Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Trevor Gross, rust-for-linux, linux-kbuild, Lorenzo Stoakes,
	Vlastimil Babka, Liam R . Howlett, Uladzislau Rezki, linux-block,
	moderated for non-subscribers, Alexandre Ghiti, linux-riscv,
	nouveau, dri-devel, Rae Moar, linux-kselftest, kunit-dev,
	Nick Desaulniers, Bill Wendling, Justin Stitt, llvm, linux-kernel,
	Shuah Khan, linux-doc
In-Reply-To: <20260401114540.30108-1-ojeda@kernel.org>

As proposed in the past in e.g. LPC 2025 and the Maintainers Summit [1],
we are going to follow Debian Stable's Rust versions as our minimum
supported version.

Debian Trixie was released with a Rust 1.85.0 toolchain [2], which it
still uses to this day [3] (i.e. no update to Rust 1.85.1).

Debian Trixie's release happened on 2025-08-09 [4], which means that a
fair amount of time has passed since its release for kernel developers
to upgrade.

Thus bump the minimum to the new version.

Then, in later commits, clean up most of the workarounds and other bits
that this upgrade of the minimum allows us.

pin-init was left as-is since the patches come from upstream. And the
vendored crates are unmodified, since we do not want to change those.

Note that the minimum LLVM major version for Rust 1.85.0 is LLVM 18 (the
Rust upstream binaries use LLVM 19.1.7), thus e.g. `RUSTC_LLVM_VERSION`
tests can also be updated, but there are no suitable ones to simplify.

Ubuntu 25.10 also has a recent enough Rust toolchain [5], and they also
provide versioned packages with a Rust 1.85.1 toolchain even back to
Ubuntu 22.04 LTS [6].

Link: https://lwn.net/Articles/1050174/ [1]
Link: https://www.debian.org/releases/trixie/release-notes/whats-new.en.html#desktops-and-well-known-packages [2]
Link: https://packages.debian.org/trixie/rustc [3]
Link: https://www.debian.org/releases/trixie/ [4]
Link: https://packages.ubuntu.com/search?suite=all&searchon=names&keywords=rustc [5]
Link: https://launchpad.net/ubuntu/+source/rustc-1.85 [6]
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 Documentation/process/changes.rst | 2 +-
 scripts/min-tool-version.sh       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 6b373e193548..474594bd4831 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils.
 ====================== ===============  ========================================
 GNU C                  8.1              gcc --version
 Clang/LLVM (optional)  15.0.0           clang --version
-Rust (optional)        1.78.0           rustc --version
+Rust (optional)        1.85.0           rustc --version
 bindgen (optional)     0.65.1           bindgen --version
 GNU make               4.0              make --version
 bash                   4.2              bash --version
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 99b5575c1ef7..a270ec761f64 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -31,7 +31,7 @@ llvm)
 	fi
 	;;
 rustc)
-	echo 1.78.0
+	echo 1.85.0
 	;;
 bindgen)
 	echo 0.65.1
-- 
2.53.0


^ permalink raw reply related

* [PATCH 02/33] rust: bump Clippy's MSRV and clean `incompatible_msrv` allows
From: Miguel Ojeda @ 2026-04-01 11:45 UTC (permalink / raw)
  To: Miguel Ojeda, Nathan Chancellor, Nicolas Schier, Danilo Krummrich,
	Andreas Hindborg, Catalin Marinas, Will Deacon, Paul Walmsley,
	Palmer Dabbelt, Albert Ou, Alexandre Courbot, David Airlie,
	Simona Vetter, Brendan Higgins, David Gow, Greg Kroah-Hartman,
	Arve Hjønnevåg, Todd Kjos, Christian Brauner,
	Carlos Llamas, Alice Ryhl, Jonathan Corbet
  Cc: Boqun Feng, Gary Guo, Björn Roy Baron, Benno Lossin,
	Trevor Gross, rust-for-linux, linux-kbuild, Lorenzo Stoakes,
	Vlastimil Babka, Liam R . Howlett, Uladzislau Rezki, linux-block,
	moderated for non-subscribers, Alexandre Ghiti, linux-riscv,
	nouveau, dri-devel, Rae Moar, linux-kselftest, kunit-dev,
	Nick Desaulniers, Bill Wendling, Justin Stitt, llvm, linux-kernel,
	Shuah Khan, linux-doc
In-Reply-To: <20260401114540.30108-1-ojeda@kernel.org>

Following the Rust compiler bump, we can now update the Clippy MSRV that we
set in the configuration, which will improve the diagnostics it generates.

Thus do so and clean a few of the `allow`s that are not needed anymore.

Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
---
 .clippy.toml                      | 2 +-
 drivers/gpu/nova-core/gsp/cmdq.rs | 6 +-----
 rust/kernel/ptr.rs                | 1 -
 rust/kernel/transmute.rs          | 2 --
 4 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/.clippy.toml b/.clippy.toml
index a51de9a46380..b0a78cc8be20 100644
--- a/.clippy.toml
+++ b/.clippy.toml
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-msrv = "1.78.0"
+msrv = "1.85.0"
 
 check-private-items = true
 
diff --git a/drivers/gpu/nova-core/gsp/cmdq.rs b/drivers/gpu/nova-core/gsp/cmdq.rs
index 46819a82a51a..d9f69366642a 100644
--- a/drivers/gpu/nova-core/gsp/cmdq.rs
+++ b/drivers/gpu/nova-core/gsp/cmdq.rs
@@ -281,7 +281,6 @@ fn allocate_command(&mut self, size: usize) -> Result<GspCommand<'_>> {
         let (slice_1, slice_2) = {
             let (slice_1, slice_2) = self.driver_write_area();
 
-            #[allow(clippy::incompatible_msrv)]
             (slice_1.as_flattened_mut(), slice_2.as_flattened_mut())
         };
 
@@ -572,10 +571,7 @@ fn wait_for_msg(&self, timeout: Delta) -> Result<GspMessage<'_>> {
             Delta::from_millis(1),
             timeout,
         )
-        .map(|(slice_1, slice_2)| {
-            #[allow(clippy::incompatible_msrv)]
-            (slice_1.as_flattened(), slice_2.as_flattened())
-        })?;
+        .map(|(slice_1, slice_2)| (slice_1.as_flattened(), slice_2.as_flattened()))?;
 
         // Extract the `GspMsgElement`.
         let (header, slice_1) = GspMsgElement::from_bytes_prefix(slice_1).ok_or(EIO)?;
diff --git a/rust/kernel/ptr.rs b/rust/kernel/ptr.rs
index 512e2eabe3ad..bd669e74e1cc 100644
--- a/rust/kernel/ptr.rs
+++ b/rust/kernel/ptr.rs
@@ -81,7 +81,6 @@ pub const fn new_checked(align: usize) -> Option<Self> {
     /// This is equivalent to [`align_of`], but with the return value provided as an [`Alignment`].
     #[inline(always)]
     pub const fn of<T>() -> Self {
-        #![allow(clippy::incompatible_msrv)]
         // This cannot panic since alignments are always powers of two.
         //
         // We unfortunately cannot use `new` as it would require the `generic_const_exprs` feature.
diff --git a/rust/kernel/transmute.rs b/rust/kernel/transmute.rs
index 5711580c9f9b..b9e6eadc08f5 100644
--- a/rust/kernel/transmute.rs
+++ b/rust/kernel/transmute.rs
@@ -49,7 +49,6 @@ fn from_bytes(bytes: &[u8]) -> Option<&Self>
         let slice_ptr = bytes.as_ptr().cast::<Self>();
         let size = size_of::<Self>();
 
-        #[allow(clippy::incompatible_msrv)]
         if bytes.len() == size && slice_ptr.is_aligned() {
             // SAFETY: Size and alignment were just checked.
             unsafe { Some(&*slice_ptr) }
@@ -92,7 +91,6 @@ fn from_bytes_mut(bytes: &mut [u8]) -> Option<&mut Self>
         let slice_ptr = bytes.as_mut_ptr().cast::<Self>();
         let size = size_of::<Self>();
 
-        #[allow(clippy::incompatible_msrv)]
         if bytes.len() == size && slice_ptr.is_aligned() {
             // SAFETY: Size and alignment were just checked.
             unsafe { Some(&mut *slice_ptr) }
-- 
2.53.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox