public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@kernel.org>
To: "Chang S. Bae" <chang.seok.bae@intel.com>, linux-kernel@vger.kernel.org
Cc: x86@kernel.org, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, peterz@infradead.org,
	david.kaplan@amd.com, chang.seok.bae@intel.com
Subject: Re: [PATCH 1/7] stop_machine: Introduce stop_machine_nmi()
Date: Wed, 28 Jan 2026 09:02:03 +0100	[thread overview]
Message-ID: <87o6me9nd0.ffs@tglx> (raw)
In-Reply-To: <20260125014224.249901-2-chang.seok.bae@intel.com>

On Sun, Jan 25 2026 at 01:42, Chang S. Bae wrote:
> +/**
> + * stop_machine_nmi: freeze the machine and run this function in NMI context
> + * @fn: the function to run
> + * @data: the data ptr for the @fn()
> + * @cpus: the cpus to run the @fn() on (NULL = any online cpu)

Please format these tabular, use uppercase letters to start the
explanation, use CPU[s] all over the place and write out words instead
of using made up abbreviations. This is documentation not twitter.

 * @fn:		The function to invoke
 * @data:	The data pointer for @fn()
 * @cpus:	A cpumask containing the CPUs to run fn() on

Also this NULL == any online CPU is just made up. What's wrong with
cpu_online_mask?

> +
> +bool noinstr stop_machine_nmi_handler(void);
> +DECLARE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
> +static __always_inline bool stop_machine_nmi_handler_enabled(void)

Can you please separate the declarations from the inline with an empty
new line? This glued together way to write it is unreadable.

> +{
> +	return static_branch_unlikely(&stop_machine_nmi_handler_enable);
> +}
> +
>  #else	/* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
>  
>  static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> @@ -186,5 +217,24 @@ stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
>  	return stop_machine(fn, data, cpus);
>  }
>  
> +/* stop_machine_nmi() is only supported in SMP systems. */
> +static __always_inline int stop_machine_nmi(cpu_stop_fn_t fn, void *data,
> +					const struct cpumask *cpus)

Align the second line argument with the first argument above.

See https://www.kernel.org/doc/html/latest/process/maintainer-tip.html

> +{
> +	return -EINVAL;
> +}
> +

> +
> +void arch_send_self_nmi(void);
>  #endif	/* _LINUX_STOP_MACHINE */
> diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
> index 3fe6b0c99f3d..189b4b108d13 100644
> --- a/kernel/stop_machine.c
> +++ b/kernel/stop_machine.c
> @@ -174,6 +174,8 @@ struct multi_stop_data {
>  
>  	enum multi_stop_state	state;
>  	atomic_t		thread_ack;
> +
> +	bool			use_nmi;
>  };
>  
>  static void set_state(struct multi_stop_data *msdata,
> @@ -197,6 +199,42 @@ notrace void __weak stop_machine_yield(const struct cpumask *cpumask)
>  	cpu_relax();
>  }
>  
> +struct stop_machine_nmi_ctrl {
> +	bool nmi_enabled;
> +	struct multi_stop_data *msdata;
> +	int err;

Please align the struct member names tabular. See documentation.

> +};
> +
> +DEFINE_STATIC_KEY_FALSE(stop_machine_nmi_handler_enable);
> +static DEFINE_PER_CPU(struct stop_machine_nmi_ctrl, stop_machine_nmi_ctrl);
> +
> +static void enable_nmi_handler(struct multi_stop_data *msdata)
> +{
> +	this_cpu_write(stop_machine_nmi_ctrl.msdata, msdata);
> +	this_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, true);
> +}
> +
> +void __weak arch_send_self_nmi(void)
> +{
> +	/* Arch code must implement this to support stop_machine_nmi() */

Architecture

> +}

Also this weak function is wrong.

All of this NMI mode needs to be guarded with a config option as it
otherwise is compiled in unconditionally and any accidental usage on an
architecture which does not support this will result in a undecodable
malfunction. There is a world outside of x86.

With that arch_send_self_nmi() becomes a plain declaration in a header.

> +
> +bool noinstr stop_machine_nmi_handler(void)
> +{
> +	struct multi_stop_data *msdata;
> +	int err;
> +
> +	if (!raw_cpu_read(stop_machine_nmi_ctrl.nmi_enabled))
> +		return false;
> +
> +	raw_cpu_write(stop_machine_nmi_ctrl.nmi_enabled, false);
> +
> +	msdata = raw_cpu_read(stop_machine_nmi_ctrl.msdata);
> +	err = msdata->fn(msdata->data);
> +	raw_cpu_write(stop_machine_nmi_ctrl.err, err);
> +	return true;
> +}
> +
>  /* This is the cpu_stop function which stops the CPU. */
>  static int multi_cpu_stop(void *data)
>  {
> @@ -234,8 +272,15 @@ static int multi_cpu_stop(void *data)
>  				hard_irq_disable();
>  				break;
>  			case MULTI_STOP_RUN:
> -				if (is_active)
> -					err = msdata->fn(msdata->data);
> +				if (is_active) {
> +					if (msdata->use_nmi) {
> +						enable_nmi_handler(msdata);
> +						arch_send_self_nmi();
> +						err = raw_cpu_read(stop_machine_nmi_ctrl.err);
> +					} else {
> +						err = msdata->fn(msdata->data);
> +					}

And this wants to become

    if (IS_ENABLED(CONFIG_STOMP_MACHINE_NMI) && msdata->use_nmi)
    	err = stop_this_cpu_nmi(msdata);
    else
	err = msdata->fn(msdata->data);

> +				}
>  				break;
>  			default:
>  				break;
> @@ -584,14 +629,15 @@ static int __init cpu_stop_init(void)
>  }
>  early_initcall(cpu_stop_init);
>  
> -int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> -			    const struct cpumask *cpus)
> +static int __stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
> +			    const struct cpumask *cpus, bool use_nmi)

The argument alignment was correct before....

>  {
>  	struct multi_stop_data msdata = {
>  		.fn = fn,
>  		.data = data,
>  		.num_threads = num_online_cpus(),
>  		.active_cpus = cpus,
> +		.use_nmi = use_nmi,
>  	};
>  
>  	lockdep_assert_cpus_held();
> @@ -620,6 +666,24 @@ int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
>  	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
>  }

> +int stop_machine_nmi(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
> +{
> +	int ret;
> +
> +	cpus_read_lock();
> +	ret = stop_machine_cpuslocked_nmi(fn, data, cpus);
> +	cpus_read_unlock();
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(stop_machine_nmi);

Why needs this to be exported? No module has any business with stomp
machine.

Thanks

        tglx

  parent reply	other threads:[~2026-01-28  8:02 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-25  1:42 [PATCH 0/7] x86/microcode: Refactor NMI-based rendezvous mechanism to stop-machine Chang S. Bae
2026-01-25  1:42 ` [PATCH 1/7] stop_machine: Introduce stop_machine_nmi() Chang S. Bae
2026-01-26 11:51   ` kernel test robot
2026-01-27 14:49     ` Borislav Petkov
2026-01-27 19:15       ` Chang S. Bae
2026-01-27 15:49   ` Borislav Petkov
2026-01-27 16:00     ` Kaplan, David
2026-01-27 20:49       ` Borislav Petkov
2026-01-28  1:31         ` Kaplan, David
2026-01-28 16:35           ` Borislav Petkov
2026-01-29 12:17             ` Borislav Petkov
2026-01-29 15:47               ` Chang S. Bae
2026-02-02 10:54               ` Borislav Petkov
2026-02-06  2:14                 ` Chang S. Bae
2026-03-04 16:33                   ` Borislav Petkov
2026-01-28  8:02   ` Thomas Gleixner [this message]
2026-01-29 17:07     ` Chang S. Bae
2026-01-30 10:02       ` Thomas Gleixner
2026-01-25  1:42 ` [PATCH 2/7] x86/apic: Implement self-NMI support Chang S. Bae
2026-01-28  8:05   ` Thomas Gleixner
2026-01-29 16:32     ` Chang S. Bae
2026-01-25  1:42 ` [PATCH 3/7] x86/nmi: Support stop_machine_nmi() handler Chang S. Bae
2026-01-25  1:42 ` [PATCH 4/7] x86/microcode: Distinguish NMI control path on stop-machine callback Chang S. Bae
2026-01-28  8:11   ` Thomas Gleixner
2026-01-29 16:32     ` Chang S. Bae
2026-01-25  1:42 ` [PATCH 5/7] x86/microcode: Use stop-machine NMI facility Chang S. Bae
2026-01-25  1:42 ` [PATCH 6/7] x86/nmi: Reference stop-machine static key for offline microcode handler Chang S. Bae
2026-01-25  1:42 ` [PATCH 7/7] x86/microcode: Remove microcode_nmi_handler_enable Chang S. Bae

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87o6me9nd0.ffs@tglx \
    --to=tglx@kernel.org \
    --cc=bp@alien8.de \
    --cc=chang.seok.bae@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david.kaplan@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox