public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Shrikanth Hegde <sshegde@linux.ibm.com>
To: Yury Norov <ynorov@nvidia.com>
Cc: linux-kernel@vger.kernel.org, mingo@kernel.org,
	peterz@infradead.org, juri.lelli@redhat.com,
	vincent.guittot@linaro.org, tglx@linutronix.de,
	yury.norov@gmail.com, gregkh@linuxfoundation.org,
	pbonzini@redhat.com, seanjc@google.com, kprateek.nayak@amd.com,
	vschneid@redhat.com, iii@linux.ibm.com, huschle@linux.ibm.com,
	rostedt@goodmis.org, dietmar.eggemann@arm.com, mgorman@suse.de,
	bsegall@google.com, maddy@linux.ibm.com, srikar@linux.ibm.com,
	hdanton@sina.com, chleroy@kernel.org, vineeth@bitbyteword.org,
	joelagnelf@nvidia.com
Subject: Re: [PATCH v2 03/17] cpumask: Introduce cpu_preferred_mask
Date: Tue, 5 May 2026 09:37:21 +0530	[thread overview]
Message-ID: <b78cd18e-1f98-4695-954f-dfae4edecde3@linux.ibm.com> (raw)
In-Reply-To: <adaXA6mlZlGJS3Jo@yury>

Hi Yury,

On 4/8/26 11:27 PM, Yury Norov wrote:

> I suggest adding, for example, config PREFERRED_CPUS that would select
> PARAVIRT, and would be disabled by default.
> 
> Regardless, whatever you decide, please keep all the cpu_paravirt_mask
> ifdefery on the cpumasks level. For example, in patch #5:
> 
>   +#ifdef CONFIG_PARAVIRT
>   +static inline bool task_can_run_on_preferred_cpu(struct task_struct *p)
>   +{
>   +       return cpumask_intersects(p->cpus_ptr, cpu_preferred_mask);
>   +}
>   +#else
>   +static inline bool task_can_run_on_preferred_cpu(struct task_struct *p)
>   +{
>   +       return true;
>   +}
>   +#endif
> 
> That looks wrong to me. Instead, either declare cpu_preferred_mask
> unconditionally, and maintain it well, or
> 
>   +#ifdef CONFIG_PREFERRED_CPUS
>   +extern struct cpumask __cpu_preferred_mask;
>   +#else
>   +#define __cpu_preferred_mask __cpu_online_mask
>   +#endif
> 
> This way, your higher level code will be clean.
> 
> Thanks,
> Yury

Thanks Yury for the suggestion. This method is indeed cleaner.

So I have made it as below. It is a rough patch, I may have to clean it up still.
But this helps to get rid of much of the ifdeffery elsewhere.
Most of the ifdeffery will be in cpumask.h, sched.h.

Hopefully I should be able to send the series by this week.
---

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 88c594c6d7fc..c62001b52fab 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -192,3 +192,17 @@ config SCHED_CLASS_EXT
  	  For more information:
  	    Documentation/scheduler/sched-ext.rst
  	    https://github.com/sched-ext/scx
+
+config PREFERRED_CPU
+	bool "Dynamic vCPU management based on steal time"
+	default y if PARAVIRT
+	help
+	  This feature helps to reduce the steal time in a paravirtualised
+	  environment, thereby reducing vCPU preemption. Reducing vCPU
+	  preemption mitigates lock holder preemption and reduces the
+	  cost of vCPU preemption in the host.

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 80211900f373..577b8d992a45 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -120,12 +120,20 @@ extern struct cpumask __cpu_enabled_mask;
  extern struct cpumask __cpu_present_mask;
  extern struct cpumask __cpu_active_mask;
  extern struct cpumask __cpu_dying_mask;
+
+#ifdef CONFIG_PREFERRED_CPU
+extern struct cpumask __cpu_preferred_mask;
+#else
+#define __cpu_preferred_mask __cpu_online_mask
+#endif
+
  #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
  #define cpu_online_mask   ((const struct cpumask *)&__cpu_online_mask)
  #define cpu_enabled_mask   ((const struct cpumask *)&__cpu_enabled_mask)
  #define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
  #define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
  #define cpu_dying_mask    ((const struct cpumask *)&__cpu_dying_mask)
+#define cpu_preferred_mask ((const struct cpumask *)&__cpu_preferred_mask)
  
  extern atomic_t __num_online_cpus;
  extern unsigned int __num_possible_cpus;
@@ -1164,6 +1172,7 @@ void init_cpu_possible(const struct cpumask *src);
  
  void set_cpu_online(unsigned int cpu, bool online);
  void set_cpu_possible(unsigned int cpu, bool possible);
+void set_cpu_preferred(unsigned int cpu, bool preferred);
  
  /**
   * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask *
@@ -1256,7 +1265,12 @@ static __always_inline bool cpu_dying(unsigned int cpu)
  	return cpumask_test_cpu(cpu, cpu_dying_mask);
  }
  
-#else
+static __always_inline bool cpu_preferred(unsigned int cpu)
+{
+	return cpumask_test_cpu(cpu, cpu_preferred_mask);
+}
+
+#else	/* NR_CPUS <= 1 */
  
  #define num_online_cpus()	1U
  #define num_possible_cpus()	1U
@@ -1294,6 +1308,11 @@ static __always_inline bool cpu_dying(unsigned int cpu)
  	return false;
  }
  
+static __always_inline bool cpu_preferred(unsigned int cpu)
+{
+	return false;
+}
+
  #endif /* NR_CPUS > 1 */
  
  #define cpu_is_offline(cpu)	unlikely(!cpu_online(cpu))
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bc4f7a9ba64e..7787c907f9b8 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -3107,6 +3107,11 @@ EXPORT_SYMBOL(__cpu_dying_mask);
  atomic_t __num_online_cpus __read_mostly;
  EXPORT_SYMBOL(__num_online_cpus);
  
+#ifdef CONFIG_PREFERRED_CPU
+struct cpumask __cpu_preferred_mask __read_mostly;
+EXPORT_SYMBOL(__cpu_preferred_mask);
+#endif
+
  void init_cpu_present(const struct cpumask *src)
  {
  	cpumask_copy(&__cpu_present_mask, src);
@@ -3154,6 +3159,14 @@ void set_cpu_possible(unsigned int cpu, bool possible)
  	}
  }
  
+void set_cpu_preferred(unsigned int cpu, bool preferred)
+{
+	if (!IS_ENABLED(CONFIG_PREFERRED_CPU))
+		return;
+
+	assign_cpu(cpu, &__cpu_preferred_mask, preferred);
+}
+
  /*
   * Activate the first processor.
   */

  reply	other threads:[~2026-05-05  4:11 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-07 19:19 [PATCH v2 00/17] sched/paravirt: Introduce cpu_preferred_mask and steal-driven vCPU backoff Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 01/17] sched/debug: Remove unused schedstats Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 02/17] sched/docs: Document cpu_preferred_mask and Preferred CPU concept Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 03/17] cpumask: Introduce cpu_preferred_mask Shrikanth Hegde
2026-04-07 20:27   ` Yury Norov
2026-04-08  9:16     ` Shrikanth Hegde
2026-04-08 17:57       ` Yury Norov
2026-05-05  4:07         ` Shrikanth Hegde [this message]
2026-04-07 19:19 ` [PATCH v2 04/17] sysfs: Add preferred CPU file Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 05/17] sched/core: allow only preferred CPUs in is_cpu_allowed Shrikanth Hegde
2026-04-08  1:05   ` Yury Norov
2026-04-08 12:56     ` Shrikanth Hegde
2026-04-08 18:09       ` Yury Norov
2026-04-07 19:19 ` [PATCH v2 06/17] sched/fair: Select preferred CPU at wakeup when possible Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 07/17] sched/fair: load balance only among preferred CPUs Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 08/17] sched/rt: Select a preferred CPU for wakeup and pulling rt task Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 09/17] sched/core: Keep tick on non-preferred CPUs until tasks are out Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 10/17] sched/core: Push current task from non preferred CPU Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 11/17] sched/debug: Add migration stats due to non preferred CPUs Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 12/17] sched/feature: Add STEAL_MONITOR feature Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 13/17] sched/core: Introduce a simple steal monitor Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 14/17] sched/core: Compute steal values at regular intervals Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 15/17] sched/core: Handle steal values and mark CPUs as preferred Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 16/17] sched/core: Mark the direction of steal values to avoid oscillations Shrikanth Hegde
2026-04-07 19:19 ` [PATCH v2 17/17] sched/debug: Add debug knobs for steal monitor Shrikanth Hegde
2026-04-07 19:50 ` [PATCH v2 00/17] sched/paravirt: Introduce cpu_preferred_mask and steal-driven vCPU backoff Shrikanth Hegde
2026-04-08 10:14 ` Hillf Danton
2026-04-08 13:49   ` Shrikanth Hegde
2026-04-09  5:15     ` Hillf Danton
2026-04-09 10:27       ` Shrikanth Hegde
2026-04-10  9:47 ` Shrikanth Hegde

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b78cd18e-1f98-4695-954f-dfae4edecde3@linux.ibm.com \
    --to=sshegde@linux.ibm.com \
    --cc=bsegall@google.com \
    --cc=chleroy@kernel.org \
    --cc=dietmar.eggemann@arm.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=hdanton@sina.com \
    --cc=huschle@linux.ibm.com \
    --cc=iii@linux.ibm.com \
    --cc=joelagnelf@nvidia.com \
    --cc=juri.lelli@redhat.com \
    --cc=kprateek.nayak@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maddy@linux.ibm.com \
    --cc=mgorman@suse.de \
    --cc=mingo@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=seanjc@google.com \
    --cc=srikar@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=vincent.guittot@linaro.org \
    --cc=vineeth@bitbyteword.org \
    --cc=vschneid@redhat.com \
    --cc=ynorov@nvidia.com \
    --cc=yury.norov@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox