All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sched: Introduce task_struct::latency_sensi_flag.
@ 2024-05-05  3:06 fuyuanli
  2024-05-06  9:50 ` Sebastian Andrzej Siewior
  2024-05-06 14:04 ` Jakub Kicinski
  0 siblings, 2 replies; 5+ messages in thread
From: fuyuanli @ 2024-05-05  3:06 UTC (permalink / raw)
  To: mingo, peterz, juri.lelli, vincent.guittot
  Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
	kolyshkin, fuyuanli, akpm, kent.overstreet, frederic, oleg, hca,
	paulmck, tj, bigeasy, kuba, CruzZhao, pabeni, linux-kernel

In the path local_bh_enable()->__local_bh_enable_ip(), pending softirq
handlers are executed in the context of the current task. Tasks that are
sensitive to running latency should not spend extra time executing
softirqs. So introduce latency_sensi_flag in task_struct: when it is set
to 1, the task only wakes up the softirq daemon in
__local_bh_enable_ip() instead of running the handlers itself.

A test was run on two hosts, A and B. On A, several clients
concurrently sent UDP packets to a single server on B as fast as
possible. On B, the IRQs of these flows were bound to CPU 0 by flow
director, so there was always a net_rx softirq triggered on CPU 0. Then
a test program, also bound to CPU 0, was started on B and kept calling
sendto() in a loop. Sampling with perf showed that about 25% of the
test program's running time was spent executing local_bh_enable()
inside the sendto() syscall; after setting latency_sensi_flag to 1,
this proportion dropped to 0.5%.

Signed-off-by: fuyuanli <fuyuanli@didiglobal.com>
---
 include/linux/sched.h            |  2 ++
 include/uapi/linux/sched.h       |  4 +++-
 include/uapi/linux/sched/types.h |  3 +++
 init/init_task.c                 |  1 +
 kernel/sched/core.c              | 12 ++++++++++++
 kernel/softirq.c                 | 20 ++++++++++++--------
 6 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3c2abbc587b4..af39888079c0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -925,6 +925,8 @@ struct task_struct {
 	/* Bit to tell TOMOYO we're in execve(): */
 	unsigned			in_execve:1;
 	unsigned			in_iowait:1;
+	/* Bit means if task is sensitive to latency */
+	unsigned			latency_sensi_flag:1;
 #ifndef TIF_RESTORE_SIGMASK
 	unsigned			restore_sigmask:1;
 #endif
diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
index 3bac0a8ceab2..07c7ec5bd5a6 100644
--- a/include/uapi/linux/sched.h
+++ b/include/uapi/linux/sched.h
@@ -132,6 +132,7 @@ struct clone_args {
 #define SCHED_FLAG_KEEP_PARAMS		0x10
 #define SCHED_FLAG_UTIL_CLAMP_MIN	0x20
 #define SCHED_FLAG_UTIL_CLAMP_MAX	0x40
+#define SCHED_FLAG_LATENCY_SENSITIVE	0x80
 
 #define SCHED_FLAG_KEEP_ALL	(SCHED_FLAG_KEEP_POLICY | \
 				 SCHED_FLAG_KEEP_PARAMS)
@@ -143,6 +144,7 @@ struct clone_args {
 			 SCHED_FLAG_RECLAIM		| \
 			 SCHED_FLAG_DL_OVERRUN		| \
 			 SCHED_FLAG_KEEP_ALL		| \
-			 SCHED_FLAG_UTIL_CLAMP)
+			 SCHED_FLAG_UTIL_CLAMP		| \
+			 SCHED_FLAG_LATENCY_SENSITIVE)
 
 #endif /* _UAPI_LINUX_SCHED_H */
diff --git a/include/uapi/linux/sched/types.h b/include/uapi/linux/sched/types.h
index 90662385689b..d435b75e6ac9 100644
--- a/include/uapi/linux/sched/types.h
+++ b/include/uapi/linux/sched/types.h
@@ -116,6 +116,9 @@ struct sched_attr {
 	__u32 sched_util_min;
 	__u32 sched_util_max;
 
+	/* Latency sensitive flag */
+	__u32 sched_latency_sensi_flag;
+
 };
 
 #endif /* _UAPI_LINUX_SCHED_TYPES_H */
diff --git a/init/init_task.c b/init/init_task.c
index 4daee6d761c8..f36237d06485 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -98,6 +98,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
 #ifdef CONFIG_CGROUP_SCHED
 	.sched_task_group = &root_task_group,
 #endif
+	.latency_sensi_flag = 0,
 	.ptraced	= LIST_HEAD_INIT(init_task.ptraced),
 	.ptrace_entry	= LIST_HEAD_INIT(init_task.ptrace_entry),
 	.real_parent	= &init_task,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7019a40457a6..6dfc2db7ef88 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7763,6 +7763,9 @@ static int __sched_setscheduler(struct task_struct *p,
 			return retval;
 	}
 
+	if (attr->sched_latency_sensi_flag > 1)
+		return -EINVAL;
+
 	/*
 	 * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
 	 * information.
@@ -7804,6 +7807,8 @@ static int __sched_setscheduler(struct task_struct *p,
 		if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP)
 			goto change;
 
+		if (attr->sched_flags & SCHED_FLAG_LATENCY_SENSITIVE)
+			p->latency_sensi_flag = attr->sched_latency_sensi_flag;
 		p->sched_reset_on_fork = reset_on_fork;
 		retval = 0;
 		goto unlock;
@@ -7908,6 +7913,9 @@ static int __sched_setscheduler(struct task_struct *p,
 
 	check_class_changed(rq, p, prev_class, oldprio);
 
+	if (attr->sched_flags & SCHED_FLAG_LATENCY_SENSITIVE)
+		p->latency_sensi_flag = attr->sched_latency_sensi_flag;
+
 	/* Avoid rq from going away on us: */
 	preempt_disable();
 	head = splice_balance_callbacks(rq);
@@ -8314,6 +8322,10 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
 		get_params(p, &kattr);
 		kattr.sched_flags &= SCHED_FLAG_ALL;
 
+		kattr.sched_latency_sensi_flag = p->latency_sensi_flag;
+		if (kattr.sched_latency_sensi_flag)
+			kattr.sched_flags |= SCHED_FLAG_LATENCY_SENSITIVE;
+
 #ifdef CONFIG_UCLAMP_TASK
 		/*
 		 * This could race with another potential updater, but this is fine
diff --git a/kernel/softirq.c b/kernel/softirq.c
index b315b21fb28c..f4e7ce4cde81 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -226,10 +226,10 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 		goto out;
 
 	/*
-	 * If this was called from non preemptible context, wake up the
-	 * softirq daemon.
+	 * If this was called from non preemptible context, or current task is
+	 * sensitive to running latency, wake up the softirq daemon.
 	 */
-	if (!preempt_on) {
+	if (!preempt_on || current->latency_sensi_flag) {
 		wakeup_softirqd();
 		goto out;
 	}
@@ -375,11 +375,15 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt)
 	__preempt_count_sub(cnt - 1);
 
 	if (unlikely(!in_interrupt() && local_softirq_pending())) {
-		/*
-		 * Run softirq if any pending. And do it in its own stack
-		 * as we may be calling this deep in a task call stack already.
-		 */
-		do_softirq();
+		/* If task is sensitive to running latency, only wake up the softirq daemon. */
+		if (current->latency_sensi_flag)
+			wakeup_softirqd();
+		else
+			/*
+			 * Run softirq if any pending. And do it in its own stack
+			 * as we may be calling this deep in a task call stack already.
+			 */
+			do_softirq();
 	}
 
 	preempt_count_dec();
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-05-08  3:02 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2024-05-05  3:06 [PATCH] sched: Introduce task_struct::latency_sensi_flag fuyuanli
2024-05-06  9:50 ` Sebastian Andrzej Siewior
2024-05-08  2:56   ` 付元力 Jerry Fu
2024-05-06 14:04 ` Jakub Kicinski
2024-05-08  3:02   ` 付元力 Jerry Fu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.