* [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-07-16 5:25 [PATCH-RT sched v3 0/2] Optimize the RT group scheduling Xavier
@ 2024-07-16 5:25 ` Xavier
2024-07-16 5:25 ` [PATCH-RT sched v3 2/2] RT test: Adding test cases for RT group scheduling Xavier
2024-07-16 8:59 ` [PATCH-RT sched v3 0/2] Optimize the " Peter Zijlstra
2 siblings, 0 replies; 7+ messages in thread
From: Xavier @ 2024-07-16 5:25 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
This patch optimizes the enqueue and dequeue of rt_se by employing a
bottom-up removal approach. Specifically, when removing an rt_se at a
certain level, if the highest priority of the rq associated with that
rt_se has not changed, there is no need to continue removing rt_se at
higher levels. In that case, only the total number of removed rt_se
needs to be recorded, and the rt_nr_running count of each higher-level
rq is reduced accordingly.
Signed-off-by: Xavier <xavier_qy@163.com>
---
kernel/sched/debug.c | 48 ++++++++
kernel/sched/rt.c | 287 +++++++++++++++++++++++++++++++++++++------
kernel/sched/sched.h | 1 +
3 files changed, 298 insertions(+), 38 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c1eb9a1afd13..bf9edba5e87b 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -712,6 +712,54 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#endif
}
+/*
+ * Print one line describing @rt_se to @m: the task-group path when the
+ * entity has a group runqueue (my_q), otherwise the prio, pid and comm of
+ * the task that embeds it.
+ */
+void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
+{
+	const struct task_struct *p;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+	/* Entity with a group runqueue: identified by its task group. */
+	if (rt_se->my_q) {
+		SEQ_printf_task_group_path(m, rt_se->my_q->tg, "%s\n");
+		return;
+	}
+#endif
+	p = container_of(rt_se, struct task_struct, rt);
+	SEQ_printf(m, " prio-%d, pid-%d, %s\n", p->prio, p->pid, p->comm);
+}
+
+/*
+ * Print every entity queued on the active priority array of @rt_rq, one
+ * priority level after another in bitmap order.
+ *
+ * Shall be called with the rq lock held.
+ */
+void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq)
+{
+	struct rt_prio_array *array = &rt_rq->active;
+	struct sched_rt_entity *rt_se;
+	int idx;
+
+	if (!rt_rq->rt_nr_running)
+		return;
+
+	/* rt_nr_running is non-zero, so some priority level should be set. */
+	WARN_ON_ONCE(sched_find_first_bit(array->bitmap) >= MAX_RT_PRIO);
+
+	/*
+	 * Walk the bitmap in place instead of copying it into a fixed
+	 * two-word buffer: the copy was too small wherever the bitmap needs
+	 * more than two longs, and nothing here modifies the bitmap anyway.
+	 * Bounding the walk by MAX_RT_PRIO also guarantees that array->queue
+	 * is never indexed out of range, and list_for_each_entry() copes
+	 * with an empty list.
+	 */
+	for_each_set_bit(idx, array->bitmap, MAX_RT_PRIO) {
+		list_for_each_entry(rt_se, array->queue + idx, run_list)
+			print_rt_se(m, rt_se);
+	}
+}
+
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index aa4c1c874fa4..b18c424a50d2 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1113,7 +1113,7 @@ void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
#endif /* CONFIG_SMP */
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-static void
+static int
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1122,9 +1122,11 @@ inc_rt_prio(struct rt_rq *rt_rq, int prio)
rt_rq->highest_prio.curr = prio;
inc_rt_prio_smp(rt_rq, prio, prev_prio);
+
+ return prev_prio > prio;
}
-static void
+static int
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1149,12 +1151,22 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
}
dec_rt_prio_smp(rt_rq, prio, prev_prio);
+ if (rt_rq->highest_prio.curr > prio)
+ return prio;
+ else
+ return MAX_RT_PRIO;
}
#else
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
+/* Highest-priority tracking is compiled out: always report "not changed". */
+static inline int inc_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+	return 0;
+}
+
+/*
+ * Highest-priority tracking is compiled out. Return MAX_RT_PRIO, the value
+ * the real dec_rt_prio() uses for "highest priority not changed", so both
+ * variants follow the same contract (0 is a valid priority, not a sentinel).
+ */
+static inline int dec_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+	return MAX_RT_PRIO;
+}
#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
@@ -1218,28 +1230,31 @@ unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
}
static inline
-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
int prio = rt_se_prio(rt_se);
+ int prio_change; /* value returned by inc_rt_prio(), handed back to the caller */
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
- inc_rt_prio(rt_rq, prio);
+ prio_change = inc_rt_prio(rt_rq, prio); /* non-stub variant: 1 if prio is numerically below the previous highest_prio.curr, else 0 */
inc_rt_group(rt_se, rt_rq);
+ return prio_change;
}
static inline
-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq, int prio)
{
+ int prio_changed; /* value returned by dec_rt_prio(), handed back to the caller */
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
- WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
- dec_rt_prio(rt_rq, rt_se_prio(rt_se));
+ prio_changed = dec_rt_prio(rt_rq, prio); /* uses the caller-supplied @prio instead of rt_se_prio(rt_se) */
dec_rt_group(rt_se, rt_rq);
+ return prio_changed;
}
/*
@@ -1255,12 +1270,13 @@ static inline bool move_entity(unsigned int flags)
return true;
}
-static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+static void __delist_rt_entity(struct sched_rt_entity *rt_se,
+ struct rt_prio_array *array, int last_prio)
{
list_del_init(&rt_se->run_list);
- if (list_empty(array->queue + rt_se_prio(rt_se)))
- __clear_bit(rt_se_prio(rt_se), array->bitmap);
+ if (list_empty(array->queue + last_prio)) /* the queue level is selected by @last_prio, not rt_se_prio(rt_se) */
+ __clear_bit(last_prio, array->bitmap); /* level became empty: drop its bit from the bitmap */
rt_se->on_list = 0;
}
@@ -1371,7 +1387,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
}
}
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/*
+ * Returns: -1 indicates that rt_se was not enqueued, 0 indicates that the highest
+ * priority of the rq did not change after enqueue, and 1 indicates that the highest
+ * priority of the rq changed after enqueue.
+ */
+static int __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@@ -1386,8 +1407,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
*/
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
if (rt_se->on_list)
- __delist_rt_entity(rt_se, array);
- return;
+ __delist_rt_entity(rt_se, array, rt_se_prio(rt_se));
+ return -1;
}
if (move_entity(flags)) {
@@ -1402,73 +1423,263 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
}
rt_se->on_rq = 1;
- inc_rt_tasks(rt_se, rt_rq);
+ return inc_rt_tasks(rt_se, rt_rq);
}
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/**
+ * __dequeue_rt_entity - delete rt_se from its rt_rq
+ *
+ * @rt_se: entity to be deleted
+ * @last_prio: priority level used both to delist @rt_se and to update the
+ * rq's highest priority; described by the author as the highest priority
+ * of this rt_se before the previous round of deletion
+ * @flags: operation flags; @rt_se is only taken off its list when
+ * move_entity(flags) is true
+ *
+ * Returns the result of dec_rt_tasks(), i.e. of dec_rt_prio(): MAX_RT_PRIO
+ * when the highest priority of the rq is not numerically greater than
+ * @last_prio after this deletion (no change to propagate), otherwise
+ * @last_prio itself, to be used in the next round of deletion. Note that 0
+ * is a valid priority and therefore not a "did not change" indication.
+ */
+static int __dequeue_rt_entity(struct sched_rt_entity *rt_se, int last_prio,
+ unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
if (move_entity(flags)) {
WARN_ON_ONCE(!rt_se->on_list);
- __delist_rt_entity(rt_se, array);
+ __delist_rt_entity(rt_se, array, last_prio);
}
rt_se->on_rq = 0;
- dec_rt_tasks(rt_se, rt_rq);
+ return dec_rt_tasks(rt_se, rt_rq, last_prio);
+}
+
+/*
+ * Subtract @rt from rt_nr_running and @rr from rr_nr_running of the rt_rq
+ * that @rt_se belongs to.
+ */
+static inline void dec_rq_nr_running(struct sched_rt_entity *rt_se,
+				     unsigned int rt, unsigned int rr)
+{
+	struct rt_rq *owner = rt_rq_of_se(rt_se);
+
+	owner->rr_nr_running -= rr;
+	owner->rt_nr_running -= rt;
+}
+
+/*
+ * Add @rt to rt_nr_running and @rr to rr_nr_running of the rt_rq that
+ * @rt_se belongs to.
+ */
+static inline void add_rq_nr_running(struct sched_rt_entity *rt_se,
+				     unsigned int rt, unsigned int rr)
+{
+	struct rt_rq *owner = rt_rq_of_se(rt_se);
+
+	owner->rr_nr_running += rr;
+	owner->rt_nr_running += rt;
+}
+
+/*
+ * Return true when @rt_se has no parent entity. Without
+ * CONFIG_RT_GROUP_SCHED this is always true.
+ */
+static inline bool on_top_rt_rq(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+	return !rt_se->parent;
+#else
+	return true;
+#endif
+}
/*
- * Because the prio of an upper entry depends on the lower
- * entries, we must remove entries top - down.
+ * To optimize the enqueue and dequeue of rt_se, this strategy employs a
+ * bottom-up removal approach. Specifically, when removing an rt_se at a
+ * certain level, if it is determined that the highest priority of the rq
+ * associated with that rt_se has not changed, there is no need to continue
+ * removing rt_se at higher levels. At this point, only the total number
+ * of removed rt_se needs to be recorded, and the rt_nr_running count of
+ * higher-level rq should be reduced accordingly.
+ *
+ * For enqueue operations, if an rt_se at a certain level is in the rq,
+ * it is still necessary to check the priority of the higher-level rq.
+ * If the priority of the higher-level rq is found to be lower than that
+ * of the rt_se to be added, it should be removed, as updating the highest
+ * priority of the rq during addition will cause the rq to be repositioned
+ * in the parent rq.
+ *
+ * Conversely, for dequeue operations, if an rt_se at a certain level is
+ * not in the rq, the operation can be exited immediately to reduce
+ * unnecessary checks and handling.
+ *
+ * @for_enqueue is non-zero when called from enqueue_rt_entity() and zero
+ * when called from dequeue_rt_entity().
+ *
+ * The return value is, for enqueue operations, the last rt_se that was
+ * removed; for dequeue operations, the last rt_se that was either removed
+ * or had its rt_nr_running updated. It starts out as the rt_se passed in.
*/
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
+static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
+ unsigned int flags, int for_enqueue)
{
- struct sched_rt_entity *back = NULL;
- unsigned int rt_nr_running;
+ struct sched_rt_entity *last = rt_se;
+ struct sched_rt_entity *origin = rt_se;
+ unsigned int del_rt_nr = 0;
+ unsigned int del_rr_nr = 0;
+ int prio_changed = rt_se_prio(rt_se);
+ int sub_on_rq = 1;
for_each_sched_rt_entity(rt_se) {
- rt_se->back = back;
- back = rt_se;
- }
+ if (on_rt_rq(rt_se)) {
+ if (sub_on_rq) {
+ /*
+ * The number of tasks removed from the sub-level rt_se also needs
+ * to be subtracted from the rq of the current rt_se, as the current
+ * rt_se's rq no longer includes the number of removed tasks.
+ */
+ dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
+ if ((prio_changed != MAX_RT_PRIO) ||
+ (rt_se_prio(rt_se) > rt_se_prio(origin))) {
+ /*
+ * If the removal of the lower-level rt_se causes the
+ * highest priority of the current rq to change, or if the
+ * priority of current rq is lower than the rt_se to be
+ * added, then the current rt_se also needs to be removed
+ * from its parent rq, and the number of deleted tasks
+ * should be accumulated.
+ */
+ if (prio_changed == MAX_RT_PRIO)
+ prio_changed = rt_se_prio(rt_se);
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else if (!for_enqueue) {
+ /* For dequeue, "last" also tracks an rt_se whose rq only had rt_nr_running modified. */
+ last = rt_se;
+ }
+ } else {
+ /*
+ * This branch is only reached for enqueue: sub_on_rq is only
+ * cleared on the enqueue path, and a dequeue breaks out of the
+ * loop at the first rt_se that is not on its rq.
+ * If the sub-level node is not online, and the current rt_se's
+ * priority is lower than the one being added, the current rt_se
+ * needs to be removed.
+ */
+ prio_changed = rt_se_prio(rt_se);
+ if (prio_changed > rt_se_prio(origin)) {
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else {
+ prio_changed = MAX_RT_PRIO;
+ }
+ }
- rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
+ /*
+ * If the current rt_se is on the top rt_rq, then the already deleted
+ * nodes, plus the count of the rt_rq where current rt_se located,
+ * need to be removed from the top_rt_rq.
+ */
+ if (on_top_rt_rq(rt_se)) {
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
+ }
+ sub_on_rq = 1;
+ } else if (for_enqueue) {
+ struct rt_rq *group_rq = group_rt_rq(rt_se);
- for (rt_se = back; rt_se; rt_se = rt_se->back) {
- if (on_rt_rq(rt_se))
- __dequeue_rt_entity(rt_se, flags);
+ /*
+ * In the case of an enqueue operation, if a certain level is found to be
+ * not online, then the previous counts need to be reset to zero.
+ */
+ prio_changed = MAX_RT_PRIO;
+ sub_on_rq = 0;
+ del_rt_nr = 0;
+ del_rr_nr = 0;
+
+ /*
+ * If the current group is being throttled, then there is no need to check
+ * higher levels since enqueueing will not affect higher-level nodes.
+ */
+ if (group_rq && rt_rq_throttled(group_rq))
+ break;
+
+ if (on_top_rt_rq(rt_se))
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ rt_rq_of_se(rt_se)->rt_nr_running);
+ } else {
+ last = rt_se;
+ break;
+ }
}
- dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
+ return last;
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last; /* entity returned by dequeue_rt_stack() */
+ unsigned int add_rt_nr = 0; /* rt count of the most recently enqueued entity, 0 if it was not enqueued */
+ unsigned int add_rr_nr = 0; /* same, for the rr count */
+ int enqueue = 1; /* cleared once the walk has processed "last" */
+ int prio_change = 1; /* result of the latest __enqueue_rt_entity() call */
update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
- for_each_sched_rt_entity(rt_se)
- __enqueue_rt_entity(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 1);
+ /*
+ * Walk from @rt_se upwards. An entity is (re-)enqueued while "last" has
+ * not been passed yet, when it is not on its rq, or when the previous
+ * enqueue returned 1; otherwise only the counts are added to its rq.
+ */
+ for_each_sched_rt_entity(rt_se) {
+ if (enqueue || !on_rt_rq(rt_se) || (prio_change == 1)) {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change >= 0) {
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ } else {
+ add_rt_nr = add_rr_nr = 0; /* negative result: rt_se was not enqueued, nothing to add above */
+ }
+ } else {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ }
+
+ if (rt_se == last)
+ enqueue = 0;
+ }
+
enqueue_top_rt_rq(&rq->rt);
}
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last; /* entity returned by dequeue_rt_stack(); the walk below stops there */
+ unsigned int add_rt_nr = 0; /* rt count to add to the rq of entities that stayed queued */
+ unsigned int add_rr_nr = 0; /* same, for the rr count */
+ int prio_change = 1; /* result of the latest __enqueue_rt_entity() call */
update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 0);
for_each_sched_rt_entity(rt_se) {
struct rt_rq *rt_rq = group_rt_rq(rt_se);
+ if (rt_rq && rt_rq->rt_nr_running) {
+ if (on_rt_rq(rt_se)) {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ } else {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change == 0) {
+ /*
+ * If enqueue is successful and the priority of the rq has
+ * not changed, then the parent node only needs to add the
+ * count of the current rt_se. Otherwise, the parent node
+ * will also need to enqueue.
+ */
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ }
+ }
+ } else {
+ add_rt_nr = add_rr_nr = 0; /* no group rq, or it has nothing running: nothing to add above */
+ }
- if (rt_rq && rt_rq->rt_nr_running)
- __enqueue_rt_entity(rt_se, flags);
+ /*
+ * last is the rt_se of the last deletion or modification of the
+ * count, so the subsequent rt_se does not need to be updated.
+ */
+ if (rt_se == last)
+ break;
}
+
enqueue_top_rt_rq(&rq->rt);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a831af102070..b634153aacf0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2878,6 +2878,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq);
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
extern void resched_latency_warn(int cpu, u64 latency);
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread* [PATCH-RT sched v3 2/2] RT test: Adding test cases for RT group scheduling
2024-07-16 5:25 [PATCH-RT sched v3 0/2] Optimize the RT group scheduling Xavier
2024-07-16 5:25 ` [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-07-16 5:25 ` Xavier
2024-07-16 8:59 ` [PATCH-RT sched v3 0/2] Optimize the " Peter Zijlstra
2 siblings, 0 replies; 7+ messages in thread
From: Xavier @ 2024-07-16 5:25 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
Add test cases for RT group scheduling. The test creates several RT
infinite-loop processes/threads, sets them to the same or different
priorities, places them in different RT task groups, lets them run for a
period of time, and finally sums the loop iteration counts of all tasks.
Signed-off-by: Xavier <xavier_qy@163.com>
---
MAINTAINERS | 7 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 +++++++++++
4 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
diff --git a/MAINTAINERS b/MAINTAINERS
index 43353b705988..d29effe57bf8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19480,6 +19480,13 @@ L: linux-remoteproc@vger.kernel.org
S: Maintained
F: drivers/tty/rpmsg_tty.c
+RT GROUP SCHED TEST
+M: Xavier <xavier_qy@163.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: tools/testing/selftests/sched/deadloop.c
+F: tools/testing/selftests/sched/rt_group_sched_test.sh
+
RTL2830 MEDIA DRIVER
L: linux-media@vger.kernel.org
S: Orphan
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
index 099ee9213557..96decb58bf35 100644
--- a/tools/testing/selftests/sched/Makefile
+++ b/tools/testing/selftests/sched/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
-TEST_GEN_FILES := cs_prctl_test
-TEST_PROGS := cs_prctl_test
+# deadloop never terminates on its own, so it is only built as a helper; the
+# test that actually runs is the script that drives it.
+TEST_GEN_FILES := cs_prctl_test deadloop
+TEST_PROGS := cs_prctl_test rt_group_sched_test.sh
include ../lib.mk
diff --git a/tools/testing/selftests/sched/deadloop.c b/tools/testing/selftests/sched/deadloop.c
new file mode 100644
index 000000000000..d850a3e2a0ab
--- /dev/null
+++ b/tools/testing/selftests/sched/deadloop.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/*
+ * Create multiple infinite loop threads based on the passed parameters
+ * Usage: deadloop num policy prio
+ * num: the number of child threads
+ * policy: the scheduling policy of the child threads, 0-fair, 1-fifo, 2-rr
+ * prio: the priority
+ * If this process is killed, it will print the loop count of all child threads
+ * to the OUTPUT_FILE
+ *
+ * Date: June 27, 2024
+ * Author: Xavier <xavier_qy@163.com>
+ */
+
+#define OUTPUT_FILE "rt_group_sched_test.log"
+
+/*
+ * glibc only provides a gettid() wrapper from 2.30 on; supply one for older
+ * glibc. The test is nested because __GLIBC_PREREQ() is not defined by other
+ * C libraries, where using it in an #if expression is a preprocessor error.
+ */
+#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if !__GLIBC_PREREQ(2, 30)
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+	return (pid_t)syscall(SYS_gettid);
+}
+#endif
+#endif
+/*
+ * do_err(x): when (x) is negative, print the calling function, the line
+ * number and the value, then park the caller forever in a sleep(1) loop.
+ * Zero and positive values pass through silently.
+ * NOTE(review): pthread_*() functions report failure with a positive error
+ * number, which this check does not catch - confirm against the callers.
+ */
+#define do_err(x) \
+do { \
+ if ((x) < 0) { \
+ printf("test BUG_ON func %s, line %d %ld\n", \
+ __func__, __LINE__, (long)(x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+/*
+ * do_false(x): when (x) equals exactly 1 (e.g. the result of a "!ptr"
+ * test), print the calling function, the line number and the value, then
+ * park the caller forever in a sleep(1) loop. Any other value passes.
+ */
+#define do_false(x) \
+do { \
+ if ((x) == 1) { \
+ printf("test BUG_ON func %s, line %d %d\n", \
+ __func__, __LINE__, (x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+/* Per-thread bookkeeping; one element of the pdata[] array per child thread. */
+struct thread_data {
+ pthread_t thread; /* handle filled in by pthread_create() */
+ int index; /* slot in pdata[]; its address is passed to the thread as argument */
+ int pid; /* gettid() value, stored by the thread itself */
+ unsigned long cnt; /* loop iterations, incremented endlessly by the thread */
+};
+
+static struct thread_data *pdata; /* array allocated in dead_loop_create() */
+static int thread_num = 1; /* number of child threads; main() overrides it from argv[1] */
+
+/*
+ * Create one thread that runs @entry with @para as its argument, using
+ * explicitly set (not inherited) scheduling attributes.
+ *
+ * @entry:  thread start routine, passed as void * by the caller
+ * @thread: receives the handle of the new thread
+ * @para:   argument handed to @entry
+ * @policy: scheduling policy for the new thread
+ * @prio:   static priority; only applied for SCHED_FIFO / SCHED_RR, other
+ *          policies get priority 0
+ *
+ * Any failure is reported through do_err(), which does not return.
+ */
+static void create_thread_posix(void *entry, pthread_t *thread, int *para,
+				int policy, int prio)
+{
+	int ret;
+	struct sched_param param;
+	pthread_attr_t attr;
+
+	memset(&param, 0, sizeof(param));
+	/* Only the real-time policies accept a non-zero static priority. */
+	if (policy == SCHED_FIFO || policy == SCHED_RR)
+		param.sched_priority = prio;
+
+	/*
+	 * pthread_*() return 0 on success and a positive error number on
+	 * failure, while do_err() only fires on negative values: negate.
+	 */
+	ret = pthread_attr_init(&attr);
+	do_err(-ret);
+
+	ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+	do_err(-ret);
+
+	ret = pthread_attr_setschedpolicy(&attr, policy);
+	do_err(-ret);
+
+	ret = pthread_attr_setschedparam(&attr, &param);
+	do_err(-ret);
+
+	ret = pthread_create(thread, &attr, (void *(*)(void *))entry, para);
+	do_err(-ret);
+
+	/* The attribute object is no longer needed once the thread exists. */
+	ret = pthread_attr_destroy(&attr);
+	do_err(-ret);
+}
+
+/*
+ * Thread body: record the thread id in pdata[index], print the thread id and
+ * its scheduling priority, then increment pdata[index].cnt forever.
+ *
+ * @arg: pointer to the int index of this thread's pdata[] slot
+ *
+ * Never returns in practice; the trailing return only satisfies the
+ * pthread start-routine signature.
+ */
+static void *dead_loop_entry(void *arg)
+{
+	int index = *(int *)arg;
+	/* Zeroed so a failed sched_getparam() prints 0, not stack garbage. */
+	struct sched_param param = { 0 };
+	int cur = gettid();
+
+	if (sched_getparam(cur, &param) < 0)
+		perror("sched_getparam");
+	pdata[index].pid = cur;
+	printf("cur:%d prio:%d\n", cur, param.sched_priority);
+
+	while (1) {
+		/* Compiler barrier: keeps the counter update inside the loop. */
+		asm volatile("" ::: "memory");
+		pdata[index].cnt++;
+	}
+	return NULL;
+}
+
+/*
+ * SIGTERM handler: append one "pid:<tid> cnt:<loops>" line per child thread
+ * to OUTPUT_FILE and terminate the process. Other signals are ignored.
+ *
+ * NOTE(review): freopen(), printf() and exit() are not async-signal-safe;
+ * this relies on the main thread being blocked in pthread_join() when the
+ * signal arrives - confirm this is acceptable for the test.
+ */
+static void handle_signal(int signal)
+{
+	int cnt;
+	FILE *file;
+
+	if (signal != SIGTERM)
+		return;
+
+	file = freopen(OUTPUT_FILE, "a", stdout);
+	if (file == NULL) {
+		perror("freopen");
+		/* The counters could not be logged: do not report success. */
+		exit(1);
+	}
+
+	/* cnt is an unsigned long, so it must be printed with %lu. */
+	for (cnt = 0; cnt < thread_num; cnt++)
+		printf("pid:%d cnt:%lu\n", pdata[cnt].pid, pdata[cnt].cnt);
+
+	fclose(file);
+	exit(0);
+}
+
+/*
+ * Allocate the per-thread state, optionally switch the calling thread to
+ * @policy/@prio, start thread_num looping threads and wait for them.
+ *
+ * @policy: scheduling policy; 0 leaves the calling thread's policy alone
+ * @prio:   static priority used with @policy
+ *
+ * Returns 0 once all threads have been joined (they loop forever, so in
+ * practice the process ends in the SIGTERM handler), or -1 when thread_num
+ * is not positive or the allocation fails.
+ */
+static int dead_loop_create(int policy, int prio)
+{
+	int cnt;
+	int ret;
+	struct sched_param param;
+
+	if (thread_num <= 0) {
+		fprintf(stderr, "invalid thread count %d\n", thread_num);
+		return -1;
+	}
+
+	/* calloc() so every pid/cnt is zero before the threads increment it. */
+	pdata = calloc(thread_num, sizeof(*pdata));
+	if (!pdata) {
+		perror("calloc");
+		return -1;
+	}
+
+	if (policy) {
+		memset(&param, 0, sizeof(param));
+		param.sched_priority = prio;
+		ret = sched_setscheduler(0, policy, &param);
+		do_err(ret);
+	}
+
+	/*
+	 * Install the handler before any thread is started so that an early
+	 * SIGTERM still produces the log instead of killing the process.
+	 */
+	signal(SIGTERM, handle_signal);
+
+	for (cnt = 0; cnt < thread_num; cnt++) {
+		pdata[cnt].index = cnt;
+		create_thread_posix(dead_loop_entry, &pdata[cnt].thread,
+				    &pdata[cnt].index, policy, prio);
+	}
+
+	for (cnt = 0; cnt < thread_num; cnt++)
+		pthread_join(pdata[cnt].thread, NULL);
+
+	free(pdata);
+	pdata = NULL;
+	return 0;
+}
+
+/*
+ * Parse @s as a base-10 int. Returns 0 and stores the value in @out, or -1
+ * when @s is empty, has trailing characters or does not fit in an int.
+ */
+static int parse_int_arg(const char *s, int *out)
+{
+	char *end;
+	long v;
+
+	errno = 0;
+	v = strtol(s, &end, 10);
+	if (end == s || *end != '\0' || errno == ERANGE ||
+	    v < INT_MIN || v > INT_MAX)
+		return -1;
+
+	*out = (int)v;
+	return 0;
+}
+
+/*
+ * Usage: deadloop [num [policy [prio]]]
+ *
+ * num defaults to 1, policy to 2 (rr). prio defaults to 50 for the
+ * real-time policies and to 0 for policy 0 (fair), which does not accept
+ * any other priority.
+ */
+int main(int argc, char **argv)
+{
+	int policy = 2;
+	int prio = 50;
+	int bad = argc > 4;
+
+	if (!bad && argc >= 2)
+		bad = parse_int_arg(argv[1], &thread_num) || thread_num < 1;
+
+	if (!bad && argc >= 3)
+		bad = parse_int_arg(argv[2], &policy) || policy < 0 || policy > 2;
+
+	if (argc == 4) {
+		if (!bad)
+			bad = parse_int_arg(argv[3], &prio);
+	} else if (policy == 0) {
+		prio = 0;
+	}
+
+	if (bad) {
+		fprintf(stderr,
+			"Usage: %s [num [policy [prio]]]\n"
+			"  num:    number of child threads (>= 1)\n"
+			"  policy: 0-fair, 1-fifo, 2-rr\n"
+			"  prio:   the priority\n", argv[0]);
+		return 1;
+	}
+
+	return dead_loop_create(policy, prio) ? 1 : 0;
+}
diff --git a/tools/testing/selftests/sched/rt_group_sched_test.sh b/tools/testing/selftests/sched/rt_group_sched_test.sh
new file mode 100755
index 000000000000..9031250a2684
--- /dev/null
+++ b/tools/testing/selftests/sched/rt_group_sched_test.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test for rt group scheduling
+# Date: June 27, 2024
+# Author: Xavier <xavier_qy@163.com>
+
+# Record the list of child process PIDs
+PIDS=()
+
+# File for redirected output
+LOGFILE="rt_group_sched_test.log"
+
+# Cleanup function: stop the recorded child processes, tear down the cgroup
+# hierarchy created by this script, then total the loop counts in $LOGFILE.
+function cleanup() {
+	# Exit status the script was ending with when the EXIT trap fired
+	local status=$?
+	local pid line top_src
+	local total=0
+
+	echo "Cleaning up..."
+	for pid in "${PIDS[@]}"; do
+		if kill -0 "$pid" 2>/dev/null; then
+			kill -TERM "$pid"
+		fi
+	done
+
+	# Sleep for a while to ensure the processes are properly killed
+	sleep 2
+
+	# Only tear the hierarchy down when the tmpfs stacked by setup_cgroups
+	# (mount source "cgroup_root") is the top mount on /sys/fs/cgroup.
+	# Otherwise these umounts would hit the system's own cgroup filesystem.
+	top_src=$(awk '$2 == "/sys/fs/cgroup" { src = $1 } END { print src }' /proc/mounts)
+	if [ "$top_src" = "cgroup_root" ]; then
+		# Remove the child groups, deepest first, so they do not
+		# outlive the mount.
+		rmdir /sys/fs/cgroup/cpu/child1/child2/child3 \
+			/sys/fs/cgroup/cpu/child1/child2 \
+			/sys/fs/cgroup/cpu/child1 2>/dev/null
+		umount /sys/fs/cgroup/cpu 2>/dev/null
+		umount /sys/fs/cgroup 2>/dev/null
+	fi
+	echo "Cleanup completed."
+
+	# Ensure the LOGFILE exists; keep an earlier failure status if any
+	if [ ! -f "$LOGFILE" ]; then
+		echo "$LOGFILE not found!"
+		[ "$status" -ne 0 ] || status=1
+		exit "$status"
+	fi
+
+	# Sum the cnt value of every "pid:<n> cnt:<n>" line
+	while IFS= read -r line; do
+		if [[ $line =~ ^pid:[0-9]+\ cnt:([0-9]+) ]]; then
+			total=$((total + ${BASH_REMATCH[1]}))
+		fi
+	done < "$LOGFILE"
+
+	# Print the total sum
+	echo "Total cnt: $total"
+	echo "Finished processing."
+}
+
+# Capture actions when interrupted or terminated by a signal
+trap cleanup EXIT
+
+# Start the cgroup filesystem and create the necessary directories.
+# Exits with 4 (the kselftest "skip" code) when the mounts are not possible,
+# e.g. when not running as root or when the cpu controller is unavailable.
+function setup_cgroups() {
+	if ! mount -t tmpfs -o mode=755 cgroup_root /sys/fs/cgroup; then
+		echo "Cannot mount tmpfs on /sys/fs/cgroup, skipping"
+		exit 4
+	fi
+	mkdir -p /sys/fs/cgroup/cpu
+	if ! mount -t cgroup -o cpu none /sys/fs/cgroup/cpu; then
+		echo "Cannot mount the cgroup v1 cpu controller, skipping"
+		exit 4
+	fi
+}
+
+# Create cgroup subdirectories and configure their settings
+#   $1 parent cgroup directory   $2 name of the child group
+#   $3 cpu.rt_period_us value    $4 cpu.rt_runtime_us value
+function create_child_cgroup() {
+	local base_dir=$1
+	local name=$2
+	local rt_period=$3
+	local rt_runtime=$4
+	local dir="$base_dir/$name"
+
+	if ! mkdir -p "$dir"; then
+		echo "Error: cannot create cgroup $dir"
+		exit 1
+	fi
+
+	# Without the RT bandwidth files there is nothing to test: skip (4)
+	if [ ! -f "$dir/cpu.rt_runtime_us" ]; then
+		echo "$dir/cpu.rt_runtime_us not found (RT group scheduling unavailable?), skipping"
+		exit 4
+	fi
+
+	if ! echo "$rt_period" > "$dir/cpu.rt_period_us" ||
+	   ! echo "$rt_runtime" > "$dir/cpu.rt_runtime_us"; then
+		echo "Error: failed to configure RT bandwidth for $dir"
+		exit 1
+	fi
+}
+# Launch a process and add it to the specified cgroup
+#   $1 name given to the process as argv[0]
+#   $2 deadloop arguments: number of child threads, policy, priority
+#   $3 cgroup directory the process is moved into
+function launch_process() {
+	local process_name=$1
+	local args=$2
+	local cgroup_path=$3
+	local pid
+
+	# Launch the process; $args is deliberately unquoted so that it is
+	# split into the three separate arguments
+	exec -a "$process_name" ./deadloop $args &
+	pid=$!
+	PIDS+=("$pid")
+
+	# Short sleep to ensure the process starts
+	sleep 1
+
+	# Check the exact PID we launched rather than searching by name
+	if ! kill -0 "$pid" 2>/dev/null; then
+		echo "Error: No process found with name $process_name."
+		exit 1
+	fi
+
+	if ! echo "$pid" > "$cgroup_path/cgroup.procs"; then
+		echo "Error: failed to add PID $pid to cgroup $cgroup_path"
+		exit 1
+	fi
+	echo "Process $process_name with PID $pid added to cgroup $cgroup_path"
+}
+
+# Main function: build a three-level cgroup hierarchy, start one deadloop
+# instance per level plus one in the root group, and let them run.
+function main() {
+	local root="/sys/fs/cgroup/cpu"
+	local lvl1="$root/child1"
+	local lvl2="$lvl1/child2"
+	local lvl3="$lvl2/child3"
+
+	echo "The test needs 30 seconds..."
+	rm -f "$LOGFILE"
+	setup_cgroups
+
+	create_child_cgroup "$root" "child1" 1000000 800000
+	create_child_cgroup "$lvl1" "child2" 1000000 700000
+	create_child_cgroup "$lvl2" "child3" 1000000 600000
+
+	launch_process "child1" "3 2 50" "$lvl1"
+	launch_process "child2" "3 2 50" "$lvl2"
+	launch_process "child3" "1 2 50" "$lvl3"
+	launch_process "tg_root" "1 2 50" "$root"
+
+	# Run for 30 seconds
+	sleep 30
+}
+
+# Execute the main function
+main
--
2.45.2
^ permalink raw reply related [flat|nested] 7+ messages in thread