* [PATCH-RT sched v1 0/2] Optimize the RT group scheduling
@ 2024-06-27 17:21 Xavier
2024-06-27 17:21 ` [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
` (3 more replies)
0 siblings, 4 replies; 20+ messages in thread
From: Xavier @ 2024-06-27 17:21 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, Xavier
Hi all,
The first patch optimizes the enqueue and dequeue of rt_se, the strategy
employs a bottom-up removal approach.
The second patch provides validation for the efficiency improvements made
by patch 1. The test case count the number of infinite loop executions for
all threads.
origion optimized
10242794134 10659512784
13650210798 13555924695
12953159254 13733609646
11888973428 11742656925
12791797633 13447598015
11451270205 11704847480
13335320346 13858155642
10682907328 10513565749
10173249704 10254224697
8309259793 8893668653
avg 11547894262 11836376429
Run two QEMU emulators simultaneously, one running the original kernel and the
other running the optimized kernel, and compare the average of the results over
10 runs. After optimizing, the number of iterations in the infinite loop increased
by approximately 2.5%.
Kindly review.
Xavier (2):
RT SCHED: Optimize the enqueue and dequeue operations for rt_se
RT test: Adding test cases for RT group scheduling
MAINTAINERS | 7 +
kernel/sched/debug.c | 50 ++++
kernel/sched/rt.c | 277 +++++++++++++++---
kernel/sched/sched.h | 1 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 ++++++++
7 files changed, 606 insertions(+), 44 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
--
2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-06-27 17:21 [PATCH-RT sched v1 0/2] Optimize the RT group scheduling Xavier
@ 2024-06-27 17:21 ` Xavier
2024-06-28 23:29 ` kernel test robot
2024-06-29 0:20 ` kernel test robot
2024-06-27 17:21 ` [PATCH-RT sched v1 2/2] RT test: Adding test cases for RT group scheduling Xavier
` (2 subsequent siblings)
3 siblings, 2 replies; 20+ messages in thread
From: Xavier @ 2024-06-27 17:21 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, Xavier
This patch optimizes the enqueue and dequeue of rt_se, the strategy employs
a bottom-up removal approach. Specifically, when removing an rt_se at a
certain level, if it is determined that the highest priority of the rq
associated with that rt_se has not changed, there is no need to continue
removing rt_se at higher levels. At this point, only the total number
of removed rt_se needs to be recorded, and the rt_nr_running count of
higher-level rq should be removed accordingly.
Signed-off-by: Xavier <xavier_qy@163.com>
---
kernel/sched/debug.c | 50 ++++++++
kernel/sched/rt.c | 277 ++++++++++++++++++++++++++++++++++++-------
kernel/sched/sched.h | 1 +
3 files changed, 286 insertions(+), 42 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c1eb9a1afd13..d823280c0e73 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -712,6 +712,56 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#endif
}
+static void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
+{
+ struct task_struct *task;
+
+ if (rt_se->my_q) {
+#ifdef CONFIG_RT_GROUP_SCHED
+ SEQ_printf_task_group_path(m, rt_se->my_q->tg, "%s\n");
+#endif
+ } else {
+ task = container_of(rt_se, struct task_struct, rt);
+ SEQ_printf(m, " prio-%d, pid-%d, %s\n", task->prio, task->pid, task->comm);
+ }
+}
+
+/*shall be called in rq lock*/
+void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq)
+{
+ struct rt_prio_array *array = &rt_rq->active;
+ struct sched_rt_entity *rt_se;
+ struct list_head *queue, *head;
+ unsigned long bitmap[2];
+ int idx;
+ int count = 0;
+
+ if (!rt_rq->rt_nr_running)
+ return;
+
+ memcpy(bitmap, array->bitmap, sizeof(unsigned long) * 2);
+ idx = sched_find_first_bit(bitmap);
+ WARN_ON_ONCE(idx >= MAX_RT_PRIO);
+
+ while (1) {
+ clear_bit(idx, bitmap);
+ queue = array->queue + idx;
+ head = queue;
+ queue = queue->next;
+ do {
+ rt_se = list_entry(queue, struct sched_rt_entity, run_list);
+ print_rt_se(m, rt_se);
+ queue = queue->next;
+ count++;
+ } while (queue != head);
+ idx = sched_find_first_bit(bitmap);
+ if (idx >= MAX_RT_PRIO)
+ break;
+ }
+
+ WARN_ON_ONCE(count != rt_rq->rt_nr_running);
+}
+
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index aa4c1c874fa4..f0b7e094de11 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1113,7 +1113,7 @@ void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
#endif /* CONFIG_SMP */
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-static void
+static int
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1122,9 +1122,11 @@ inc_rt_prio(struct rt_rq *rt_rq, int prio)
rt_rq->highest_prio.curr = prio;
inc_rt_prio_smp(rt_rq, prio, prev_prio);
+
+ return prev_prio > prio;
}
-static void
+static int
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1149,12 +1151,22 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
}
dec_rt_prio_smp(rt_rq, prio, prev_prio);
+ if (rt_rq->highest_prio.curr > prio)
+ return prio;
+ else
+ return 0;
}
#else
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
+static inline int inc_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
+static inline int dec_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
@@ -1218,28 +1230,31 @@ unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
}
static inline
-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
int prio = rt_se_prio(rt_se);
+ int prio_change;
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
- inc_rt_prio(rt_rq, prio);
+ prio_change = inc_rt_prio(rt_rq, prio);
inc_rt_group(rt_se, rt_rq);
+ return prio_change;
}
static inline
-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq, int prio)
{
+ int prio_changed;
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
- WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
- dec_rt_prio(rt_rq, rt_se_prio(rt_se));
+ prio_changed = dec_rt_prio(rt_rq, prio);
dec_rt_group(rt_se, rt_rq);
+ return prio_changed;
}
/*
@@ -1255,12 +1270,13 @@ static inline bool move_entity(unsigned int flags)
return true;
}
-static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+static void __delist_rt_entity(struct sched_rt_entity *rt_se,
+ struct rt_prio_array *array, int last_prio)
{
list_del_init(&rt_se->run_list);
- if (list_empty(array->queue + rt_se_prio(rt_se)))
- __clear_bit(rt_se_prio(rt_se), array->bitmap);
+ if (list_empty(array->queue + last_prio))
+ __clear_bit(last_prio, array->bitmap);
rt_se->on_list = 0;
}
@@ -1371,7 +1387,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
}
}
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/*
+ * Returns: -1 indicates that rt_se was not enqueued, 0 indicates that the highest
+ * priority of the rq did not change after enqueue, and 1 indicates that the highest
+ * priority of the rq changed after enqueue.
+ */
+static int __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@@ -1386,8 +1407,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
*/
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
if (rt_se->on_list)
- __delist_rt_entity(rt_se, array);
- return;
+ __delist_rt_entity(rt_se, array, rt_se_prio(rt_se));
+ return -1;
}
if (move_entity(flags)) {
@@ -1402,73 +1423,245 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
}
rt_se->on_rq = 1;
- inc_rt_tasks(rt_se, rt_rq);
+ return inc_rt_tasks(rt_se, rt_rq);
}
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/**
+ * delete rt_se from rt_rq
+ *
+ * @rt_se Nodes to be deleted
+ * @last_prio The highest priority of this rt_se before the previous round
+ * of deletion
+ * @flags operation flags
+ *
+ * Returns: =0 indicates that the highest priority of the current rq did not
+ * change during this deletion. >0 indicates it changed, and it returns the
+ * previous highest priority to use in the next round of deletion.
+ */
+static int __dequeue_rt_entity(struct sched_rt_entity *rt_se, int last_prio,
+ unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
if (move_entity(flags)) {
WARN_ON_ONCE(!rt_se->on_list);
- __delist_rt_entity(rt_se, array);
+ __delist_rt_entity(rt_se, array, last_prio);
}
rt_se->on_rq = 0;
- dec_rt_tasks(rt_se, rt_rq);
+ return dec_rt_tasks(rt_se, rt_rq, last_prio);
+}
+
+static inline void dec_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running -= rt;
+ rt_rq->rr_nr_running -= rr;
+}
+
+static inline void add_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running += rt;
+ rt_rq->rr_nr_running += rr;
}
/*
- * Because the prio of an upper entry depends on the lower
- * entries, we must remove entries top - down.
+ * To optimize the enqueue and dequeue of rt_se, this strategy employs a
+ * bottom-up removal approach. Specifically, when removing an rt_se at a
+ * certain level, if it is determined that the highest priority of the rq
+ * associated with that rt_se has not changed, there is no need to continue
+ * removing rt_se at higher levels. At this point, only the total number
+ * of removed rt_se needs to be recorded, and the rt_nr_running count of
+ * higher-level rq should be removed accordingly.
+ *
+ * For enqueue operations, if an rt_se at a certain level is in the rq,
+ * it is still necessary to check the priority of the higher-level rq.
+ * If the priority of the higher-level rq is found to be lower than that
+ * of the rt_se to be added, it should be removed, as updating the highest
+ * priority of the rq during addition will cause the rq to be repositioned
+ * in the parent rq.
+ *
+ * Conversely, for dequeue operations, if an rt_se at a certain level is
+ * not in the rq, the operation can be exited immediately to reduce
+ * unnecessary checks and handling.
+ *
+ * The return value refers to the last rt_se that was removed for enqueue
+ * operations. And for dequeue operations, it refers to the last rt_se
+ * that was either removed or had its rt_nr_running updated.
*/
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
+static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
+ unsigned int flags, int for_enqueue)
{
- struct sched_rt_entity *back = NULL;
- unsigned int rt_nr_running;
+ struct sched_rt_entity *last = rt_se;
+ struct sched_rt_entity *origin = rt_se;
+ unsigned int del_rt_nr = 0;
+ unsigned int del_rr_nr = 0;
+ int prio_changed = rt_se_prio(rt_se);
+ int sub_on_rq = 1;
for_each_sched_rt_entity(rt_se) {
- rt_se->back = back;
- back = rt_se;
- }
-
- rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
+ if (on_rt_rq(rt_se)) {
+ if (sub_on_rq) {
+ /*
+ * The number of tasks removed from the sub-level rt_se also needs
+ * to be subtracted from the rq of the current rt_se, as the current
+ * rt_se's rq no longer includes the number of removed tasks.
+ */
+ dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
+
+ if (prio_changed) {
+ /*
+ * If the removal of the lower-level rt_se causes the
+ * highest priority of the current rq to change, then the
+ * current rt_se also needs to be removed from its parent
+ * rq, and the number of deleted tasks should be
+ * accumulated.
+ */
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else if (!for_enqueue) {
+ /* For dequeue, last may only rt_nr_running was modified.*/
+ last = rt_se;
+ }
+ } else {
+ /*
+ * Entering this branch must be for enqueue, as dequeue would break
+ * if an rt_se is not online.
+ * If the sub-level node is not online, and the current rt_se's
+ * priority is lower than the one being added, current rt_se need
+ * to be removed.
+ */
+ prio_changed = rt_se_prio(rt_se);
+ if (prio_changed > rt_se_prio(origin)) {
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else {
+ prio_changed = 0;
+ }
+ }
- for (rt_se = back; rt_se; rt_se = rt_se->back) {
- if (on_rt_rq(rt_se))
- __dequeue_rt_entity(rt_se, flags);
+ /*
+ * If the current rt_se is on the top rt_rq, then the already deleted
+ * nodes, plus the count of the rt_rq where current rt_se located,
+ * need to be removed from the top_rt_rq.
+ */
+ if (!rt_se->parent) {
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
+ }
+ sub_on_rq = 1;
+ } else if (for_enqueue) {
+ /*
+ * In the case of an enqueue operation, if a certain level is found to be
+ * not online, then the previous counts need to be reset to zero.
+ */
+ prio_changed = 0;
+ sub_on_rq = 0;
+ del_rt_nr = 0;
+ del_rr_nr = 0;
+
+ if (!rt_se->parent)
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ rt_rq_of_se(rt_se)->rt_nr_running);
+ } else {
+ last = rt_se;
+ break;
+ }
}
- dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
+ return last;
}
+
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int enqueue = 1;
+ int prio_change = 1;
update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
- for_each_sched_rt_entity(rt_se)
- __enqueue_rt_entity(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 1);
+ do {
+ if (enqueue || !on_rt_rq(rt_se) || (prio_change == 1)) {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change >= 0) {
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
+ } else {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ }
+
+ if (rt_se == last)
+ enqueue = 0;
+
+ rt_se = rt_se->parent;
+ } while (rt_se);
+
enqueue_top_rt_rq(&rq->rt);
}
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int prio_change = 1;
update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
-
- for_each_sched_rt_entity(rt_se) {
+ last = dequeue_rt_stack(rt_se, flags, 0);
+ do {
struct rt_rq *rt_rq = group_rt_rq(rt_se);
+ if (rt_rq && rt_rq->rt_nr_running) {
+ if (on_rt_rq(rt_se)) {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ } else {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change == 0) {
+ /*
+ * If enqueue is successful and the priority of the rq has
+ * not changed, then the parent node only needs to add the
+ * count of the current rt_se. Otherwise, the parent node
+ * will also need to enqueue.
+ */
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ }
+ }
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
+
+ /*
+ * last is the rt_se of the last deletion or modification of the
+ * count, so the subsequent rt_se does not need to be updated.
+ */
+ if (rt_se == last)
+ break;
+
+ rt_se = rt_se->parent;
+ } while (rt_se);
- if (rt_rq && rt_rq->rt_nr_running)
- __enqueue_rt_entity(rt_se, flags);
- }
enqueue_top_rt_rq(&rq->rt);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a831af102070..b634153aacf0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2878,6 +2878,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq);
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
extern void resched_latency_warn(int cpu, u64 latency);
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH-RT sched v1 2/2] RT test: Adding test cases for RT group scheduling
2024-06-27 17:21 [PATCH-RT sched v1 0/2] Optimize the RT group scheduling Xavier
2024-06-27 17:21 ` [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-06-27 17:21 ` Xavier
2024-06-29 11:28 ` [PATCH-RT sched v2 0/2] Optimize the " Xavier
2024-07-29 9:32 ` [PATCH-RT sched v1 0/2] Optimize the " Michal Koutný
3 siblings, 0 replies; 20+ messages in thread
From: Xavier @ 2024-06-27 17:21 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, Xavier
Adding test cases for RT group scheduling, create some RT infinite loop
processes/threads, then set them to the same or different priorities.
Place them in different RT task groups, run for a period of time,
and finally count the number of infinite loop executions for all tasks.
Signed-off-by: Xavier <xavier_qy@163.com>
---
MAINTAINERS | 7 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 +++++++++++
4 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
diff --git a/MAINTAINERS b/MAINTAINERS
index cf9c9221c388..2cde1546ba01 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19481,6 +19481,13 @@ L: linux-remoteproc@vger.kernel.org
S: Maintained
F: drivers/tty/rpmsg_tty.c
+RT GROUP SCHED TEST
+M: Xavier <xavier_qy@163.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: tools/testing/selftests/sched/deadloop.c
+F: tools/testing/selftests/sched/rt_group_sched_test.sh
+
RTL2830 MEDIA DRIVER
L: linux-media@vger.kernel.org
S: Orphan
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
index 099ee9213557..96decb58bf35 100644
--- a/tools/testing/selftests/sched/Makefile
+++ b/tools/testing/selftests/sched/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
-TEST_GEN_FILES := cs_prctl_test
-TEST_PROGS := cs_prctl_test
+TEST_GEN_FILES := cs_prctl_test deadloop
+TEST_PROGS := cs_prctl_test deadloop
include ../lib.mk
diff --git a/tools/testing/selftests/sched/deadloop.c b/tools/testing/selftests/sched/deadloop.c
new file mode 100644
index 000000000000..d850a3e2a0ab
--- /dev/null
+++ b/tools/testing/selftests/sched/deadloop.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+/*
+ * Create multiple infinite loop threads based on the passed parameters
+ * Usage: deadloop num policy prio
+ * num: the number of child threads
+ * policy: the scheduling policy of the child threads, 0-fair, 1-fifo, 2-rr
+ * prio: the priority
+ * If this process is killed, it will print the loop count of all child threads
+ * to the OUTPUT_FILE
+ *
+ * Date: June 27, 2024
+ * Author: Xavier <xavier_qy@163.com>
+ */
+
+#define OUTPUT_FILE "rt_group_sched_test.log"
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#define do_err(x) \
+do { \
+ if ((x) < 0) { \
+ printf("test BUG_ON func %s, line %d %ld\n", \
+ __func__, __LINE__, (long)(x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+#define do_false(x) \
+do { \
+ if ((x) == 1) { \
+ printf("test BUG_ON func %s, line %d %d\n", \
+ __func__, __LINE__, (x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+
+struct thread_data {
+ pthread_t thread;
+ int index;
+ int pid;
+ unsigned long cnt;
+};
+
+static struct thread_data *pdata;
+static int thread_num = 1;
+
+static void create_thread_posix(void *entry, pthread_t *thread, int *para,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param param;
+ pthread_attr_t attr;
+
+ memset(¶m, 0, sizeof(param));
+ ret = pthread_attr_init(&attr);
+ do_err(ret);
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ do_err(ret);
+
+ param.sched_priority = prio;
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ do_err(ret);
+
+ ret = pthread_attr_setschedparam(&attr, ¶m);
+ do_err(ret);
+
+ ret = pthread_create(thread, &attr, entry, para);
+ do_err(ret);
+}
+
+static void *dead_loop_entry(void *arg)
+{
+ int index = *(int *)arg;
+ struct sched_param param;
+ int cur = gettid();
+
+ sched_getparam(cur, ¶m);
+ pdata[index].pid = cur;
+ printf("cur:%d prio:%d\n", cur, param.sched_priority);
+
+ while (1) {
+ asm volatile("" ::: "memory");
+ pdata[index].cnt++;
+ }
+ return NULL;
+}
+
+static void handle_signal(int signal)
+{
+ int cnt = 0;
+
+ if (signal == SIGTERM) {
+ FILE *file = freopen(OUTPUT_FILE, "a", stdout);
+
+ if (file == NULL) {
+ perror("freopen");
+ exit(0);
+ }
+
+ while (cnt < thread_num) {
+ printf("pid:%d cnt:%ld\n", pdata[cnt].pid, pdata[cnt].cnt);
+ cnt++;
+ }
+ fclose(file);
+ exit(0);
+ }
+}
+
+static int dead_loop_create(int policy, int prio)
+{
+ int cnt = 0;
+ int ret;
+ void *status;
+ struct sched_param param;
+
+ param.sched_priority = prio;
+ pdata = malloc(thread_num * sizeof(struct thread_data));
+ do_false(!pdata);
+
+ if (policy) {
+ ret = sched_setscheduler(0, policy, ¶m);
+ do_err(ret);
+ }
+
+ while (cnt < thread_num) {
+ pdata[cnt].index = cnt;
+ create_thread_posix(dead_loop_entry, &pdata[cnt].thread,
+ &pdata[cnt].index, policy, prio);
+ cnt++;
+ }
+
+ signal(SIGTERM, handle_signal);
+
+ cnt = 0;
+ while (cnt < thread_num) {
+ pthread_join(pdata[cnt].thread, &status);
+ cnt++;
+ }
+
+ free(pdata);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int policy = 2;
+ int prio = 50;
+
+ if (argc == 2)
+ thread_num = atoi(argv[1]);
+
+ if (argc == 3) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ if (policy > 0)
+ prio = 50;
+ }
+
+ if (argc == 4) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ prio = atoi(argv[3]);
+ }
+
+ dead_loop_create(policy, prio);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sched/rt_group_sched_test.sh b/tools/testing/selftests/sched/rt_group_sched_test.sh
new file mode 100755
index 000000000000..9031250a2684
--- /dev/null
+++ b/tools/testing/selftests/sched/rt_group_sched_test.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test for rt group scheduling
+# Date: June 27, 2024
+# Author: Xavier <xavier_qy@163.com>
+
+# Record the list of child process PIDs
+PIDS=()
+
+# File for redirected output
+LOGFILE="rt_group_sched_test.log"
+
+# Cleanup function: kill all recorded child processes and unmount the cgroup
+function cleanup() {
+ echo "Cleaning up..."
+ for pid in "${PIDS[@]}"; do
+ if kill -0 $pid 2>/dev/null; then
+ kill -TERM $pid
+ fi
+ done
+
+ # Sleep for a while to ensure the processes are properly killed
+ sleep 2
+
+ # Unmount the cgroup filesystem
+ umount /sys/fs/cgroup/cpu 2>/dev/null
+ umount /sys/fs/cgroup 2>/dev/null
+ echo "Cleanup completed."
+
+ # Ensure the LOGFILE exists and is correct
+ if [ ! -f "$LOGFILE" ]; then
+ echo "$LOGFILE not found!"
+ exit 1
+ fi
+
+ # Initialize the total count variable
+ total=0
+
+ # Read matching lines and calculate the total sum
+ while IFS= read -r line
+ do
+ # Use grep to match lines containing 'pid:' and 'cnt:', and extract the value of cnt
+ if echo "$line" | grep -q '^pid:[[:digit:]]\+ cnt:[[:digit:]]\+'; then
+ cnt=$(echo "$line" | sed -n \
+ 's/^pid:[[:digit:]]\+ cnt:\([[:digit:]]\+\)/\1/p')
+ total=$((total + cnt))
+ fi
+ done < "$LOGFILE"
+
+ # Print the total sum
+ echo "Total cnt: $total"
+ echo "Finished processing."
+}
+
+# Capture actions when interrupted or terminated by a signal
+trap cleanup EXIT
+
+# Start the cgroup filesystem and create the necessary directories
+function setup_cgroups() {
+ mount -t tmpfs -o mode=755 cgroup_root /sys/fs/cgroup
+ mkdir -p /sys/fs/cgroup/cpu
+ mount -t cgroup -o cpu none /sys/fs/cgroup/cpu
+}
+
+# Create cgroup subdirectories and configure their settings
+function create_child_cgroup() {
+ local base_dir=$1
+ local name=$2
+ local rt_period=$3
+ local rt_runtime=$4
+ mkdir -p "$base_dir/$name"
+ echo $rt_period > "$base_dir/$name/cpu.rt_period_us"
+ echo $rt_runtime > "$base_dir/$name/cpu.rt_runtime_us"
+}
+# Launch a process and add it to the specified cgroup
+function launch_process() {
+ local process_name=$1
+
+ # Three parameters representing the number of child threads, scheduling policy, and priority
+ local args=$2
+ local cgroup_path=$3
+
+ # Launch the process
+ exec -a $process_name ./deadloop $args &
+ local pid=$!
+ PIDS+=($pid)
+
+ # Short sleep to ensure the process starts
+ sleep 1
+
+ # Check if the process started successfully
+ if ! pgrep -x $process_name > /dev/null; then
+ echo "Error: No process found with name $process_name."
+ exit 1
+ fi
+
+ echo $pid > "$cgroup_path/cgroup.procs"
+ echo "Process $process_name with PID $pid added to cgroup $cgroup_path"
+}
+
+# Main function running all tasks
+function main() {
+ echo "The test needs 30 seconds..."
+ rm -f "$LOGFILE"
+ setup_cgroups
+ create_child_cgroup "/sys/fs/cgroup/cpu" "child1" 1000000 800000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1" "child2" 1000000 700000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1/child2" "child3" 1000000 600000
+ launch_process "child1" "3 2 50" "/sys/fs/cgroup/cpu/child1"
+ launch_process "child2" "3 2 50" "/sys/fs/cgroup/cpu/child1/child2"
+ launch_process "child3" "1 2 50" "/sys/fs/cgroup/cpu/child1/child2/child3"
+ launch_process "tg_root" "1 2 50" "/sys/fs/cgroup/cpu"
+
+ # Run for 30 seconds
+ sleep 30
+}
+
+# Execute the main function
+main
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-06-27 17:21 ` [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-06-28 23:29 ` kernel test robot
2024-06-29 0:20 ` kernel test robot
1 sibling, 0 replies; 20+ messages in thread
From: kernel test robot @ 2024-06-28 23:29 UTC (permalink / raw)
To: Xavier, mingo, peterz, juri.lelli, vincent.guittot
Cc: oe-kbuild-all, dietmar.eggemann, rostedt, bsegall, mgorman,
bristot, vschneid, linux-kernel, Xavier
[-- Attachment #1: Type: text/plain, Size: 7044 bytes --]
Hi Xavier,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on shuah-kselftest/next shuah-kselftest/fixes peterz-queue/sched/core linus/master v6.10-rc5 next-20240628]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xavier/RT-SCHED-Optimize-the-enqueue-and-dequeue-operations-for-rt_se/20240628-211332
base: tip/sched/core
patch link: https://lore.kernel.org/r/20240627172156.235421-2-xavier_qy%40163.com
patch subject: [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
config: openrisc-allnoconfig
compiler: or1k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build):
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406290712.UllPC1Yc-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from kernel/sched/build_policy.c:45:
kernel/sched/rt.c: In function 'dequeue_rt_stack':
>> kernel/sched/rt.c:1560:35: error: 'struct sched_rt_entity' has no member named 'parent'
1560 | if (!rt_se->parent) {
| ^~
kernel/sched/rt.c:1575:35: error: 'struct sched_rt_entity' has no member named 'parent'
1575 | if (!rt_se->parent)
| ^~
kernel/sched/rt.c: In function 'enqueue_rt_entity':
kernel/sched/rt.c:1616:30: error: 'struct sched_rt_entity' has no member named 'parent'
1616 | rt_se = rt_se->parent;
| ^~
kernel/sched/rt.c: In function 'dequeue_rt_entity':
kernel/sched/rt.c:1662:30: error: 'struct sched_rt_entity' has no member named 'parent'
1662 | rt_se = rt_se->parent;
| ^~
vim +1560 kernel/sched/rt.c
1473
1474 /*
1475 * To optimize the enqueue and dequeue of rt_se, this strategy employs a
1476 * bottom-up removal approach. Specifically, when removing an rt_se at a
1477 * certain level, if it is determined that the highest priority of the rq
1478 * associated with that rt_se has not changed, there is no need to continue
1479 * removing rt_se at higher levels. At this point, only the total number
1480 * of removed rt_se needs to be recorded, and the rt_nr_running count of
1481 * higher-level rq should be removed accordingly.
1482 *
1483 * For enqueue operations, if an rt_se at a certain level is in the rq,
1484 * it is still necessary to check the priority of the higher-level rq.
1485 * If the priority of the higher-level rq is found to be lower than that
1486 * of the rt_se to be added, it should be removed, as updating the highest
1487 * priority of the rq during addition will cause the rq to be repositioned
1488 * in the parent rq.
1489 *
1490 * Conversely, for dequeue operations, if an rt_se at a certain level is
1491 * not in the rq, the operation can be exited immediately to reduce
1492 * unnecessary checks and handling.
1493 *
1494 * The return value refers to the last rt_se that was removed for enqueue
1495 * operations. And for dequeue operations, it refers to the last rt_se
1496 * that was either removed or had its rt_nr_running updated.
1497 */
1498 static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
1499 unsigned int flags, int for_enqueue)
1500 {
1501 struct sched_rt_entity *last = rt_se;
1502 struct sched_rt_entity *origin = rt_se;
1503 unsigned int del_rt_nr = 0;
1504 unsigned int del_rr_nr = 0;
1505 int prio_changed = rt_se_prio(rt_se);
1506 int sub_on_rq = 1;
1507
1508 for_each_sched_rt_entity(rt_se) {
1509 if (on_rt_rq(rt_se)) {
1510 if (sub_on_rq) {
1511 /*
1512 * The number of tasks removed from the sub-level rt_se also needs
1513 * to be subtracted from the rq of the current rt_se, as the current
1514 * rt_se's rq no longer includes the number of removed tasks.
1515 */
1516 dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
1517
1518 if (prio_changed) {
1519 /*
1520 * If the removal of the lower-level rt_se causes the
1521 * highest priority of the current rq to change, then the
1522 * current rt_se also needs to be removed from its parent
1523 * rq, and the number of deleted tasks should be
1524 * accumulated.
1525 */
1526 del_rt_nr += rt_se_nr_running(rt_se);
1527 del_rr_nr += rt_se_rr_nr_running(rt_se);
1528 prio_changed = __dequeue_rt_entity(rt_se,
1529 prio_changed, flags);
1530 last = rt_se;
1531 } else if (!for_enqueue) {
1532 /* For dequeue, last may only rt_nr_running was modified.*/
1533 last = rt_se;
1534 }
1535 } else {
1536 /*
1537 * Entering this branch must be for enqueue, as dequeue would break
1538 * if an rt_se is not online.
1539 * If the sub-level node is not online, and the current rt_se's
1540 * priority is lower than the one being added, current rt_se need
1541 * to be removed.
1542 */
1543 prio_changed = rt_se_prio(rt_se);
1544 if (prio_changed > rt_se_prio(origin)) {
1545 del_rt_nr += rt_se_nr_running(rt_se);
1546 del_rr_nr += rt_se_rr_nr_running(rt_se);
1547 prio_changed = __dequeue_rt_entity(rt_se,
1548 prio_changed, flags);
1549 last = rt_se;
1550 } else {
1551 prio_changed = 0;
1552 }
1553 }
1554
1555 /*
1556 * If the current rt_se is on the top rt_rq, then the already deleted
1557 * nodes, plus the count of the rt_rq where current rt_se located,
1558 * need to be removed from the top_rt_rq.
1559 */
> 1560 if (!rt_se->parent) {
1561 dequeue_top_rt_rq(rt_rq_of_se(rt_se),
1562 del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
1563 }
1564 sub_on_rq = 1;
1565 } else if (for_enqueue) {
1566 /*
1567 * In the case of an enqueue operation, if a certain level is found to be
1568 * not online, then the previous counts need to be reset to zero.
1569 */
1570 prio_changed = 0;
1571 sub_on_rq = 0;
1572 del_rt_nr = 0;
1573 del_rr_nr = 0;
1574
1575 if (!rt_se->parent)
1576 dequeue_top_rt_rq(rt_rq_of_se(rt_se),
1577 rt_rq_of_se(rt_se)->rt_nr_running);
1578 } else {
1579 last = rt_se;
1580 break;
1581 }
1582 }
1583
1584 return last;
1585 }
1586
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
[-- Attachment #2: reproduce --]
[-- Type: text/plain, Size: 709 bytes --]
reproduce (this is a W=1 build):
git clone https://github.com/intel/lkp-tests.git ~/lkp-tests
git remote add tip https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
git fetch tip sched/core
git checkout tip/sched/core
b4 shazam https://lore.kernel.org/r/20240627172156.235421-2-xavier_qy@163.com
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-13.2.0 ~/lkp-tests/kbuild/make.cross W=1 O=build_dir ARCH=openrisc olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-13.2.0 ~/lkp-tests/kbuild/make.cross W=1 O=build_dir ARCH=openrisc SHELL=/bin/bash kernel/
[-- Attachment #3: config --]
[-- Type: text/plain, Size: 24090 bytes --]
#
# Automatically generated file; DO NOT EDIT.
# Linux/openrisc 6.10.0-rc1 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="or1k-linux-gcc (GCC) 13.2.0"
CONFIG_CC_IS_GCC=y
CONFIG_GCC_VERSION=130200
CONFIG_CLANG_VERSION=0
CONFIG_AS_IS_GNU=y
CONFIG_AS_VERSION=24100
CONFIG_LD_IS_BFD=y
CONFIG_LD_VERSION=24100
CONFIG_LLD_VERSION=0
CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND=y
CONFIG_CC_HAS_ASM_INLINE=y
CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
CONFIG_PAHOLE_VERSION=127
CONFIG_IRQ_WORK=y
#
# General setup
#
CONFIG_BROKEN_ON_SMP=y
CONFIG_INIT_ENV_ARG_LIMIT=32
# CONFIG_COMPILE_TEST is not set
# CONFIG_WERROR is not set
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_BUILD_SALT=""
CONFIG_DEFAULT_INIT=""
CONFIG_DEFAULT_HOSTNAME="(none)"
# CONFIG_SYSVIPC is not set
# CONFIG_WATCH_QUEUE is not set
# CONFIG_CROSS_MEMORY_ATTACH is not set
# CONFIG_USELIB is not set
#
# IRQ subsystem
#
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_GENERIC_IRQ_SHOW=y
CONFIG_GENERIC_IRQ_CHIP=y
CONFIG_IRQ_DOMAIN=y
CONFIG_SPARSE_IRQ=y
# end of IRQ subsystem
CONFIG_GENERIC_IRQ_MULTI_HANDLER=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_GENERIC_CLOCKEVENTS_BROADCAST=y
#
# Timers subsystem
#
CONFIG_HZ_PERIODIC=y
# CONFIG_NO_HZ_IDLE is not set
# CONFIG_NO_HZ is not set
# CONFIG_HIGH_RES_TIMERS is not set
# end of Timers subsystem
#
# BPF subsystem
#
# CONFIG_BPF_SYSCALL is not set
# end of BPF subsystem
CONFIG_PREEMPT_NONE_BUILD=y
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
#
# CPU/Task time and stats accounting
#
CONFIG_TICK_CPU_ACCOUNTING=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_PSI is not set
# end of CPU/Task time and stats accounting
#
# RCU Subsystem
#
CONFIG_TINY_RCU=y
# CONFIG_RCU_EXPERT is not set
CONFIG_TINY_SRCU=y
# end of RCU Subsystem
# CONFIG_IKCONFIG is not set
# CONFIG_IKHEADERS is not set
CONFIG_LOG_BUF_SHIFT=17
#
# Scheduler features
#
# end of Scheduler features
CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough=5"
CONFIG_GCC10_NO_ARRAY_BOUNDS=y
CONFIG_CC_NO_ARRAY_BOUNDS=y
CONFIG_GCC_NO_STRINGOP_OVERFLOW=y
CONFIG_CC_NO_STRINGOP_OVERFLOW=y
# CONFIG_CGROUPS is not set
CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_USER_NS is not set
# CONFIG_PID_NS is not set
# CONFIG_CHECKPOINT_RESTORE is not set
# CONFIG_SCHED_AUTOGROUP is not set
# CONFIG_RELAY is not set
# CONFIG_BLK_DEV_INITRD is not set
# CONFIG_BOOT_CONFIG is not set
# CONFIG_INITRAMFS_PRESERVE_MTIME is not set
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SYSCTL=y
CONFIG_HAVE_UID16=y
# CONFIG_EXPERT is not set
CONFIG_UID16=y
CONFIG_MULTIUSER=y
CONFIG_SYSFS_SYSCALL=y
CONFIG_FHANDLE=y
CONFIG_POSIX_TIMERS=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_FUTEX=y
CONFIG_FUTEX_PI=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_IO_URING=y
CONFIG_ADVISE_SYSCALLS=y
CONFIG_MEMBARRIER=y
CONFIG_CACHESTAT_SYSCALL=y
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_SELFTEST is not set
CONFIG_KALLSYMS_BASE_RELATIVE=y
#
# Kernel Performance Events And Counters
#
# end of Kernel Performance Events And Counters
# CONFIG_PROFILING is not set
#
# Kexec and crash features
#
# end of Kexec and crash features
# end of General setup
CONFIG_OPENRISC=y
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_MMU=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_NO_IOPORT_MAP=y
CONFIG_GENERIC_CSUM=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_LOCKDEP_SUPPORT=y
#
# Processor type and features
#
CONFIG_OR1K_1200=y
# CONFIG_DCACHE_WRITETHROUGH is not set
CONFIG_OPENRISC_BUILTIN_DTB=""
#
# Class II Instructions
#
# CONFIG_OPENRISC_HAVE_INST_FF1 is not set
# CONFIG_OPENRISC_HAVE_INST_FL1 is not set
# CONFIG_OPENRISC_HAVE_INST_MUL is not set
# CONFIG_OPENRISC_HAVE_INST_DIV is not set
# CONFIG_OPENRISC_HAVE_INST_CMOV is not set
# CONFIG_OPENRISC_HAVE_INST_ROR is not set
# CONFIG_OPENRISC_HAVE_INST_RORI is not set
# CONFIG_OPENRISC_HAVE_INST_SEXT is not set
# end of Class II Instructions
# CONFIG_SMP is not set
# CONFIG_FPU is not set
# CONFIG_HZ_100 is not set
CONFIG_HZ_250=y
# CONFIG_HZ_300 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
# CONFIG_OPENRISC_NO_SPR_SR_DSX is not set
# CONFIG_OPENRISC_HAVE_SHADOW_GPRS is not set
CONFIG_CMDLINE=""
#
# Debugging options
#
# CONFIG_JUMP_UPON_UNHANDLED_EXCEPTION is not set
# CONFIG_OPENRISC_ESR_EXCEPTION_BUG_CHECK is not set
# end of Debugging options
# end of Processor type and features
CONFIG_CPU_MITIGATIONS=y
#
# General architecture-dependent options
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_GENERIC_SMP_IDLE_THREAD=y
CONFIG_ARCH_HAS_DMA_SET_UNCACHED=y
CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED=y
CONFIG_ARCH_32BIT_OFF_T=y
CONFIG_MMU_GATHER_NO_RANGE=y
CONFIG_MMU_GATHER_MERGE_VMAS=y
CONFIG_MMU_LAZY_TLB_REFCOUNT=y
CONFIG_LTO_NONE=y
CONFIG_MODULES_USE_ELF_RELA=y
CONFIG_PGTABLE_LEVELS=2
CONFIG_HAVE_PAGE_SIZE_8KB=y
CONFIG_PAGE_SIZE_8KB=y
CONFIG_PAGE_SIZE_LESS_THAN_64KB=y
CONFIG_PAGE_SIZE_LESS_THAN_256KB=y
CONFIG_PAGE_SHIFT=13
# CONFIG_COMPAT_32BIT_TIME is not set
CONFIG_CPU_NO_EFFICIENT_FFS=y
#
# GCOV-based kernel profiling
#
# end of GCOV-based kernel profiling
CONFIG_FUNCTION_ALIGNMENT=0
# end of General architecture-dependent options
CONFIG_RT_MUTEXES=y
# CONFIG_MODULES is not set
CONFIG_BLOCK=y
# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
# CONFIG_BLK_DEV_BSGLIB is not set
# CONFIG_BLK_DEV_INTEGRITY is not set
# CONFIG_BLK_DEV_WRITE_MOUNTED is not set
# CONFIG_BLK_DEV_ZONED is not set
# CONFIG_BLK_WBT is not set
# CONFIG_BLK_INLINE_ENCRYPTION is not set
#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
CONFIG_EFI_PARTITION=y
# end of Partition Types
#
# IO Schedulers
#
# CONFIG_MQ_IOSCHED_DEADLINE is not set
# CONFIG_MQ_IOSCHED_KYBER is not set
# CONFIG_IOSCHED_BFQ is not set
# end of IO Schedulers
CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
CONFIG_INLINE_READ_UNLOCK=y
CONFIG_INLINE_READ_UNLOCK_IRQ=y
CONFIG_INLINE_WRITE_UNLOCK=y
CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
CONFIG_ARCH_USE_QUEUED_RWLOCKS=y
#
# Executable file formats
#
# CONFIG_BINFMT_ELF is not set
# CONFIG_BINFMT_SCRIPT is not set
# CONFIG_BINFMT_MISC is not set
CONFIG_COREDUMP=y
# end of Executable file formats
#
# Memory Management options
#
# CONFIG_SWAP is not set
#
# Slab allocator options
#
CONFIG_SLUB=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_SLAB_FREELIST_RANDOM is not set
# CONFIG_SLAB_FREELIST_HARDENED is not set
# CONFIG_SLUB_STATS is not set
# CONFIG_RANDOM_KMALLOC_CACHES is not set
# end of Slab allocator options
# CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_FLATMEM=y
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
CONFIG_SPLIT_PTLOCK_CPUS=4
# CONFIG_COMPACTION is not set
# CONFIG_PAGE_REPORTING is not set
CONFIG_PCP_BATCH_SCALE_MAX=5
# CONFIG_KSM is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
CONFIG_NEED_PER_CPU_KM=y
# CONFIG_CMA is not set
# CONFIG_IDLE_PAGE_TRACKING is not set
CONFIG_VM_EVENT_COUNTERS=y
# CONFIG_PERCPU_STATS is not set
#
# GUP_TEST needs to have DEBUG_FS enabled
#
# CONFIG_DMAPOOL_TEST is not set
# CONFIG_ANON_VMA_NAME is not set
# CONFIG_USERFAULTFD is not set
# CONFIG_LRU_GEN is not set
#
# Data Access Monitoring
#
# CONFIG_DAMON is not set
# end of Data Access Monitoring
# end of Memory Management options
# CONFIG_NET is not set
#
# Device Drivers
#
CONFIG_HAVE_PCI=y
CONFIG_GENERIC_PCI_IOMAP=y
# CONFIG_PCI is not set
# CONFIG_PCCARD is not set
#
# Generic Driver Options
#
# CONFIG_UEVENT_HELPER is not set
# CONFIG_DEVTMPFS is not set
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
#
# Firmware loader
#
CONFIG_FW_LOADER=y
CONFIG_EXTRA_FIRMWARE=""
# CONFIG_FW_LOADER_USER_HELPER is not set
# CONFIG_FW_LOADER_COMPRESS is not set
# CONFIG_FW_UPLOAD is not set
# end of Firmware loader
CONFIG_ALLOW_DEV_COREDUMP=y
CONFIG_GENERIC_CPU_DEVICES=y
# CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT is not set
# end of Generic Driver Options
#
# Bus devices
#
# CONFIG_MHI_BUS is not set
# CONFIG_MHI_BUS_EP is not set
# end of Bus devices
#
# Cache Drivers
#
# end of Cache Drivers
#
# Firmware Drivers
#
#
# ARM System Control and Management Interface Protocol
#
# end of ARM System Control and Management Interface Protocol
# CONFIG_GOOGLE_FIRMWARE is not set
#
# Qualcomm firmware drivers
#
# end of Qualcomm firmware drivers
#
# Tegra firmware driver
#
# end of Tegra firmware driver
# end of Firmware Drivers
# CONFIG_GNSS is not set
# CONFIG_MTD is not set
CONFIG_DTC=y
CONFIG_OF=y
# CONFIG_OF_UNITTEST is not set
CONFIG_OF_FLATTREE=y
CONFIG_OF_EARLY_FLATTREE=y
CONFIG_OF_KOBJ=y
CONFIG_OF_ADDRESS=y
CONFIG_OF_IRQ=y
CONFIG_OF_RESERVED_MEM=y
# CONFIG_OF_OVERLAY is not set
# CONFIG_PARPORT is not set
# CONFIG_BLK_DEV is not set
#
# NVME Support
#
# CONFIG_NVME_FC is not set
# end of NVME Support
#
# Misc devices
#
# CONFIG_DUMMY_IRQ is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_SRAM is not set
# CONFIG_XILINX_SDFEC is not set
# CONFIG_OPEN_DICE is not set
# CONFIG_VCPU_STALL_DETECTOR is not set
# CONFIG_C2PORT is not set
#
# EEPROM support
#
# CONFIG_EEPROM_93CX6 is not set
# end of EEPROM support
#
# Texas Instruments shared transport line discipline
#
# end of Texas Instruments shared transport line discipline
#
# Altera FPGA firmware download module (requires I2C)
#
# CONFIG_ECHO is not set
# CONFIG_PVPANIC is not set
# end of Misc devices
#
# SCSI device support
#
CONFIG_SCSI_MOD=y
# CONFIG_RAID_ATTRS is not set
# CONFIG_SCSI is not set
# end of SCSI device support
# CONFIG_ATA is not set
# CONFIG_MD is not set
# CONFIG_TARGET_CORE is not set
#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
# CONFIG_INPUT_SPARSEKMAP is not set
# CONFIG_INPUT_MATRIXKMAP is not set
#
# Userland interfaces
#
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_EVDEV is not set
# CONFIG_INPUT_EVBUG is not set
#
# Input Device Drivers
#
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TABLET is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set
# CONFIG_RMI4_CORE is not set
#
# Hardware I/O ports
#
# CONFIG_SERIO is not set
# CONFIG_GAMEPORT is not set
# end of Hardware I/O ports
# end of Input device support
#
# Character devices
#
CONFIG_TTY=y
CONFIG_VT=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_LEGACY_TIOCSTI is not set
# CONFIG_LDISC_AUTOLOAD is not set
#
# Serial drivers
#
# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
#
# CONFIG_SERIAL_UARTLITE is not set
# CONFIG_SERIAL_SIFIVE is not set
# CONFIG_SERIAL_SCCNXP is not set
# CONFIG_SERIAL_SC16IS7XX_CORE is not set
# CONFIG_SERIAL_ALTERA_JTAGUART is not set
# CONFIG_SERIAL_ALTERA_UART is not set
# CONFIG_SERIAL_XILINX_PS_UART is not set
# CONFIG_SERIAL_ARC is not set
# CONFIG_SERIAL_FSL_LPUART is not set
# CONFIG_SERIAL_FSL_LINFLEXUART is not set
# CONFIG_SERIAL_CONEXANT_DIGICOLOR is not set
# CONFIG_SERIAL_SPRD is not set
# end of Serial drivers
# CONFIG_SERIAL_NONSTANDARD is not set
# CONFIG_NULL_TTY is not set
# CONFIG_SERIAL_DEV_BUS is not set
# CONFIG_VIRTIO_CONSOLE is not set
# CONFIG_IPMI_HANDLER is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_DEVMEM is not set
# CONFIG_TCG_TPM is not set
# CONFIG_XILLYBUS is not set
# end of Character devices
#
# I2C support
#
# CONFIG_I2C is not set
# end of I2C support
# CONFIG_I3C is not set
# CONFIG_SPI is not set
# CONFIG_SPMI is not set
# CONFIG_HSI is not set
# CONFIG_PPS is not set
#
# PTP clock support
#
CONFIG_PTP_1588_CLOCK_OPTIONAL=y
#
# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks.
#
# end of PTP clock support
# CONFIG_PINCTRL is not set
CONFIG_GPIOLIB=y
CONFIG_GPIOLIB_FASTPATH_LIMIT=512
CONFIG_OF_GPIO=y
CONFIG_GPIO_CDEV=y
# CONFIG_GPIO_CDEV_V1 is not set
#
# Memory mapped GPIO drivers
#
# CONFIG_GPIO_74XX_MMIO is not set
# CONFIG_GPIO_ALTERA is not set
# CONFIG_GPIO_CADENCE is not set
# CONFIG_GPIO_DWAPB is not set
# CONFIG_GPIO_FTGPIO010 is not set
# CONFIG_GPIO_GENERIC_PLATFORM is not set
# CONFIG_GPIO_GRGPIO is not set
# CONFIG_GPIO_HLWD is not set
# CONFIG_GPIO_MB86S7X is not set
# CONFIG_GPIO_SIFIVE is not set
# CONFIG_GPIO_XILINX is not set
# CONFIG_GPIO_AMD_FCH is not set
# end of Memory mapped GPIO drivers
#
# MFD GPIO expanders
#
# end of MFD GPIO expanders
#
# Virtual GPIO drivers
#
# CONFIG_GPIO_AGGREGATOR is not set
# CONFIG_GPIO_LATCH is not set
# CONFIG_GPIO_MOCKUP is not set
# CONFIG_GPIO_SIM is not set
# end of Virtual GPIO drivers
# CONFIG_W1 is not set
# CONFIG_POWER_RESET is not set
# CONFIG_POWER_SUPPLY is not set
# CONFIG_HWMON is not set
# CONFIG_THERMAL is not set
# CONFIG_WATCHDOG is not set
CONFIG_SSB_POSSIBLE=y
# CONFIG_SSB is not set
CONFIG_BCMA_POSSIBLE=y
# CONFIG_BCMA is not set
#
# Multifunction device drivers
#
# CONFIG_MFD_ATMEL_FLEXCOM is not set
# CONFIG_MFD_ATMEL_HLCDC is not set
# CONFIG_MFD_MADERA is not set
# CONFIG_MFD_HI6421_PMIC is not set
# CONFIG_MFD_KEMPLD is not set
# CONFIG_MFD_MT6397 is not set
# CONFIG_MFD_SM501 is not set
# CONFIG_MFD_SYSCON is not set
# CONFIG_MFD_TQMX86 is not set
# end of Multifunction device drivers
# CONFIG_REGULATOR is not set
# CONFIG_RC_CORE is not set
#
# CEC support
#
# CONFIG_MEDIA_CEC_SUPPORT is not set
# end of CEC support
# CONFIG_MEDIA_SUPPORT is not set
#
# Graphics support
#
# CONFIG_AUXDISPLAY is not set
# CONFIG_DRM is not set
#
# Frame buffer Devices
#
# CONFIG_FB is not set
# end of Frame buffer Devices
#
# Backlight & LCD device support
#
# CONFIG_LCD_CLASS_DEVICE is not set
# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
# end of Backlight & LCD device support
#
# Console display driver support
#
CONFIG_DUMMY_CONSOLE=y
CONFIG_DUMMY_CONSOLE_COLUMNS=80
CONFIG_DUMMY_CONSOLE_ROWS=25
# end of Console display driver support
# end of Graphics support
# CONFIG_SOUND is not set
# CONFIG_HID_SUPPORT is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
# CONFIG_USB_SUPPORT is not set
# CONFIG_MMC is not set
# CONFIG_MEMSTICK is not set
# CONFIG_NEW_LEDS is not set
# CONFIG_ACCESSIBILITY is not set
# CONFIG_RTC_CLASS is not set
# CONFIG_DMADEVICES is not set
#
# DMABUF options
#
# CONFIG_SYNC_FILE is not set
# CONFIG_DMABUF_HEAPS is not set
# end of DMABUF options
# CONFIG_UIO is not set
# CONFIG_VFIO is not set
# CONFIG_VIRT_DRIVERS is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
#
# Microsoft Hyper-V guest support
#
# end of Microsoft Hyper-V guest support
# CONFIG_GREYBUS is not set
# CONFIG_COMEDI is not set
# CONFIG_STAGING is not set
# CONFIG_GOLDFISH is not set
CONFIG_HAVE_CLK=y
CONFIG_HAVE_CLK_PREPARE=y
CONFIG_COMMON_CLK=y
# CONFIG_COMMON_CLK_AXI_CLKGEN is not set
# CONFIG_COMMON_CLK_FIXED_MMIO is not set
# CONFIG_XILINX_VCU is not set
# CONFIG_COMMON_CLK_XLNX_CLKWZRD is not set
# CONFIG_HWSPINLOCK is not set
#
# Clock Source drivers
#
# end of Clock Source drivers
# CONFIG_MAILBOX is not set
# CONFIG_IOMMU_SUPPORT is not set
#
# Remoteproc drivers
#
# CONFIG_REMOTEPROC is not set
# end of Remoteproc drivers
#
# Rpmsg drivers
#
# CONFIG_RPMSG_VIRTIO is not set
# end of Rpmsg drivers
# CONFIG_SOUNDWIRE is not set
#
# SOC (System On Chip) specific Drivers
#
#
# Amlogic SoC drivers
#
# end of Amlogic SoC drivers
#
# Broadcom SoC drivers
#
# end of Broadcom SoC drivers
#
# NXP/Freescale QorIQ SoC drivers
#
# end of NXP/Freescale QorIQ SoC drivers
#
# fujitsu SoC drivers
#
# end of fujitsu SoC drivers
#
# i.MX SoC drivers
#
# end of i.MX SoC drivers
#
# Enable LiteX SoC Builder specific drivers
#
# CONFIG_LITEX_SOC_CONTROLLER is not set
# end of Enable LiteX SoC Builder specific drivers
# CONFIG_WPCM450_SOC is not set
#
# Qualcomm SoC drivers
#
# end of Qualcomm SoC drivers
# CONFIG_SOC_TI is not set
#
# Xilinx SoC drivers
#
# end of Xilinx SoC drivers
# end of SOC (System On Chip) specific Drivers
#
# PM Domains
#
#
# Amlogic PM Domains
#
# end of Amlogic PM Domains
#
# Broadcom PM Domains
#
# end of Broadcom PM Domains
#
# i.MX PM Domains
#
# end of i.MX PM Domains
#
# Qualcomm PM Domains
#
# end of Qualcomm PM Domains
# end of PM Domains
# CONFIG_PM_DEVFREQ is not set
# CONFIG_EXTCON is not set
# CONFIG_MEMORY is not set
# CONFIG_IIO is not set
# CONFIG_PWM is not set
#
# IRQ chip support
#
CONFIG_IRQCHIP=y
# CONFIG_AL_FIC is not set
CONFIG_OR1K_PIC=y
# CONFIG_XILINX_INTC is not set
# end of IRQ chip support
# CONFIG_IPACK_BUS is not set
# CONFIG_RESET_CONTROLLER is not set
#
# PHY Subsystem
#
# CONFIG_GENERIC_PHY is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
#
# PHY drivers for Broadcom platforms
#
# CONFIG_BCM_KONA_USB2_PHY is not set
# end of PHY drivers for Broadcom platforms
# CONFIG_PHY_CADENCE_TORRENT is not set
# CONFIG_PHY_CADENCE_DPHY is not set
# CONFIG_PHY_CADENCE_DPHY_RX is not set
# CONFIG_PHY_CADENCE_SALVO is not set
# CONFIG_PHY_PXA_28NM_HSIC is not set
# CONFIG_PHY_PXA_28NM_USB2 is not set
# end of PHY Subsystem
# CONFIG_POWERCAP is not set
# CONFIG_MCB is not set
# CONFIG_RAS is not set
#
# Android
#
# CONFIG_ANDROID_BINDER_IPC is not set
# end of Android
# CONFIG_DAX is not set
# CONFIG_NVMEM is not set
#
# HW tracing support
#
# CONFIG_STM is not set
# CONFIG_INTEL_TH is not set
# end of HW tracing support
# CONFIG_FPGA is not set
# CONFIG_FSI is not set
# CONFIG_SIOX is not set
# CONFIG_SLIMBUS is not set
# CONFIG_INTERCONNECT is not set
# CONFIG_COUNTER is not set
# CONFIG_PECI is not set
# CONFIG_HTE is not set
# end of Device Drivers
#
# File systems
#
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
# CONFIG_EXT2_FS is not set
# CONFIG_EXT3_FS is not set
# CONFIG_EXT4_FS is not set
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_BTRFS_FS is not set
# CONFIG_NILFS2_FS is not set
# CONFIG_F2FS_FS is not set
# CONFIG_BCACHEFS_FS is not set
CONFIG_EXPORTFS=y
# CONFIG_EXPORTFS_BLOCK_OPS is not set
CONFIG_FILE_LOCKING=y
# CONFIG_FS_ENCRYPTION is not set
# CONFIG_FS_VERITY is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
# CONFIG_FANOTIFY is not set
# CONFIG_QUOTA is not set
# CONFIG_AUTOFS_FS is not set
# CONFIG_FUSE_FS is not set
# CONFIG_OVERLAY_FS is not set
#
# Caches
#
# end of Caches
#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set
# end of CD-ROM/DVD Filesystems
#
# DOS/FAT/EXFAT/NT Filesystems
#
# CONFIG_MSDOS_FS is not set
# CONFIG_VFAT_FS is not set
# CONFIG_EXFAT_FS is not set
# CONFIG_NTFS3_FS is not set
# CONFIG_NTFS_FS is not set
# end of DOS/FAT/EXFAT/NT Filesystems
#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
# CONFIG_PROC_KCORE is not set
CONFIG_PROC_SYSCTL=y
CONFIG_PROC_PAGE_MONITOR=y
# CONFIG_PROC_CHILDREN is not set
CONFIG_KERNFS=y
CONFIG_SYSFS=y
# CONFIG_TMPFS is not set
# CONFIG_CONFIGFS_FS is not set
# end of Pseudo filesystems
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NLS is not set
# CONFIG_UNICODE is not set
CONFIG_IO_WQ=y
# end of File systems
#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY_DMESG_RESTRICT is not set
# CONFIG_SECURITY is not set
# CONFIG_SECURITYFS is not set
# CONFIG_HARDENED_USERCOPY is not set
# CONFIG_STATIC_USERMODEHELPER is not set
CONFIG_DEFAULT_SECURITY_DAC=y
CONFIG_LSM="landlock,lockdown,yama,loadpin,safesetid,bpf"
#
# Kernel hardening options
#
#
# Memory initialization
#
CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
# CONFIG_INIT_STACK_NONE is not set
# CONFIG_INIT_STACK_ALL_PATTERN is not set
CONFIG_INIT_STACK_ALL_ZERO=y
# CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set
# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set
# CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON is not set
CONFIG_CC_HAS_ZERO_CALL_USED_REGS=y
# CONFIG_ZERO_CALL_USED_REGS is not set
# end of Memory initialization
#
# Hardening of kernel data structures
#
# CONFIG_LIST_HARDENED is not set
# CONFIG_BUG_ON_DATA_CORRUPTION is not set
# end of Hardening of kernel data structures
CONFIG_RANDSTRUCT_NONE=y
# end of Kernel hardening options
# end of Security options
# CONFIG_CRYPTO is not set
#
# Library routines
#
# CONFIG_PACKING is not set
CONFIG_BITREVERSE=y
CONFIG_GENERIC_STRNCPY_FROM_USER=y
CONFIG_GENERIC_STRNLEN_USER=y
# CONFIG_CORDIC is not set
# CONFIG_PRIME_NUMBERS is not set
CONFIG_RATIONAL=y
#
# Crypto library routines
#
CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y
# CONFIG_CRYPTO_LIB_CHACHA is not set
# CONFIG_CRYPTO_LIB_CURVE25519 is not set
CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1
# CONFIG_CRYPTO_LIB_POLY1305 is not set
# end of Crypto library routines
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
# CONFIG_CRC_T10DIF is not set
# CONFIG_CRC64_ROCKSOFT is not set
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC32_SELFTEST is not set
CONFIG_CRC32_SLICEBY8=y
# CONFIG_CRC32_SLICEBY4 is not set
# CONFIG_CRC32_SARWATE is not set
# CONFIG_CRC32_BIT is not set
# CONFIG_CRC64 is not set
# CONFIG_CRC4 is not set
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
# CONFIG_CRC8 is not set
# CONFIG_RANDOM32_SELFTEST is not set
# CONFIG_XZ_DEC is not set
CONFIG_HAS_IOMEM=y
CONFIG_HAS_DMA=y
CONFIG_DMA_DECLARE_COHERENT=y
CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE=y
CONFIG_DMA_NEED_SYNC=y
# CONFIG_DMA_API_DEBUG is not set
CONFIG_GENERIC_ATOMIC64=y
# CONFIG_IRQ_POLL is not set
CONFIG_LIBFDT=y
CONFIG_STACKDEPOT=y
CONFIG_STACKDEPOT_MAX_FRAMES=64
CONFIG_SBITMAP=y
# CONFIG_LWQ_TEST is not set
# end of Library routines
CONFIG_GENERIC_IOREMAP=y
#
# Kernel hacking
#
#
# printk and dmesg options
#
# CONFIG_PRINTK_TIME is not set
# CONFIG_PRINTK_CALLER is not set
# CONFIG_STACKTRACE_BUILD_ID is not set
CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7
CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_DYNAMIC_DEBUG is not set
# CONFIG_DYNAMIC_DEBUG_CORE is not set
# CONFIG_SYMBOLIC_ERRNAME is not set
# end of printk and dmesg options
# CONFIG_DEBUG_KERNEL is not set
#
# Compile-time checks and compiler options
#
CONFIG_AS_HAS_NON_CONST_ULEB128=y
CONFIG_FRAME_WARN=1024
# CONFIG_STRIP_ASM_SYMS is not set
# CONFIG_HEADERS_INSTALL is not set
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
CONFIG_ARCH_WANT_FRAME_POINTERS=y
# CONFIG_FRAME_POINTER is not set
# end of Compile-time checks and compiler options
#
# Generic Kernel Debugging Instruments
#
# CONFIG_MAGIC_SYSRQ is not set
# CONFIG_DEBUG_FS is not set
CONFIG_HAVE_KCSAN_COMPILER=y
# end of Generic Kernel Debugging Instruments
#
# Networking Debugging
#
# end of Networking Debugging
#
# Memory Debugging
#
# CONFIG_PAGE_EXTENSION is not set
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_PAGE_POISONING is not set
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
# CONFIG_MEM_ALLOC_PROFILING is not set
CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y
# end of Memory Debugging
#
# Debug Oops, Lockups and Hangs
#
# CONFIG_PANIC_ON_OOPS is not set
CONFIG_PANIC_ON_OOPS_VALUE=0
CONFIG_PANIC_TIMEOUT=0
# end of Debug Oops, Lockups and Hangs
#
# Scheduler Debugging
#
# CONFIG_SCHEDSTATS is not set
# end of Scheduler Debugging
# CONFIG_DEBUG_TIMEKEEPING is not set
#
# Lock Debugging (spinlocks, mutexes, etc...)
#
CONFIG_LOCK_DEBUGGING_SUPPORT=y
# CONFIG_WW_MUTEX_SELFTEST is not set
# end of Lock Debugging (spinlocks, mutexes, etc...)
# CONFIG_DEBUG_IRQFLAGS is not set
CONFIG_STACKTRACE=y
# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set
#
# Debug kernel data structures
#
# end of Debug kernel data structures
#
# RCU Debugging
#
# end of RCU Debugging
CONFIG_TRACING_SUPPORT=y
# CONFIG_FTRACE is not set
# CONFIG_SAMPLES is not set
#
# openrisc Debugging
#
# end of openrisc Debugging
#
# Kernel Testing and Coverage
#
# CONFIG_KUNIT is not set
CONFIG_CC_HAS_SANCOV_TRACE_PC=y
# CONFIG_RUNTIME_TESTING_MENU is not set
# end of Kernel Testing and Coverage
#
# Rust hacking
#
# end of Rust hacking
# end of Kernel hacking
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-06-27 17:21 ` [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-06-28 23:29 ` kernel test robot
@ 2024-06-29 0:20 ` kernel test robot
1 sibling, 0 replies; 20+ messages in thread
From: kernel test robot @ 2024-06-29 0:20 UTC (permalink / raw)
To: Xavier, mingo, peterz, juri.lelli, vincent.guittot
Cc: llvm, oe-kbuild-all, dietmar.eggemann, rostedt, bsegall, mgorman,
bristot, vschneid, linux-kernel, Xavier
[-- Attachment #1: Type: text/plain, Size: 12815 bytes --]
Hi Xavier,
kernel test robot noticed the following build errors:
[auto build test ERROR on tip/sched/core]
[also build test ERROR on shuah-kselftest/next shuah-kselftest/fixes peterz-queue/sched/core linus/master v6.10-rc5 next-20240628]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xavier/RT-SCHED-Optimize-the-enqueue-and-dequeue-operations-for-rt_se/20240628-211332
base: tip/sched/core
patch link: https://lore.kernel.org/r/20240627172156.235421-2-xavier_qy%40163.com
patch subject: [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
config: s390-allnoconfig
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project 326ba38a991250a8587a399a260b0f7af2c9166a)
reproduce (this is a W=1 build):
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202406290757.qGal0V0S-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from kernel/sched/build_policy.c:24:
In file included from include/linux/livepatch.h:12:
In file included from include/linux/module.h:19:
In file included from include/linux/elf.h:6:
In file included from arch/s390/include/asm/elf.h:173:
In file included from arch/s390/include/asm/mmu_context.h:11:
In file included from arch/s390/include/asm/pgalloc.h:18:
In file included from include/linux/mm.h:2253:
include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
514 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
In file included from kernel/sched/build_policy.c:34:
In file included from kernel/sched/sched.h:63:
In file included from include/linux/tick.h:8:
In file included from include/linux/clockchips.h:14:
In file included from include/linux/clocksource.h:22:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:548:31: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
548 | val = __raw_readb(PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:561:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
561 | val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:37:59: note: expanded from macro '__le16_to_cpu'
37 | #define __le16_to_cpu(x) __swab16((__force __u16)(__le16)(x))
| ^
include/uapi/linux/swab.h:102:54: note: expanded from macro '__swab16'
102 | #define __swab16(x) (__u16)__builtin_bswap16((__u16)(x))
| ^
In file included from kernel/sched/build_policy.c:34:
In file included from kernel/sched/sched.h:63:
In file included from include/linux/tick.h:8:
In file included from include/linux/clockchips.h:14:
In file included from include/linux/clocksource.h:22:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:574:61: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
574 | val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
| ~~~~~~~~~~ ^
include/uapi/linux/byteorder/big_endian.h:35:59: note: expanded from macro '__le32_to_cpu'
35 | #define __le32_to_cpu(x) __swab32((__force __u32)(__le32)(x))
| ^
include/uapi/linux/swab.h:115:54: note: expanded from macro '__swab32'
115 | #define __swab32(x) (__u32)__builtin_bswap32((__u32)(x))
| ^
In file included from kernel/sched/build_policy.c:34:
In file included from kernel/sched/sched.h:63:
In file included from include/linux/tick.h:8:
In file included from include/linux/clockchips.h:14:
In file included from include/linux/clocksource.h:22:
In file included from arch/s390/include/asm/io.h:93:
include/asm-generic/io.h:585:33: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
585 | __raw_writeb(value, PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:595:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
595 | __raw_writew((u16 __force)cpu_to_le16(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:605:59: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
605 | __raw_writel((u32 __force)cpu_to_le32(value), PCI_IOBASE + addr);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:693:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
693 | readsb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:701:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
701 | readsw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:709:20: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
709 | readsl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:718:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
718 | writesb(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:727:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
727 | writesw(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
include/asm-generic/io.h:736:21: warning: performing pointer arithmetic on a null pointer has undefined behavior [-Wnull-pointer-arithmetic]
736 | writesl(PCI_IOBASE + addr, buffer, count);
| ~~~~~~~~~~ ^
In file included from kernel/sched/build_policy.c:45:
>> kernel/sched/rt.c:1560:16: error: no member named 'parent' in 'struct sched_rt_entity'
1560 | if (!rt_se->parent) {
| ~~~~~ ^
kernel/sched/rt.c:1575:16: error: no member named 'parent' in 'struct sched_rt_entity'
1575 | if (!rt_se->parent)
| ~~~~~ ^
kernel/sched/rt.c:1616:18: error: no member named 'parent' in 'struct sched_rt_entity'
1616 | rt_se = rt_se->parent;
| ~~~~~ ^
kernel/sched/rt.c:1662:18: error: no member named 'parent' in 'struct sched_rt_entity'
1662 | rt_se = rt_se->parent;
| ~~~~~ ^
13 warnings and 4 errors generated.
vim +1560 kernel/sched/rt.c
1473
1474 /*
1475 * To optimize the enqueue and dequeue of rt_se, this strategy employs a
1476 * bottom-up removal approach. Specifically, when removing an rt_se at a
1477 * certain level, if it is determined that the highest priority of the rq
1478 * associated with that rt_se has not changed, there is no need to continue
1479 * removing rt_se at higher levels. At this point, only the total number
1480 * of removed rt_se needs to be recorded, and the rt_nr_running count of
1481 * higher-level rq should be removed accordingly.
1482 *
1483 * For enqueue operations, if an rt_se at a certain level is in the rq,
1484 * it is still necessary to check the priority of the higher-level rq.
1485 * If the priority of the higher-level rq is found to be lower than that
1486 * of the rt_se to be added, it should be removed, as updating the highest
1487 * priority of the rq during addition will cause the rq to be repositioned
1488 * in the parent rq.
1489 *
1490 * Conversely, for dequeue operations, if an rt_se at a certain level is
1491 * not in the rq, the operation can be exited immediately to reduce
1492 * unnecessary checks and handling.
1493 *
1494 * The return value refers to the last rt_se that was removed for enqueue
1495 * operations. And for dequeue operations, it refers to the last rt_se
1496 * that was either removed or had its rt_nr_running updated.
1497 */
1498 static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
1499 unsigned int flags, int for_enqueue)
1500 {
1501 struct sched_rt_entity *last = rt_se;
1502 struct sched_rt_entity *origin = rt_se;
1503 unsigned int del_rt_nr = 0;
1504 unsigned int del_rr_nr = 0;
1505 int prio_changed = rt_se_prio(rt_se);
1506 int sub_on_rq = 1;
1507
1508 for_each_sched_rt_entity(rt_se) {
1509 if (on_rt_rq(rt_se)) {
1510 if (sub_on_rq) {
1511 /*
1512 * The number of tasks removed from the sub-level rt_se also needs
1513 * to be subtracted from the rq of the current rt_se, as the current
1514 * rt_se's rq no longer includes the number of removed tasks.
1515 */
1516 dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
1517
1518 if (prio_changed) {
1519 /*
1520 * If the removal of the lower-level rt_se causes the
1521 * highest priority of the current rq to change, then the
1522 * current rt_se also needs to be removed from its parent
1523 * rq, and the number of deleted tasks should be
1524 * accumulated.
1525 */
1526 del_rt_nr += rt_se_nr_running(rt_se);
1527 del_rr_nr += rt_se_rr_nr_running(rt_se);
1528 prio_changed = __dequeue_rt_entity(rt_se,
1529 prio_changed, flags);
1530 last = rt_se;
1531 } else if (!for_enqueue) {
1532 /* For dequeue, last may only rt_nr_running was modified.*/
1533 last = rt_se;
1534 }
1535 } else {
1536 /*
1537 * Entering this branch must be for enqueue, as dequeue would break
1538 * if an rt_se is not online.
1539 * If the sub-level node is not online, and the current rt_se's
1540 * priority is lower than the one being added, current rt_se need
1541 * to be removed.
1542 */
1543 prio_changed = rt_se_prio(rt_se);
1544 if (prio_changed > rt_se_prio(origin)) {
1545 del_rt_nr += rt_se_nr_running(rt_se);
1546 del_rr_nr += rt_se_rr_nr_running(rt_se);
1547 prio_changed = __dequeue_rt_entity(rt_se,
1548 prio_changed, flags);
1549 last = rt_se;
1550 } else {
1551 prio_changed = 0;
1552 }
1553 }
1554
1555 /*
1556 * If the current rt_se is on the top rt_rq, then the already deleted
1557 * nodes, plus the count of the rt_rq where current rt_se located,
1558 * need to be removed from the top_rt_rq.
1559 */
> 1560 if (!rt_se->parent) {
1561 dequeue_top_rt_rq(rt_rq_of_se(rt_se),
1562 del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
1563 }
1564 sub_on_rq = 1;
1565 } else if (for_enqueue) {
1566 /*
1567 * In the case of an enqueue operation, if a certain level is found to be
1568 * not online, then the previous counts need to be reset to zero.
1569 */
1570 prio_changed = 0;
1571 sub_on_rq = 0;
1572 del_rt_nr = 0;
1573 del_rr_nr = 0;
1574
1575 if (!rt_se->parent)
1576 dequeue_top_rt_rq(rt_rq_of_se(rt_se),
1577 rt_rq_of_se(rt_se)->rt_nr_running);
1578 } else {
1579 last = rt_se;
1580 break;
1581 }
1582 }
1583
1584 return last;
1585 }
1586
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
[-- Attachment #2: reproduce --]
[-- Type: text/plain, Size: 808 bytes --]
reproduce (this is a W=1 build):
git clone https://github.com/intel/lkp-tests.git ~/lkp-tests
# install s390 cross compiling tool for clang build
# apt-get install binutils-s390x-linux-gnu
git remote add tip https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git
git fetch tip sched/core
git checkout tip/sched/core
b4 shazam https://lore.kernel.org/r/20240627172156.235421-2-xavier_qy@163.com
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang-19 ~/lkp-tests/kbuild/make.cross W=1 O=build_dir ARCH=s390 olddefconfig
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang-19 ~/lkp-tests/kbuild/make.cross W=1 O=build_dir ARCH=s390 SHELL=/bin/bash kernel/
[-- Attachment #3: config --]
[-- Type: text/plain, Size: 29164 bytes --]
#
# Automatically generated file; DO NOT EDIT.
# Linux/s390 6.10.0-rc1 Kernel Configuration
#
CONFIG_CC_VERSION_TEXT="clang version 19.0.0git (git://gitmirror/llvm_project 326ba38a991250a8587a399a260b0f7af2c9166a)"
CONFIG_GCC_VERSION=0
CONFIG_CC_IS_CLANG=y
CONFIG_CLANG_VERSION=190000
CONFIG_AS_IS_LLVM=y
CONFIG_AS_VERSION=190000
CONFIG_LD_IS_BFD=y
CONFIG_LD_VERSION=24100
CONFIG_LLD_VERSION=0
CONFIG_RUST_IS_AVAILABLE=y
CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y
CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT=y
CONFIG_CC_HAS_ASM_INLINE=y
CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y
CONFIG_PAHOLE_VERSION=127
CONFIG_IRQ_WORK=y
CONFIG_BUILDTIME_TABLE_SORT=y
CONFIG_THREAD_INFO_IN_TASK=y
#
# General setup
#
CONFIG_INIT_ENV_ARG_LIMIT=32
# CONFIG_WERROR is not set
CONFIG_LOCALVERSION=""
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_BUILD_SALT=""
CONFIG_HAVE_KERNEL_GZIP=y
CONFIG_HAVE_KERNEL_BZIP2=y
CONFIG_HAVE_KERNEL_LZMA=y
CONFIG_HAVE_KERNEL_XZ=y
CONFIG_HAVE_KERNEL_LZO=y
CONFIG_HAVE_KERNEL_LZ4=y
CONFIG_HAVE_KERNEL_ZSTD=y
CONFIG_HAVE_KERNEL_UNCOMPRESSED=y
CONFIG_KERNEL_GZIP=y
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
# CONFIG_KERNEL_XZ is not set
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
# CONFIG_KERNEL_ZSTD is not set
# CONFIG_KERNEL_UNCOMPRESSED is not set
CONFIG_DEFAULT_INIT=""
CONFIG_DEFAULT_HOSTNAME="(none)"
# CONFIG_SYSVIPC is not set
# CONFIG_WATCH_QUEUE is not set
# CONFIG_CROSS_MEMORY_ATTACH is not set
# CONFIG_USELIB is not set
CONFIG_HAVE_ARCH_AUDITSYSCALL=y
#
# IRQ subsystem
#
CONFIG_SPARSE_IRQ=y
# end of IRQ subsystem
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_GENERIC_CLOCKEVENTS=y
CONFIG_CONTEXT_TRACKING=y
CONFIG_CONTEXT_TRACKING_IDLE=y
#
# Timers subsystem
#
CONFIG_HZ_PERIODIC=y
# CONFIG_NO_HZ_IDLE is not set
# CONFIG_NO_HZ is not set
# CONFIG_HIGH_RES_TIMERS is not set
# end of Timers subsystem
CONFIG_HAVE_EBPF_JIT=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
#
# BPF subsystem
#
# CONFIG_BPF_SYSCALL is not set
# end of BPF subsystem
CONFIG_PREEMPT_NONE_BUILD=y
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
#
# CPU/Task time and stats accounting
#
CONFIG_VIRT_CPU_ACCOUNTING=y
CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_PSI is not set
# end of CPU/Task time and stats accounting
# CONFIG_CPU_ISOLATION is not set
#
# RCU Subsystem
#
CONFIG_TREE_RCU=y
# CONFIG_RCU_EXPERT is not set
CONFIG_TREE_SRCU=y
CONFIG_RCU_STALL_COMMON=y
CONFIG_RCU_NEED_SEGCBLIST=y
# end of RCU Subsystem
# CONFIG_IKCONFIG is not set
# CONFIG_IKHEADERS is not set
CONFIG_LOG_BUF_SHIFT=17
CONFIG_LOG_CPU_MAX_BUF_SHIFT=12
#
# Scheduler features
#
# end of Scheduler features
CONFIG_ARCH_SUPPORTS_NUMA_BALANCING=y
CONFIG_CC_HAS_INT128=y
CONFIG_CC_IMPLICIT_FALLTHROUGH="-Wimplicit-fallthrough"
CONFIG_GCC10_NO_ARRAY_BOUNDS=y
CONFIG_GCC_NO_STRINGOP_OVERFLOW=y
CONFIG_ARCH_SUPPORTS_INT128=y
# CONFIG_CGROUPS is not set
CONFIG_NAMESPACES=y
# CONFIG_UTS_NS is not set
# CONFIG_TIME_NS is not set
# CONFIG_USER_NS is not set
# CONFIG_PID_NS is not set
# CONFIG_CHECKPOINT_RESTORE is not set
# CONFIG_SCHED_AUTOGROUP is not set
# CONFIG_RELAY is not set
# CONFIG_BLK_DEV_INITRD is not set
# CONFIG_BOOT_CONFIG is not set
# CONFIG_INITRAMFS_PRESERVE_MTIME is not set
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_LD_ORPHAN_WARN=y
CONFIG_LD_ORPHAN_WARN_LEVEL="warn"
CONFIG_SYSCTL=y
CONFIG_SYSCTL_EXCEPTION_TRACE=y
# CONFIG_EXPERT is not set
CONFIG_MULTIUSER=y
CONFIG_SYSFS_SYSCALL=y
CONFIG_FHANDLE=y
CONFIG_POSIX_TIMERS=y
CONFIG_PRINTK=y
CONFIG_BUG=y
CONFIG_ELF_CORE=y
CONFIG_FUTEX=y
CONFIG_FUTEX_PI=y
CONFIG_EPOLL=y
CONFIG_SIGNALFD=y
CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_AIO=y
CONFIG_IO_URING=y
CONFIG_ADVISE_SYSCALLS=y
CONFIG_MEMBARRIER=y
CONFIG_RSEQ=y
CONFIG_CACHESTAT_SYSCALL=y
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_SELFTEST is not set
CONFIG_KALLSYMS_BASE_RELATIVE=y
CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE=y
CONFIG_HAVE_PERF_EVENTS=y
#
# Kernel Performance Events And Counters
#
# CONFIG_PERF_EVENTS is not set
# end of Kernel Performance Events And Counters
# CONFIG_PROFILING is not set
#
# Kexec and crash features
#
# CONFIG_KEXEC is not set
# CONFIG_KEXEC_FILE is not set
# end of Kexec and crash features
# end of General setup
CONFIG_MMU=y
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_ARCH_PROC_KCORE_TEXT=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_BUG=y
CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y
CONFIG_AUDIT_ARCH=y
CONFIG_NO_IOPORT_MAP=y
CONFIG_ARCH_SUPPORTS_UPROBES=y
CONFIG_S390=y
CONFIG_SCHED_OMIT_FRAME_POINTER=y
CONFIG_PGTABLE_LEVELS=5
CONFIG_HAVE_LIVEPATCH=y
CONFIG_ARCH_SUPPORTS_KEXEC=y
CONFIG_ARCH_SUPPORTS_KEXEC_FILE=y
CONFIG_ARCH_SUPPORTS_KEXEC_PURGATORY=y
CONFIG_ARCH_SUPPORTS_CRASH_DUMP=y
#
# Processor type and features
#
CONFIG_HAVE_MARCH_Z10_FEATURES=y
CONFIG_HAVE_MARCH_Z196_FEATURES=y
# CONFIG_MARCH_Z10 is not set
CONFIG_MARCH_Z196=y
# CONFIG_MARCH_ZEC12 is not set
# CONFIG_MARCH_Z13 is not set
# CONFIG_MARCH_Z14 is not set
# CONFIG_MARCH_Z15 is not set
# CONFIG_MARCH_Z16 is not set
CONFIG_MARCH_Z196_TUNE=y
CONFIG_TUNE_DEFAULT=y
# CONFIG_TUNE_Z10 is not set
# CONFIG_TUNE_Z196 is not set
# CONFIG_TUNE_ZEC12 is not set
# CONFIG_TUNE_Z13 is not set
# CONFIG_TUNE_Z14 is not set
# CONFIG_TUNE_Z15 is not set
# CONFIG_TUNE_Z16 is not set
CONFIG_64BIT=y
CONFIG_COMMAND_LINE_SIZE=4096
CONFIG_SMP=y
CONFIG_NR_CPUS=64
CONFIG_HOTPLUG_CPU=y
# CONFIG_SCHED_TOPOLOGY is not set
# CONFIG_HZ_100 is not set
CONFIG_HZ_250=y
# CONFIG_HZ_300 is not set
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
# CONFIG_KERNEL_NOBP is not set
CONFIG_RELOCATABLE=y
# CONFIG_RANDOMIZE_BASE is not set
CONFIG_KERNEL_IMAGE_BASE=0x3FFE0000000
# end of Processor type and features
#
# Memory setup
#
CONFIG_ARCH_SPARSEMEM_ENABLE=y
CONFIG_ARCH_SPARSEMEM_DEFAULT=y
CONFIG_MAX_PHYSMEM_BITS=46
# CONFIG_CHECK_STACK is not set
# end of Memory setup
#
# I/O subsystem
#
# CONFIG_QDIO is not set
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
# CONFIG_AP is not set
# end of I/O subsystem
CONFIG_CCW=y
#
# Virtualization
#
# CONFIG_PROTECTED_VIRTUALIZATION_GUEST is not set
# CONFIG_PFAULT is not set
# CONFIG_CMM is not set
# CONFIG_APPLDATA_BASE is not set
# CONFIG_S390_HYPFS is not set
# CONFIG_VIRTUALIZATION is not set
# CONFIG_S390_GUEST is not set
# end of Virtualization
#
# Selftests
#
# end of Selftests
CONFIG_CPU_MITIGATIONS=y
#
# General architecture-dependent options
#
CONFIG_GENERIC_ENTRY=y
# CONFIG_KPROBES is not set
# CONFIG_JUMP_LABEL is not set
CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y
CONFIG_ARCH_USE_BUILTIN_BSWAP=y
CONFIG_HAVE_KPROBES=y
CONFIG_HAVE_KRETPROBES=y
CONFIG_HAVE_KPROBES_ON_FTRACE=y
CONFIG_ARCH_CORRECT_STACKTRACE_ON_KRETPROBE=y
CONFIG_HAVE_FUNCTION_ERROR_INJECTION=y
CONFIG_HAVE_NMI=y
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
CONFIG_HAVE_ARCH_TRACEHOOK=y
CONFIG_HAVE_DMA_CONTIGUOUS=y
CONFIG_GENERIC_SMP_IDLE_THREAD=y
CONFIG_ARCH_HAS_FORTIFY_SOURCE=y
CONFIG_ARCH_HAS_SET_MEMORY=y
CONFIG_ARCH_HAS_SET_DIRECT_MAP=y
CONFIG_ARCH_WANTS_NO_INSTR=y
CONFIG_ARCH_32BIT_USTAT_F_TINODE=y
CONFIG_HAVE_ASM_MODVERSIONS=y
CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y
CONFIG_HAVE_RSEQ=y
CONFIG_HAVE_FUNCTION_ARG_ACCESS_API=y
CONFIG_HAVE_PERF_REGS=y
CONFIG_HAVE_PERF_USER_STACK_DUMP=y
CONFIG_HAVE_ARCH_JUMP_LABEL=y
CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE=y
CONFIG_MMU_GATHER_TABLE_FREE=y
CONFIG_MMU_GATHER_RCU_TABLE_FREE=y
CONFIG_MMU_GATHER_MERGE_VMAS=y
CONFIG_MMU_GATHER_NO_GATHER=y
CONFIG_MMU_LAZY_TLB_REFCOUNT=y
CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG=y
CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS=y
CONFIG_HAVE_ALIGNED_STRUCT_PAGE=y
CONFIG_HAVE_CMPXCHG_LOCAL=y
CONFIG_HAVE_CMPXCHG_DOUBLE=y
CONFIG_ARCH_WANT_IPC_PARSE_VERSION=y
CONFIG_HAVE_ARCH_SECCOMP=y
CONFIG_HAVE_ARCH_SECCOMP_FILTER=y
# CONFIG_SECCOMP is not set
CONFIG_HAVE_ARCH_STACKLEAK=y
CONFIG_LTO_NONE=y
CONFIG_HAVE_VIRT_CPU_ACCOUNTING=y
CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE=y
CONFIG_ARCH_HAS_SCALED_CPUTIME=y
CONFIG_HAVE_VIRT_CPU_ACCOUNTING_GEN=y
CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE=y
CONFIG_ARCH_WANT_KERNEL_PMD_MKWRITE=y
CONFIG_ARCH_WANT_PMD_MKWRITE=y
CONFIG_HAVE_ARCH_SOFT_DIRTY=y
CONFIG_HAVE_MOD_ARCH_SPECIFIC=y
CONFIG_MODULES_USE_ELF_RELA=y
CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK=y
CONFIG_SOFTIRQ_ON_OWN_STACK=y
CONFIG_ALTERNATE_USER_ADDRESS_SPACE=y
CONFIG_ARCH_HAS_ELF_RANDOMIZE=y
CONFIG_HAVE_PAGE_SIZE_4KB=y
CONFIG_PAGE_SIZE_4KB=y
CONFIG_PAGE_SIZE_LESS_THAN_64KB=y
CONFIG_PAGE_SIZE_LESS_THAN_256KB=y
CONFIG_PAGE_SHIFT=12
CONFIG_HAVE_RELIABLE_STACKTRACE=y
CONFIG_CLONE_BACKWARDS2=y
CONFIG_OLD_SIGSUSPEND3=y
CONFIG_OLD_SIGACTION=y
# CONFIG_COMPAT_32BIT_TIME is not set
CONFIG_HAVE_ARCH_VMAP_STACK=y
# CONFIG_VMAP_STACK is not set
CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET=y
CONFIG_RANDOMIZE_KSTACK_OFFSET=y
# CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT is not set
CONFIG_ARCH_HAS_STRICT_KERNEL_RWX=y
CONFIG_STRICT_KERNEL_RWX=y
CONFIG_ARCH_HAS_STRICT_MODULE_RWX=y
CONFIG_ARCH_HAS_MEM_ENCRYPT=y
CONFIG_ARCH_HAS_VDSO_DATA=y
CONFIG_ARCH_WANT_LD_ORPHAN_WARN=y
CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
#
# GCOV-based kernel profiling
#
CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y
# end of GCOV-based kernel profiling
CONFIG_HAVE_GCC_PLUGINS=y
CONFIG_FUNCTION_ALIGNMENT_16B=y
CONFIG_FUNCTION_ALIGNMENT=16
CONFIG_CC_HAS_SANE_FUNCTION_ALIGNMENT=y
# end of General architecture-dependent options
CONFIG_RT_MUTEXES=y
# CONFIG_MODULES is not set
CONFIG_BLOCK=y
# CONFIG_BLOCK_LEGACY_AUTOLOAD is not set
# CONFIG_BLK_DEV_BSGLIB is not set
# CONFIG_BLK_DEV_INTEGRITY is not set
# CONFIG_BLK_DEV_WRITE_MOUNTED is not set
# CONFIG_BLK_DEV_ZONED is not set
# CONFIG_BLK_WBT is not set
# CONFIG_BLK_INLINE_ENCRYPTION is not set
#
# Partition Types
#
# CONFIG_PARTITION_ADVANCED is not set
CONFIG_MSDOS_PARTITION=y
CONFIG_EFI_PARTITION=y
# end of Partition Types
#
# IO Schedulers
#
# CONFIG_MQ_IOSCHED_DEADLINE is not set
# CONFIG_MQ_IOSCHED_KYBER is not set
# CONFIG_IOSCHED_BFQ is not set
# end of IO Schedulers
CONFIG_ARCH_INLINE_SPIN_TRYLOCK=y
CONFIG_ARCH_INLINE_SPIN_TRYLOCK_BH=y
CONFIG_ARCH_INLINE_SPIN_LOCK=y
CONFIG_ARCH_INLINE_SPIN_LOCK_BH=y
CONFIG_ARCH_INLINE_SPIN_LOCK_IRQ=y
CONFIG_ARCH_INLINE_SPIN_LOCK_IRQSAVE=y
CONFIG_ARCH_INLINE_SPIN_UNLOCK=y
CONFIG_ARCH_INLINE_SPIN_UNLOCK_BH=y
CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQ=y
CONFIG_ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE=y
CONFIG_ARCH_INLINE_READ_TRYLOCK=y
CONFIG_ARCH_INLINE_READ_LOCK=y
CONFIG_ARCH_INLINE_READ_LOCK_BH=y
CONFIG_ARCH_INLINE_READ_LOCK_IRQ=y
CONFIG_ARCH_INLINE_READ_LOCK_IRQSAVE=y
CONFIG_ARCH_INLINE_READ_UNLOCK=y
CONFIG_ARCH_INLINE_READ_UNLOCK_BH=y
CONFIG_ARCH_INLINE_READ_UNLOCK_IRQ=y
CONFIG_ARCH_INLINE_READ_UNLOCK_IRQRESTORE=y
CONFIG_ARCH_INLINE_WRITE_TRYLOCK=y
CONFIG_ARCH_INLINE_WRITE_LOCK=y
CONFIG_ARCH_INLINE_WRITE_LOCK_BH=y
CONFIG_ARCH_INLINE_WRITE_LOCK_IRQ=y
CONFIG_ARCH_INLINE_WRITE_LOCK_IRQSAVE=y
CONFIG_ARCH_INLINE_WRITE_UNLOCK=y
CONFIG_ARCH_INLINE_WRITE_UNLOCK_BH=y
CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQ=y
CONFIG_ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE=y
CONFIG_INLINE_SPIN_TRYLOCK=y
CONFIG_INLINE_SPIN_TRYLOCK_BH=y
CONFIG_INLINE_SPIN_LOCK=y
CONFIG_INLINE_SPIN_LOCK_BH=y
CONFIG_INLINE_SPIN_LOCK_IRQ=y
CONFIG_INLINE_SPIN_LOCK_IRQSAVE=y
CONFIG_INLINE_SPIN_UNLOCK_BH=y
CONFIG_INLINE_SPIN_UNLOCK_IRQ=y
CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE=y
CONFIG_INLINE_READ_TRYLOCK=y
CONFIG_INLINE_READ_LOCK=y
CONFIG_INLINE_READ_LOCK_BH=y
CONFIG_INLINE_READ_LOCK_IRQ=y
CONFIG_INLINE_READ_LOCK_IRQSAVE=y
CONFIG_INLINE_READ_UNLOCK=y
CONFIG_INLINE_READ_UNLOCK_BH=y
CONFIG_INLINE_READ_UNLOCK_IRQ=y
CONFIG_INLINE_READ_UNLOCK_IRQRESTORE=y
CONFIG_INLINE_WRITE_TRYLOCK=y
CONFIG_INLINE_WRITE_LOCK=y
CONFIG_INLINE_WRITE_LOCK_BH=y
CONFIG_INLINE_WRITE_LOCK_IRQ=y
CONFIG_INLINE_WRITE_LOCK_IRQSAVE=y
CONFIG_INLINE_WRITE_UNLOCK=y
CONFIG_INLINE_WRITE_UNLOCK_BH=y
CONFIG_INLINE_WRITE_UNLOCK_IRQ=y
CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE=y
CONFIG_ARCH_SUPPORTS_ATOMIC_RMW=y
CONFIG_MUTEX_SPIN_ON_OWNER=y
CONFIG_RWSEM_SPIN_ON_OWNER=y
CONFIG_LOCK_SPIN_ON_OWNER=y
CONFIG_ARCH_HAS_SYSCALL_WRAPPER=y
#
# Executable file formats
#
# CONFIG_BINFMT_ELF is not set
CONFIG_ARCH_BINFMT_ELF_STATE=y
# CONFIG_BINFMT_SCRIPT is not set
# CONFIG_BINFMT_MISC is not set
CONFIG_COREDUMP=y
# end of Executable file formats
#
# Memory Management options
#
# CONFIG_SWAP is not set
#
# Slab allocator options
#
CONFIG_SLUB=y
# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_SLAB_FREELIST_RANDOM is not set
# CONFIG_SLAB_FREELIST_HARDENED is not set
# CONFIG_SLUB_STATS is not set
# CONFIG_SLUB_CPU_PARTIAL is not set
# CONFIG_RANDOM_KMALLOC_CACHES is not set
# end of Slab allocator options
# CONFIG_SHUFFLE_PAGE_ALLOCATOR is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SPARSEMEM=y
CONFIG_SPARSEMEM_EXTREME=y
CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
CONFIG_SPARSEMEM_VMEMMAP=y
CONFIG_ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP=y
CONFIG_HAVE_MEMBLOCK_PHYS_MAP=y
CONFIG_HAVE_GUP_FAST=y
CONFIG_EXCLUSIVE_SYSTEM_RAM=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y
# CONFIG_MEMORY_HOTPLUG is not set
CONFIG_ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y
# CONFIG_COMPACTION is not set
# CONFIG_PAGE_REPORTING is not set
# CONFIG_MIGRATION is not set
CONFIG_PCP_BATCH_SCALE_MAX=5
CONFIG_PHYS_ADDR_T_64BIT=y
# CONFIG_KSM is not set
CONFIG_DEFAULT_MMAP_MIN_ADDR=4096
# CONFIG_TRANSPARENT_HUGEPAGE is not set
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
CONFIG_HAVE_SETUP_PER_CPU_AREA=y
# CONFIG_CMA is not set
# CONFIG_DEFERRED_STRUCT_PAGE_INIT is not set
# CONFIG_IDLE_PAGE_TRACKING is not set
CONFIG_ARCH_HAS_CURRENT_STACK_POINTER=y
CONFIG_ZONE_DMA=y
CONFIG_VM_EVENT_COUNTERS=y
# CONFIG_PERCPU_STATS is not set
#
# GUP_TEST needs to have DEBUG_FS enabled
#
# CONFIG_DMAPOOL_TEST is not set
CONFIG_ARCH_HAS_PTE_SPECIAL=y
CONFIG_SECRETMEM=y
# CONFIG_ANON_VMA_NAME is not set
# CONFIG_USERFAULTFD is not set
# CONFIG_LRU_GEN is not set
CONFIG_ARCH_SUPPORTS_PER_VMA_LOCK=y
CONFIG_PER_VMA_LOCK=y
#
# Data Access Monitoring
#
# CONFIG_DAMON is not set
# end of Data Access Monitoring
# end of Memory Management options
# CONFIG_NET is not set
#
# Device Drivers
#
CONFIG_HAVE_PCI=y
# CONFIG_PCI is not set
# CONFIG_PCCARD is not set
#
# Generic Driver Options
#
# CONFIG_UEVENT_HELPER is not set
# CONFIG_DEVTMPFS is not set
# CONFIG_STANDALONE is not set
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
#
# Firmware loader
#
CONFIG_FW_LOADER=y
CONFIG_EXTRA_FIRMWARE=""
# CONFIG_FW_LOADER_USER_HELPER is not set
# CONFIG_FW_LOADER_COMPRESS is not set
# CONFIG_FW_UPLOAD is not set
# end of Firmware loader
CONFIG_ALLOW_DEV_COREDUMP=y
CONFIG_GENERIC_CPU_AUTOPROBE=y
CONFIG_GENERIC_CPU_VULNERABILITIES=y
# CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT is not set
# end of Generic Driver Options
#
# Bus devices
#
# CONFIG_MHI_BUS is not set
# CONFIG_MHI_BUS_EP is not set
# end of Bus devices
#
# Cache Drivers
#
# end of Cache Drivers
#
# Firmware Drivers
#
#
# ARM System Control and Management Interface Protocol
#
# end of ARM System Control and Management Interface Protocol
# CONFIG_GOOGLE_FIRMWARE is not set
#
# Qualcomm firmware drivers
#
# end of Qualcomm firmware drivers
#
# Tegra firmware driver
#
# end of Tegra firmware driver
# end of Firmware Drivers
# CONFIG_GNSS is not set
# CONFIG_MTD is not set
# CONFIG_OF is not set
# CONFIG_BLK_DEV is not set
#
# NVME Support
#
# CONFIG_NVME_FC is not set
# end of NVME Support
#
# Misc devices
#
# CONFIG_DUMMY_IRQ is not set
# CONFIG_ENCLOSURE_SERVICES is not set
# CONFIG_C2PORT is not set
#
# EEPROM support
#
# CONFIG_EEPROM_93CX6 is not set
# end of EEPROM support
#
# Texas Instruments shared transport line discipline
#
# end of Texas Instruments shared transport line discipline
#
# Altera FPGA firmware download module (requires I2C)
#
# CONFIG_ECHO is not set
# CONFIG_PVPANIC is not set
# end of Misc devices
#
# SCSI device support
#
CONFIG_SCSI_MOD=y
# CONFIG_RAID_ATTRS is not set
# CONFIG_SCSI is not set
# end of SCSI device support
# CONFIG_MD is not set
# CONFIG_TARGET_CORE is not set
#
# Input device support
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
# CONFIG_INPUT_SPARSEKMAP is not set
# CONFIG_INPUT_MATRIXKMAP is not set
#
# Userland interfaces
#
# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_JOYDEV is not set
# CONFIG_INPUT_EVDEV is not set
# CONFIG_INPUT_EVBUG is not set
#
# Input Device Drivers
#
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_INPUT_JOYSTICK is not set
# CONFIG_INPUT_TABLET is not set
# CONFIG_INPUT_TOUCHSCREEN is not set
# CONFIG_INPUT_MISC is not set
# CONFIG_RMI4_CORE is not set
#
# Hardware I/O ports
#
# CONFIG_SERIO is not set
# CONFIG_GAMEPORT is not set
# end of Hardware I/O ports
# end of Input device support
#
# Character devices
#
CONFIG_TTY=y
CONFIG_VT=y
CONFIG_CONSOLE_TRANSLATIONS=y
CONFIG_VT_CONSOLE=y
# CONFIG_VT_HW_CONSOLE_BINDING is not set
CONFIG_UNIX98_PTYS=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_LEGACY_TIOCSTI is not set
# CONFIG_LDISC_AUTOLOAD is not set
# CONFIG_NULL_TTY is not set
# CONFIG_SERIAL_DEV_BUS is not set
# CONFIG_VIRTIO_CONSOLE is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_DEVMEM is not set
# CONFIG_HANGCHECK_TIMER is not set
#
# S/390 character device drivers
#
# CONFIG_TN3270 is not set
# CONFIG_TN3215 is not set
# CONFIG_SCLP_TTY is not set
# CONFIG_SCLP_VT220_TTY is not set
# CONFIG_HMC_DRV is not set
# CONFIG_SCLP_OFB is not set
# CONFIG_S390_TAPE is not set
# CONFIG_VMCP is not set
# CONFIG_MONWRITER is not set
# CONFIG_S390_VMUR is not set
# end of Character devices
#
# I2C support
#
# CONFIG_I2C is not set
# end of I2C support
# CONFIG_I3C is not set
# CONFIG_SPMI is not set
# CONFIG_HSI is not set
# CONFIG_PPS is not set
#
# PTP clock support
#
CONFIG_PTP_1588_CLOCK_OPTIONAL=y
#
# Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks.
#
# end of PTP clock support
# CONFIG_PINCTRL is not set
# CONFIG_GPIOLIB is not set
# CONFIG_POWER_RESET is not set
# CONFIG_POWER_SUPPLY is not set
# CONFIG_THERMAL is not set
# CONFIG_WATCHDOG is not set
# CONFIG_REGULATOR is not set
# CONFIG_RC_CORE is not set
#
# CEC support
#
# CONFIG_MEDIA_CEC_SUPPORT is not set
# end of CEC support
#
# Graphics support
#
# CONFIG_AUXDISPLAY is not set
#
# Console display driver support
#
CONFIG_DUMMY_CONSOLE=y
CONFIG_DUMMY_CONSOLE_COLUMNS=80
CONFIG_DUMMY_CONSOLE_ROWS=25
# end of Console display driver support
# end of Graphics support
# CONFIG_HID_SUPPORT is not set
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
# CONFIG_MEMSTICK is not set
# CONFIG_NEW_LEDS is not set
# CONFIG_ACCESSIBILITY is not set
# CONFIG_DMADEVICES is not set
#
# DMABUF options
#
# CONFIG_SYNC_FILE is not set
# CONFIG_DMABUF_HEAPS is not set
# end of DMABUF options
# CONFIG_UIO is not set
# CONFIG_VFIO is not set
# CONFIG_VIRT_DRIVERS is not set
# CONFIG_VIRTIO_MENU is not set
# CONFIG_VHOST_MENU is not set
#
# Microsoft Hyper-V guest support
#
# end of Microsoft Hyper-V guest support
# CONFIG_GREYBUS is not set
# CONFIG_COMEDI is not set
# CONFIG_STAGING is not set
# CONFIG_COMMON_CLK is not set
# CONFIG_HWSPINLOCK is not set
#
# Clock Source drivers
#
# end of Clock Source drivers
# CONFIG_MAILBOX is not set
# CONFIG_IOMMU_SUPPORT is not set
#
# Remoteproc drivers
#
# CONFIG_REMOTEPROC is not set
# end of Remoteproc drivers
#
# Rpmsg drivers
#
# CONFIG_RPMSG_VIRTIO is not set
# end of Rpmsg drivers
#
# SOC (System On Chip) specific Drivers
#
#
# Amlogic SoC drivers
#
# end of Amlogic SoC drivers
#
# Broadcom SoC drivers
#
# end of Broadcom SoC drivers
#
# NXP/Freescale QorIQ SoC drivers
#
# end of NXP/Freescale QorIQ SoC drivers
#
# fujitsu SoC drivers
#
# end of fujitsu SoC drivers
#
# i.MX SoC drivers
#
# end of i.MX SoC drivers
#
# Enable LiteX SoC Builder specific drivers
#
# end of Enable LiteX SoC Builder specific drivers
# CONFIG_WPCM450_SOC is not set
#
# Qualcomm SoC drivers
#
# end of Qualcomm SoC drivers
# CONFIG_SOC_TI is not set
#
# Xilinx SoC drivers
#
# end of Xilinx SoC drivers
# end of SOC (System On Chip) specific Drivers
#
# PM Domains
#
#
# Amlogic PM Domains
#
# end of Amlogic PM Domains
#
# Broadcom PM Domains
#
# end of Broadcom PM Domains
#
# i.MX PM Domains
#
# end of i.MX PM Domains
#
# Qualcomm PM Domains
#
# end of Qualcomm PM Domains
# end of PM Domains
# CONFIG_PM_DEVFREQ is not set
# CONFIG_EXTCON is not set
# CONFIG_MEMORY is not set
# CONFIG_IIO is not set
# CONFIG_PWM is not set
#
# IRQ chip support
#
# end of IRQ chip support
# CONFIG_RESET_CONTROLLER is not set
#
# PHY Subsystem
#
# CONFIG_GENERIC_PHY is not set
# CONFIG_PHY_CAN_TRANSCEIVER is not set
#
# PHY drivers for Broadcom platforms
#
# end of PHY drivers for Broadcom platforms
# end of PHY Subsystem
# CONFIG_POWERCAP is not set
# CONFIG_RAS is not set
#
# Android
#
# CONFIG_ANDROID_BINDER_IPC is not set
# end of Android
# CONFIG_DAX is not set
# CONFIG_NVMEM is not set
#
# HW tracing support
#
# CONFIG_STM is not set
# end of HW tracing support
# CONFIG_FPGA is not set
# CONFIG_SIOX is not set
# CONFIG_SLIMBUS is not set
# CONFIG_INTERCONNECT is not set
# CONFIG_COUNTER is not set
# CONFIG_PECI is not set
# CONFIG_HTE is not set
# end of Device Drivers
#
# File systems
#
CONFIG_DCACHE_WORD_ACCESS=y
# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_FS_IOMAP=y
# CONFIG_EXT2_FS is not set
# CONFIG_EXT3_FS is not set
# CONFIG_EXT4_FS is not set
# CONFIG_REISERFS_FS is not set
# CONFIG_JFS_FS is not set
# CONFIG_XFS_FS is not set
# CONFIG_GFS2_FS is not set
# CONFIG_BTRFS_FS is not set
# CONFIG_NILFS2_FS is not set
# CONFIG_F2FS_FS is not set
# CONFIG_BCACHEFS_FS is not set
CONFIG_EXPORTFS=y
# CONFIG_EXPORTFS_BLOCK_OPS is not set
CONFIG_FILE_LOCKING=y
# CONFIG_FS_ENCRYPTION is not set
# CONFIG_FS_VERITY is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
# CONFIG_FANOTIFY is not set
# CONFIG_QUOTA is not set
# CONFIG_AUTOFS_FS is not set
# CONFIG_FUSE_FS is not set
# CONFIG_OVERLAY_FS is not set
#
# Caches
#
# end of Caches
#
# CD-ROM/DVD Filesystems
#
# CONFIG_ISO9660_FS is not set
# CONFIG_UDF_FS is not set
# end of CD-ROM/DVD Filesystems
#
# DOS/FAT/EXFAT/NT Filesystems
#
# CONFIG_MSDOS_FS is not set
# CONFIG_VFAT_FS is not set
# CONFIG_EXFAT_FS is not set
# CONFIG_NTFS3_FS is not set
# CONFIG_NTFS_FS is not set
# end of DOS/FAT/EXFAT/NT Filesystems
#
# Pseudo filesystems
#
CONFIG_PROC_FS=y
# CONFIG_PROC_KCORE is not set
CONFIG_PROC_SYSCTL=y
CONFIG_PROC_PAGE_MONITOR=y
# CONFIG_PROC_CHILDREN is not set
CONFIG_KERNFS=y
CONFIG_SYSFS=y
# CONFIG_TMPFS is not set
CONFIG_ARCH_SUPPORTS_HUGETLBFS=y
# CONFIG_HUGETLBFS is not set
CONFIG_ARCH_HAS_GIGANTIC_PAGE=y
# CONFIG_CONFIGFS_FS is not set
# end of Pseudo filesystems
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NLS is not set
# CONFIG_UNICODE is not set
CONFIG_IO_WQ=y
# end of File systems
#
# Security options
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY_DMESG_RESTRICT is not set
# CONFIG_SECURITY is not set
# CONFIG_SECURITYFS is not set
# CONFIG_HARDENED_USERCOPY is not set
# CONFIG_FORTIFY_SOURCE is not set
# CONFIG_STATIC_USERMODEHELPER is not set
# CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT is not set
CONFIG_DEFAULT_SECURITY_DAC=y
CONFIG_LSM="landlock,lockdown,yama,loadpin,safesetid,bpf"
#
# Kernel hardening options
#
#
# Memory initialization
#
CONFIG_CC_HAS_AUTO_VAR_INIT_PATTERN=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO_BARE=y
CONFIG_CC_HAS_AUTO_VAR_INIT_ZERO=y
# CONFIG_INIT_STACK_NONE is not set
# CONFIG_INIT_STACK_ALL_PATTERN is not set
CONFIG_INIT_STACK_ALL_ZERO=y
# CONFIG_INIT_ON_ALLOC_DEFAULT_ON is not set
# CONFIG_INIT_ON_FREE_DEFAULT_ON is not set
# CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON is not set
# end of Memory initialization
#
# Hardening of kernel data structures
#
# CONFIG_LIST_HARDENED is not set
# CONFIG_BUG_ON_DATA_CORRUPTION is not set
# end of Hardening of kernel data structures
CONFIG_CC_HAS_RANDSTRUCT=y
CONFIG_RANDSTRUCT_NONE=y
# CONFIG_RANDSTRUCT_FULL is not set
# end of Kernel hardening options
# end of Security options
# CONFIG_CRYPTO is not set
#
# Library routines
#
# CONFIG_PACKING is not set
CONFIG_BITREVERSE=y
CONFIG_GENERIC_STRNCPY_FROM_USER=y
CONFIG_GENERIC_STRNLEN_USER=y
# CONFIG_CORDIC is not set
# CONFIG_PRIME_NUMBERS is not set
CONFIG_ARCH_USE_CMPXCHG_LOCKREF=y
CONFIG_ARCH_USE_SYM_ANNOTATIONS=y
#
# Crypto library routines
#
CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC=y
# CONFIG_CRYPTO_LIB_CHACHA is not set
# CONFIG_CRYPTO_LIB_CURVE25519 is not set
CONFIG_CRYPTO_LIB_POLY1305_RSIZE=1
# CONFIG_CRYPTO_LIB_POLY1305 is not set
# end of Crypto library routines
# CONFIG_CRC_CCITT is not set
# CONFIG_CRC16 is not set
# CONFIG_CRC_T10DIF is not set
# CONFIG_CRC64_ROCKSOFT is not set
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
# CONFIG_CRC32_SELFTEST is not set
CONFIG_CRC32_SLICEBY8=y
# CONFIG_CRC32_SLICEBY4 is not set
# CONFIG_CRC32_SARWATE is not set
# CONFIG_CRC32_BIT is not set
# CONFIG_CRC64 is not set
# CONFIG_CRC4 is not set
# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
# CONFIG_CRC8 is not set
# CONFIG_RANDOM32_SELFTEST is not set
# CONFIG_ZLIB_DFLTCC is not set
# CONFIG_XZ_DEC is not set
CONFIG_GENERIC_ALLOCATOR=y
CONFIG_HAS_DMA=y
CONFIG_NEED_DMA_MAP_STATE=y
CONFIG_ARCH_DMA_ADDR_T_64BIT=y
CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED=y
CONFIG_SWIOTLB=y
# CONFIG_SWIOTLB_DYNAMIC is not set
CONFIG_DMA_NEED_SYNC=y
# CONFIG_DMA_API_DEBUG is not set
# CONFIG_IRQ_POLL is not set
CONFIG_HAVE_GENERIC_VDSO=y
CONFIG_GENERIC_GETTIMEOFDAY=y
CONFIG_GENERIC_VDSO_TIME_NS=y
CONFIG_ARCH_STACKWALK=y
CONFIG_STACKDEPOT=y
CONFIG_STACKDEPOT_MAX_FRAMES=64
CONFIG_SBITMAP=y
# CONFIG_LWQ_TEST is not set
# end of Library routines
#
# Kernel hacking
#
#
# printk and dmesg options
#
# CONFIG_PRINTK_TIME is not set
# CONFIG_PRINTK_CALLER is not set
# CONFIG_STACKTRACE_BUILD_ID is not set
CONFIG_CONSOLE_LOGLEVEL_DEFAULT=7
CONFIG_CONSOLE_LOGLEVEL_QUIET=4
CONFIG_MESSAGE_LOGLEVEL_DEFAULT=4
# CONFIG_DYNAMIC_DEBUG is not set
# CONFIG_DYNAMIC_DEBUG_CORE is not set
# CONFIG_SYMBOLIC_ERRNAME is not set
CONFIG_DEBUG_BUGVERBOSE=y
# end of printk and dmesg options
# CONFIG_DEBUG_KERNEL is not set
#
# Compile-time checks and compiler options
#
CONFIG_AS_HAS_NON_CONST_ULEB128=y
CONFIG_FRAME_WARN=2048
# CONFIG_STRIP_ASM_SYMS is not set
# CONFIG_HEADERS_INSTALL is not set
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
# end of Compile-time checks and compiler options
#
# Generic Kernel Debugging Instruments
#
# CONFIG_MAGIC_SYSRQ is not set
# CONFIG_DEBUG_FS is not set
CONFIG_ARCH_HAS_UBSAN=y
# CONFIG_UBSAN is not set
CONFIG_HAVE_ARCH_KCSAN=y
CONFIG_HAVE_KCSAN_COMPILER=y
# end of Generic Kernel Debugging Instruments
#
# Networking Debugging
#
# end of Networking Debugging
#
# Memory Debugging
#
# CONFIG_PAGE_EXTENSION is not set
CONFIG_SLUB_DEBUG=y
# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_PAGE_POISONING is not set
# CONFIG_DEBUG_RODATA_TEST is not set
CONFIG_ARCH_HAS_DEBUG_WX=y
# CONFIG_DEBUG_WX is not set
CONFIG_GENERIC_PTDUMP=y
CONFIG_HAVE_DEBUG_KMEMLEAK=y
# CONFIG_PER_VMA_LOCK_STATS is not set
CONFIG_ARCH_HAS_DEBUG_VM_PGTABLE=y
# CONFIG_DEBUG_VM_PGTABLE is not set
CONFIG_ARCH_HAS_DEBUG_VIRTUAL=y
CONFIG_DEBUG_MEMORY_INIT=y
# CONFIG_MEM_ALLOC_PROFILING is not set
CONFIG_HAVE_ARCH_KASAN=y
CONFIG_HAVE_ARCH_KASAN_VMALLOC=y
CONFIG_CC_HAS_KASAN_GENERIC=y
CONFIG_CC_HAS_WORKING_NOSANITIZE_ADDRESS=y
# CONFIG_KASAN is not set
CONFIG_HAVE_ARCH_KFENCE=y
# CONFIG_KFENCE is not set
CONFIG_HAVE_KMSAN_COMPILER=y
# end of Memory Debugging
#
# Debug Oops, Lockups and Hangs
#
# CONFIG_PANIC_ON_OOPS is not set
CONFIG_PANIC_ON_OOPS_VALUE=0
CONFIG_PANIC_TIMEOUT=0
CONFIG_HAVE_HARDLOCKUP_DETECTOR_BUDDY=y
# end of Debug Oops, Lockups and Hangs
#
# Scheduler Debugging
#
# CONFIG_SCHEDSTATS is not set
# end of Scheduler Debugging
# CONFIG_DEBUG_TIMEKEEPING is not set
#
# Lock Debugging (spinlocks, mutexes, etc...)
#
CONFIG_LOCK_DEBUGGING_SUPPORT=y
# CONFIG_WW_MUTEX_SELFTEST is not set
# end of Lock Debugging (spinlocks, mutexes, etc...)
# CONFIG_DEBUG_IRQFLAGS is not set
CONFIG_STACKTRACE=y
# CONFIG_WARN_ALL_UNSEEDED_RANDOM is not set
#
# Debug kernel data structures
#
# end of Debug kernel data structures
#
# RCU Debugging
#
CONFIG_RCU_CPU_STALL_TIMEOUT=21
CONFIG_RCU_EXP_CPU_STALL_TIMEOUT=0
# CONFIG_RCU_CPU_STALL_CPUTIME is not set
# end of RCU Debugging
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_HAVE_RETHOOK=y
CONFIG_HAVE_FUNCTION_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
CONFIG_HAVE_FUNCTION_GRAPH_RETVAL=y
CONFIG_HAVE_DYNAMIC_FTRACE=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS=y
CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
CONFIG_HAVE_SYSCALL_TRACEPOINTS=y
CONFIG_HAVE_FENTRY=y
CONFIG_HAVE_NOP_MCOUNT=y
CONFIG_TRACING_SUPPORT=y
# CONFIG_FTRACE is not set
# CONFIG_SAMPLES is not set
CONFIG_HAVE_SAMPLE_FTRACE_DIRECT=y
CONFIG_HAVE_SAMPLE_FTRACE_DIRECT_MULTI=y
CONFIG_ARCH_HAS_DEVMEM_IS_ALLOWED=y
#
# s390 Debugging
#
CONFIG_EARLY_PRINTK=y
# end of s390 Debugging
#
# Kernel Testing and Coverage
#
# CONFIG_KUNIT is not set
CONFIG_ARCH_HAS_KCOV=y
CONFIG_CC_HAS_SANCOV_TRACE_PC=y
# CONFIG_KCOV is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
# end of Kernel Testing and Coverage
#
# Rust hacking
#
# end of Rust hacking
# end of Kernel hacking
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH-RT sched v2 0/2] Optimize the RT group scheduling
2024-06-27 17:21 [PATCH-RT sched v1 0/2] Optimize the RT group scheduling Xavier
2024-06-27 17:21 ` [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-06-27 17:21 ` [PATCH-RT sched v1 2/2] RT test: Adding test cases for RT group scheduling Xavier
@ 2024-06-29 11:28 ` Xavier
2024-06-29 11:28 ` [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
` (2 more replies)
2024-07-29 9:32 ` [PATCH-RT sched v1 0/2] Optimize the " Michal Koutný
3 siblings, 3 replies; 20+ messages in thread
From: Xavier @ 2024-06-29 11:28 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, Xavier
Hi all,
Patch v2 fix the issues arising from disabling the CONFIG_RT_GROUP_SCHED
macro during compilation.
>The first patch optimizes the enqueue and dequeue of rt_se, the strategy
>employs a bottom-up removal approach.
>The second patch provides validation for the efficiency improvements made
>by patch 1. The test case count the number of infinite loop executions for
>all threads.
>
> origion optimized
>
> 10242794134 10659512784
> 13650210798 13555924695
> 12953159254 13733609646
> 11888973428 11742656925
> 12791797633 13447598015
> 11451270205 11704847480
> 13335320346 13858155642
> 10682907328 10513565749
> 10173249704 10254224697
> 8309259793 8893668653
>
>avg 11547894262 11836376429
>
>Run two QEMU emulators simultaneously, one running the original kernel and the
>other running the optimized kernel, and compare the average of the results over
>10 runs. After optimizing, the number of iterations in the infinite loop increased
>by approximately 2.5%.
Kindly review.
Xavier (2):
RT SCHED: Optimize the enqueue and dequeue operations for rt_se
RT test: Adding test cases for RT group scheduling
MAINTAINERS | 7 +
kernel/sched/debug.c | 50 ++++
kernel/sched/rt.c | 278 +++++++++++++++---
kernel/sched/sched.h | 1 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 ++++++++
7 files changed, 609 insertions(+), 42 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
--
2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-06-29 11:28 ` [PATCH-RT sched v2 0/2] Optimize the " Xavier
@ 2024-06-29 11:28 ` Xavier
2024-07-04 8:30 ` kernel test robot
2024-06-29 11:28 ` [PATCH-RT sched v2 2/2] RT test: Adding test cases for RT group scheduling Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 0/2] Optimize the " Xavier
2 siblings, 1 reply; 20+ messages in thread
From: Xavier @ 2024-06-29 11:28 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, Xavier
This patch optimizes the enqueue and dequeue of rt_se, the strategy employs
a bottom-up removal approach. Specifically, when removing an rt_se at a
certain level, if it is determined that the highest priority of the rq
associated with that rt_se has not changed, there is no need to continue
removing rt_se at higher levels. At this point, only the total number
of removed rt_se needs to be recorded, and the rt_nr_running count of
higher-level rq should be removed accordingly.
Signed-off-by: Xavier <xavier_qy@163.com>
---
kernel/sched/debug.c | 50 ++++++++
kernel/sched/rt.c | 278 ++++++++++++++++++++++++++++++++++++-------
kernel/sched/sched.h | 1 +
3 files changed, 289 insertions(+), 40 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c1eb9a1afd13..282153397e02 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -712,6 +712,56 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#endif
}
+static void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
+{
+ struct task_struct *task;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (rt_se->my_q) {
+ SEQ_printf_task_group_path(m, rt_se->my_q->tg, "%s\n");
+ return;
+ }
+#endif
+ task = container_of(rt_se, struct task_struct, rt);
+ SEQ_printf(m, " prio-%d, pid-%d, %s\n", task->prio, task->pid, task->comm);
+}
+
+/*shall be called in rq lock*/
+void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq)
+{
+ struct rt_prio_array *array = &rt_rq->active;
+ struct sched_rt_entity *rt_se;
+ struct list_head *queue, *head;
+ unsigned long bitmap[2];
+ int idx;
+ int count = 0;
+
+ if (!rt_rq->rt_nr_running)
+ return;
+
+ memcpy(bitmap, array->bitmap, sizeof(unsigned long) * 2);
+ idx = sched_find_first_bit(bitmap);
+ WARN_ON_ONCE(idx >= MAX_RT_PRIO);
+
+ while (1) {
+ clear_bit(idx, bitmap);
+ queue = array->queue + idx;
+ head = queue;
+ queue = queue->next;
+ do {
+ rt_se = list_entry(queue, struct sched_rt_entity, run_list);
+ print_rt_se(m, rt_se);
+ queue = queue->next;
+ count++;
+ } while (queue != head);
+ idx = sched_find_first_bit(bitmap);
+ if (idx >= MAX_RT_PRIO)
+ break;
+ }
+
+ WARN_ON_ONCE(count != rt_rq->rt_nr_running);
+}
+
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index aa4c1c874fa4..0673bce0c145 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1113,7 +1113,7 @@ void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
#endif /* CONFIG_SMP */
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-static void
+static int
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1122,9 +1122,11 @@ inc_rt_prio(struct rt_rq *rt_rq, int prio)
rt_rq->highest_prio.curr = prio;
inc_rt_prio_smp(rt_rq, prio, prev_prio);
+
+ return prev_prio > prio;
}
-static void
+static int
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1149,12 +1151,22 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
}
dec_rt_prio_smp(rt_rq, prio, prev_prio);
+ if (rt_rq->highest_prio.curr > prio)
+ return prio;
+ else
+ return 0;
}
#else
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
+static inline int inc_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
+static inline int dec_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
@@ -1218,28 +1230,31 @@ unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
}
static inline
-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
int prio = rt_se_prio(rt_se);
+ int prio_change;
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
- inc_rt_prio(rt_rq, prio);
+ prio_change = inc_rt_prio(rt_rq, prio);
inc_rt_group(rt_se, rt_rq);
+ return prio_change;
}
static inline
-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq, int prio)
{
+ int prio_changed;
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
- WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
- dec_rt_prio(rt_rq, rt_se_prio(rt_se));
+ prio_changed = dec_rt_prio(rt_rq, prio);
dec_rt_group(rt_se, rt_rq);
+ return prio_changed;
}
/*
@@ -1255,12 +1270,13 @@ static inline bool move_entity(unsigned int flags)
return true;
}
-static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+static void __delist_rt_entity(struct sched_rt_entity *rt_se,
+ struct rt_prio_array *array, int last_prio)
{
list_del_init(&rt_se->run_list);
- if (list_empty(array->queue + rt_se_prio(rt_se)))
- __clear_bit(rt_se_prio(rt_se), array->bitmap);
+ if (list_empty(array->queue + last_prio))
+ __clear_bit(last_prio, array->bitmap);
rt_se->on_list = 0;
}
@@ -1371,7 +1387,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
}
}
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/*
+ * Returns: -1 indicates that rt_se was not enqueued, 0 indicates that the highest
+ * priority of the rq did not change after enqueue, and 1 indicates that the highest
+ * priority of the rq changed after enqueue.
+ */
+static int __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@@ -1386,8 +1407,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
*/
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
if (rt_se->on_list)
- __delist_rt_entity(rt_se, array);
- return;
+ __delist_rt_entity(rt_se, array, rt_se_prio(rt_se));
+ return -1;
}
if (move_entity(flags)) {
@@ -1402,73 +1423,250 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
}
rt_se->on_rq = 1;
- inc_rt_tasks(rt_se, rt_rq);
+ return inc_rt_tasks(rt_se, rt_rq);
}
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/**
+ * delete rt_se from rt_rq
+ *
+ * @rt_se Nodes to be deleted
+ * @last_prio The highest priority of this rt_se before the previous round
+ * of deletion
+ * @flags operation flags
+ *
+ * Returns: =0 indicates that the highest priority of the current rq did not
+ * change during this deletion. >0 indicates it changed, and it returns the
+ * previous highest priority to use in the next round of deletion.
+ */
+static int __dequeue_rt_entity(struct sched_rt_entity *rt_se, int last_prio,
+ unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
if (move_entity(flags)) {
WARN_ON_ONCE(!rt_se->on_list);
- __delist_rt_entity(rt_se, array);
+ __delist_rt_entity(rt_se, array, last_prio);
}
rt_se->on_rq = 0;
- dec_rt_tasks(rt_se, rt_rq);
+ return dec_rt_tasks(rt_se, rt_rq, last_prio);
+}
+
+static inline void dec_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running -= rt;
+ rt_rq->rr_nr_running -= rr;
+}
+
+static inline void add_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running += rt;
+ rt_rq->rr_nr_running += rr;
+}
+
+static inline bool on_top_rt_rq(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (rt_se->parent)
+ return false;
+#endif
+ return true;
}
/*
- * Because the prio of an upper entry depends on the lower
- * entries, we must remove entries top - down.
+ * To optimize the enqueue and dequeue of rt_se, this strategy employs a
+ * bottom-up removal approach. Specifically, when removing an rt_se at a
+ * certain level, if it is determined that the highest priority of the rq
+ * associated with that rt_se has not changed, there is no need to continue
+ * removing rt_se at higher levels. At this point, only the total number
+ * of removed rt_se needs to be recorded, and the rt_nr_running count of
+ * higher-level rq should be removed accordingly.
+ *
+ * For enqueue operations, if an rt_se at a certain level is in the rq,
+ * it is still necessary to check the priority of the higher-level rq.
+ * If the priority of the higher-level rq is found to be lower than that
+ * of the rt_se to be added, it should be removed, as updating the highest
+ * priority of the rq during addition will cause the rq to be repositioned
+ * in the parent rq.
+ *
+ * Conversely, for dequeue operations, if an rt_se at a certain level is
+ * not in the rq, the operation can be exited immediately to reduce
+ * unnecessary checks and handling.
+ *
+ * The return value refers to the last rt_se that was removed for enqueue
+ * operations. And for dequeue operations, it refers to the last rt_se
+ * that was either removed or had its rt_nr_running updated.
*/
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
+static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
+ unsigned int flags, int for_enqueue)
{
- struct sched_rt_entity *back = NULL;
- unsigned int rt_nr_running;
+ struct sched_rt_entity *last = rt_se;
+ struct sched_rt_entity *origin = rt_se;
+ unsigned int del_rt_nr = 0;
+ unsigned int del_rr_nr = 0;
+ int prio_changed = rt_se_prio(rt_se);
+ int sub_on_rq = 1;
for_each_sched_rt_entity(rt_se) {
- rt_se->back = back;
- back = rt_se;
- }
-
- rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
+ if (on_rt_rq(rt_se)) {
+ if (sub_on_rq) {
+ /*
+ * The number of tasks removed from the sub-level rt_se also needs
+ * to be subtracted from the rq of the current rt_se, as the current
+ * rt_se's rq no longer includes the number of removed tasks.
+ */
+ dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
+
+ if (prio_changed) {
+ /*
+ * If the removal of the lower-level rt_se causes the
+ * highest priority of the current rq to change, then the
+ * current rt_se also needs to be removed from its parent
+ * rq, and the number of deleted tasks should be
+ * accumulated.
+ */
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else if (!for_enqueue) {
+ /* For dequeue, last may only rt_nr_running was modified.*/
+ last = rt_se;
+ }
+ } else {
+ /*
+ * Entering this branch must be for enqueue, as dequeue would break
+ * if an rt_se is not online.
+ * If the sub-level node is not online, and the current rt_se's
+ * priority is lower than the one being added, current rt_se need
+ * to be removed.
+ */
+ prio_changed = rt_se_prio(rt_se);
+ if (prio_changed > rt_se_prio(origin)) {
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else {
+ prio_changed = 0;
+ }
+ }
- for (rt_se = back; rt_se; rt_se = rt_se->back) {
- if (on_rt_rq(rt_se))
- __dequeue_rt_entity(rt_se, flags);
+ /*
+ * If the current rt_se is on the top rt_rq, then the already deleted
+ * nodes, plus the count of the rt_rq where current rt_se located,
+ * need to be removed from the top_rt_rq.
+ */
+ if (on_top_rt_rq(rt_se)) {
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
+ }
+ sub_on_rq = 1;
+ } else if (for_enqueue) {
+ /*
+ * In the case of an enqueue operation, if a certain level is found to be
+ * not online, then the previous counts need to be reset to zero.
+ */
+ prio_changed = 0;
+ sub_on_rq = 0;
+ del_rt_nr = 0;
+ del_rr_nr = 0;
+
+ if (on_top_rt_rq(rt_se))
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ rt_rq_of_se(rt_se)->rt_nr_running);
+ } else {
+ last = rt_se;
+ break;
+ }
}
- dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
+ return last;
}
+
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int enqueue = 1;
+ int prio_change = 1;
update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
- for_each_sched_rt_entity(rt_se)
- __enqueue_rt_entity(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 1);
+ for_each_sched_rt_entity(rt_se) {
+ if (enqueue || !on_rt_rq(rt_se) || (prio_change == 1)) {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change >= 0) {
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
+ } else {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ }
+
+ if (rt_se == last)
+ enqueue = 0;
+ }
+
enqueue_top_rt_rq(&rq->rt);
}
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int prio_change = 1;
update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
-
+ last = dequeue_rt_stack(rt_se, flags, 0);
for_each_sched_rt_entity(rt_se) {
struct rt_rq *rt_rq = group_rt_rq(rt_se);
+ if (rt_rq && rt_rq->rt_nr_running) {
+ if (on_rt_rq(rt_se)) {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ } else {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change == 0) {
+ /*
+ * If enqueue is successful and the priority of the rq has
+ * not changed, then the parent node only needs to add the
+ * count of the current rt_se. Otherwise, the parent node
+ * will also need to enqueue.
+ */
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ }
+ }
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
- if (rt_rq && rt_rq->rt_nr_running)
- __enqueue_rt_entity(rt_se, flags);
+ /*
+ * last is the rt_se of the last deletion or modification of the
+ * count, so the subsequent rt_se does not need to be updated.
+ */
+ if (rt_se == last)
+ break;
}
+
enqueue_top_rt_rq(&rq->rt);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a831af102070..b634153aacf0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2878,6 +2878,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq);
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
extern void resched_latency_warn(int cpu, u64 latency);
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH-RT sched v2 2/2] RT test: Adding test cases for RT group scheduling
2024-06-29 11:28 ` [PATCH-RT sched v2 0/2] Optimize the " Xavier
2024-06-29 11:28 ` [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-06-29 11:28 ` Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 0/2] Optimize the " Xavier
2 siblings, 0 replies; 20+ messages in thread
From: Xavier @ 2024-06-29 11:28 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, Xavier
Adding test cases for RT group scheduling, create some RT infinite loop
processes/threads, then set them to the same or different priorities.
Place them in different RT task groups, run for a period of time,
and finally count the number of infinite loop executions for all tasks.
Signed-off-by: Xavier <xavier_qy@163.com>
---
MAINTAINERS | 7 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 +++++++++++
4 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
diff --git a/MAINTAINERS b/MAINTAINERS
index 43353b705988..d29effe57bf8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19480,6 +19480,13 @@ L: linux-remoteproc@vger.kernel.org
S: Maintained
F: drivers/tty/rpmsg_tty.c
+RT GROUP SCHED TEST
+M: Xavier <xavier_qy@163.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: tools/testing/selftests/sched/deadloop.c
+F: tools/testing/selftests/sched/rt_group_sched_test.sh
+
RTL2830 MEDIA DRIVER
L: linux-media@vger.kernel.org
S: Orphan
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
index 099ee9213557..96decb58bf35 100644
--- a/tools/testing/selftests/sched/Makefile
+++ b/tools/testing/selftests/sched/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
-TEST_GEN_FILES := cs_prctl_test
-TEST_PROGS := cs_prctl_test
+TEST_GEN_FILES := cs_prctl_test deadloop
+TEST_PROGS := cs_prctl_test deadloop
include ../lib.mk
diff --git a/tools/testing/selftests/sched/deadloop.c b/tools/testing/selftests/sched/deadloop.c
new file mode 100644
index 000000000000..d850a3e2a0ab
--- /dev/null
+++ b/tools/testing/selftests/sched/deadloop.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+/*
+ * Create multiple infinite loop threads based on the passed parameters
+ * Usage: deadloop num policy prio
+ * num: the number of child threads
+ * policy: the scheduling policy of the child threads, 0-fair, 1-fifo, 2-rr
+ * prio: the priority
+ * If this process is killed, it will print the loop count of all child threads
+ * to the OUTPUT_FILE
+ *
+ * Date: June 27, 2024
+ * Author: Xavier <xavier_qy@163.com>
+ */
+
+#define OUTPUT_FILE "rt_group_sched_test.log"
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#define do_err(x) \
+do { \
+ if ((x) < 0) { \
+ printf("test BUG_ON func %s, line %d %ld\n", \
+ __func__, __LINE__, (long)(x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+#define do_false(x) \
+do { \
+ if ((x) == 1) { \
+ printf("test BUG_ON func %s, line %d %d\n", \
+ __func__, __LINE__, (x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+
+struct thread_data {
+ pthread_t thread;
+ int index;
+ int pid;
+ unsigned long cnt;
+};
+
+static struct thread_data *pdata;
+static int thread_num = 1;
+
+static void create_thread_posix(void *entry, pthread_t *thread, int *para,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param param;
+ pthread_attr_t attr;
+
+ memset(¶m, 0, sizeof(param));
+ ret = pthread_attr_init(&attr);
+ do_err(ret);
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ do_err(ret);
+
+ param.sched_priority = prio;
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ do_err(ret);
+
+ ret = pthread_attr_setschedparam(&attr, ¶m);
+ do_err(ret);
+
+ ret = pthread_create(thread, &attr, entry, para);
+ do_err(ret);
+}
+
+static void *dead_loop_entry(void *arg)
+{
+ int index = *(int *)arg;
+ struct sched_param param;
+ int cur = gettid();
+
+ sched_getparam(cur, ¶m);
+ pdata[index].pid = cur;
+ printf("cur:%d prio:%d\n", cur, param.sched_priority);
+
+ while (1) {
+ asm volatile("" ::: "memory");
+ pdata[index].cnt++;
+ }
+ return NULL;
+}
+
+static void handle_signal(int signal)
+{
+ int cnt = 0;
+
+ if (signal == SIGTERM) {
+ FILE *file = freopen(OUTPUT_FILE, "a", stdout);
+
+ if (file == NULL) {
+ perror("freopen");
+ exit(0);
+ }
+
+ while (cnt < thread_num) {
+ printf("pid:%d cnt:%ld\n", pdata[cnt].pid, pdata[cnt].cnt);
+ cnt++;
+ }
+ fclose(file);
+ exit(0);
+ }
+}
+
+static int dead_loop_create(int policy, int prio)
+{
+ int cnt = 0;
+ int ret;
+ void *status;
+ struct sched_param param;
+
+ param.sched_priority = prio;
+ pdata = malloc(thread_num * sizeof(struct thread_data));
+ do_false(!pdata);
+
+ if (policy) {
+ ret = sched_setscheduler(0, policy, ¶m);
+ do_err(ret);
+ }
+
+ while (cnt < thread_num) {
+ pdata[cnt].index = cnt;
+ create_thread_posix(dead_loop_entry, &pdata[cnt].thread,
+ &pdata[cnt].index, policy, prio);
+ cnt++;
+ }
+
+ signal(SIGTERM, handle_signal);
+
+ cnt = 0;
+ while (cnt < thread_num) {
+ pthread_join(pdata[cnt].thread, &status);
+ cnt++;
+ }
+
+ free(pdata);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int policy = 2;
+ int prio = 50;
+
+ if (argc == 2)
+ thread_num = atoi(argv[1]);
+
+ if (argc == 3) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ if (policy > 0)
+ prio = 50;
+ }
+
+ if (argc == 4) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ prio = atoi(argv[3]);
+ }
+
+ dead_loop_create(policy, prio);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sched/rt_group_sched_test.sh b/tools/testing/selftests/sched/rt_group_sched_test.sh
new file mode 100755
index 000000000000..9031250a2684
--- /dev/null
+++ b/tools/testing/selftests/sched/rt_group_sched_test.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test for rt group scheduling
+# Date: June 27, 2024
+# Author: Xavier <xavier_qy@163.com>
+
+# Record the list of child process PIDs
+PIDS=()
+
+# File for redirected output
+LOGFILE="rt_group_sched_test.log"
+
+# Cleanup function: kill all recorded child processes and unmount the cgroup
+function cleanup() {
+ echo "Cleaning up..."
+ for pid in "${PIDS[@]}"; do
+ if kill -0 $pid 2>/dev/null; then
+ kill -TERM $pid
+ fi
+ done
+
+ # Sleep for a while to ensure the processes are properly killed
+ sleep 2
+
+ # Unmount the cgroup filesystem
+ umount /sys/fs/cgroup/cpu 2>/dev/null
+ umount /sys/fs/cgroup 2>/dev/null
+ echo "Cleanup completed."
+
+ # Ensure the LOGFILE exists and is correct
+ if [ ! -f "$LOGFILE" ]; then
+ echo "$LOGFILE not found!"
+ exit 1
+ fi
+
+ # Initialize the total count variable
+ total=0
+
+ # Read matching lines and calculate the total sum
+ while IFS= read -r line
+ do
+ # Use grep to match lines containing 'pid:' and 'cnt:', and extract the value of cnt
+ if echo "$line" | grep -q '^pid:[[:digit:]]\+ cnt:[[:digit:]]\+'; then
+ cnt=$(echo "$line" | sed -n \
+ 's/^pid:[[:digit:]]\+ cnt:\([[:digit:]]\+\)/\1/p')
+ total=$((total + cnt))
+ fi
+ done < "$LOGFILE"
+
+ # Print the total sum
+ echo "Total cnt: $total"
+ echo "Finished processing."
+}
+
+# Capture actions when interrupted or terminated by a signal
+trap cleanup EXIT
+
+# Start the cgroup filesystem and create the necessary directories
+function setup_cgroups() {
+ mount -t tmpfs -o mode=755 cgroup_root /sys/fs/cgroup
+ mkdir -p /sys/fs/cgroup/cpu
+ mount -t cgroup -o cpu none /sys/fs/cgroup/cpu
+}
+
+# Create cgroup subdirectories and configure their settings
+function create_child_cgroup() {
+ local base_dir=$1
+ local name=$2
+ local rt_period=$3
+ local rt_runtime=$4
+ mkdir -p "$base_dir/$name"
+ echo $rt_period > "$base_dir/$name/cpu.rt_period_us"
+ echo $rt_runtime > "$base_dir/$name/cpu.rt_runtime_us"
+}
+# Launch a process and add it to the specified cgroup
+function launch_process() {
+ local process_name=$1
+
+ # Three parameters representing the number of child threads, scheduling policy, and priority
+ local args=$2
+ local cgroup_path=$3
+
+ # Launch the process
+ exec -a $process_name ./deadloop $args &
+ local pid=$!
+ PIDS+=($pid)
+
+ # Short sleep to ensure the process starts
+ sleep 1
+
+ # Check if the process started successfully
+ if ! pgrep -x $process_name > /dev/null; then
+ echo "Error: No process found with name $process_name."
+ exit 1
+ fi
+
+ echo $pid > "$cgroup_path/cgroup.procs"
+ echo "Process $process_name with PID $pid added to cgroup $cgroup_path"
+}
+
+# Main function running all tasks
+function main() {
+ echo "The test needs 30 seconds..."
+ rm -f "$LOGFILE"
+ setup_cgroups
+ create_child_cgroup "/sys/fs/cgroup/cpu" "child1" 1000000 800000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1" "child2" 1000000 700000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1/child2" "child3" 1000000 600000
+ launch_process "child1" "3 2 50" "/sys/fs/cgroup/cpu/child1"
+ launch_process "child2" "3 2 50" "/sys/fs/cgroup/cpu/child1/child2"
+ launch_process "child3" "1 2 50" "/sys/fs/cgroup/cpu/child1/child2/child3"
+ launch_process "tg_root" "1 2 50" "/sys/fs/cgroup/cpu"
+
+ # Run for 30 seconds
+ sleep 30
+}
+
+# Execute the main function
+main
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-06-29 11:28 ` [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-07-04 8:30 ` kernel test robot
0 siblings, 0 replies; 20+ messages in thread
From: kernel test robot @ 2024-07-04 8:30 UTC (permalink / raw)
To: Xavier
Cc: oe-lkp, lkp, linux-kernel, mingo, peterz, juri.lelli,
vincent.guittot, dietmar.eggemann, rostedt, bsegall, mgorman,
bristot, vschneid, Xavier, oliver.sang
Hello,
kernel test robot noticed "WARNING:at_kernel/sched/rt.c:#__enqueue_rt_entity" on:
commit: ed0ed14c2b47993c00c4b3cdceabef535bcef32b ("[PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se")
url: https://github.com/intel-lab-lkp/linux/commits/Xavier/RT-SCHED-Optimize-the-enqueue-and-dequeue-operations-for-rt_se/20240630-173825
base: https://git.kernel.org/cgit/linux/kernel/git/tip/tip.git c793a62823d1ce8f70d9cfc7803e3ea436277cda
patch link: https://lore.kernel.org/all/20240629112812.243691-2-xavier_qy@163.com/
patch subject: [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
in testcase: blktests
version: blktests-x86_64-775a058-1_20240702
with following parameters:
disk: 1SSD
test: block-group-01
compiler: gcc-13
test machine: 4 threads Intel(R) Core(TM) i5-6500 CPU @ 3.20GHz (Skylake) with 32G memory
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202407041644.de55c25-oliver.sang@intel.com
[ 54.093440][ C2] ------------[ cut here ]------------
[ 54.094193][ T705] list_add double add: new=ffff888802a8abc0, prev=ffff888802a8abc0, next=ffff8887892c4dd0.
[ 54.098261][ C2] WARNING: CPU: 2 PID: 53 at kernel/sched/rt.c:1415 __enqueue_rt_entity (kernel/sched/rt.c:1415 (discriminator 1))
[ 54.103613][ T705] ------------[ cut here ]------------
[ 54.113477][ C2] Modules linked in: dm_multipath
[ 54.122743][ T705] kernel BUG at lib/list_debug.c:35!
[ 54.128080][ C2] btrfs blake2b_generic
[ 54.132987][ T705] Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI
[ 54.138148][ C2] xor zstd_compress
[ 54.142266][ T705] CPU: 3 PID: 705 Comm: multipathd Tainted: G S 6.10.0-rc1-00010-ged0ed14c2b47 #1
[ 54.149087][ C2] raid6_pq libcrc32c
[ 54.152852][ T705] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.8.1 12/05/2017
[ 54.163339][ C2] ipmi_devintf ipmi_msghandler
[ 54.167192][ T705] RIP: 0010:__list_add_valid_or_report (lib/list_debug.c:35 (discriminator 1))
[ 54.175322][ C2] intel_rapl_msr intel_rapl_common
[ 54.180049][ T705] Code: 0b 48 89 f1 48 c7 c7 00 fa 26 84 48 89 de e8 d6 75 f2 fe 0f 0b 48 89 f2 48 89 d9 48 89 ee 48 c7 c7 80 fa 26 84 e8 bf 75 f2 fe <0f> 0b 48 89 f7 48 89 34 24 e8 11 cc 61 ff 48 8b 34 24 e9 71 ff ff
All code
========
0: 0b 48 89 or -0x77(%rax),%ecx
3: f1 icebp
4: 48 c7 c7 00 fa 26 84 mov $0xffffffff8426fa00,%rdi
b: 48 89 de mov %rbx,%rsi
e: e8 d6 75 f2 fe callq 0xfffffffffef275e9
13: 0f 0b ud2
15: 48 89 f2 mov %rsi,%rdx
18: 48 89 d9 mov %rbx,%rcx
1b: 48 89 ee mov %rbp,%rsi
1e: 48 c7 c7 80 fa 26 84 mov $0xffffffff8426fa80,%rdi
25: e8 bf 75 f2 fe callq 0xfffffffffef275e9
2a:* 0f 0b ud2 <-- trapping instruction
2c: 48 89 f7 mov %rsi,%rdi
2f: 48 89 34 24 mov %rsi,(%rsp)
33: e8 11 cc 61 ff callq 0xffffffffff61cc49
38: 48 8b 34 24 mov (%rsp),%rsi
3c: e9 .byte 0xe9
3d: 71 ff jno 0x3e
3f: ff .byte 0xff
Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: 48 89 f7 mov %rsi,%rdi
5: 48 89 34 24 mov %rsi,(%rsp)
9: e8 11 cc 61 ff callq 0xffffffffff61cc1f
e: 48 8b 34 24 mov (%rsp),%rsi
12: e9 .byte 0xe9
13: 71 ff jno 0x14
15: ff .byte 0xff
[ 54.186345][ C2] sd_mod t10_pi
[ 54.191424][ T705] RSP: 0018:ffffc90000327b38 EFLAGS: 00010046
[ 54.211022][ C2] x86_pkg_temp_thermal
[ 54.214447][ T705]
[ 54.220405][ C2] crc64_rocksoft_generic crc64_rocksoft
[ 54.224435][ T705] RAX: 0000000000000058 RBX: ffff8887892c4dd0 RCX: ffffffff82424f4e
[ 54.226632][ C2] intel_powerclamp crc64
[ 54.232145][ T705] RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff8887893b5380
[ 54.240012][ C2] coretemp sg
[ 54.244217][ T705] RBP: ffff888802a8abc0 R08: 0000000000000001 R09: fffff52000064f22
[ 54.252087][ C2] kvm_intel i915
[ 54.255330][ T705] R10: ffffc90000327917 R11: 205d324320202020 R12: ffff888802a8abc0
[ 54.263200][ C2] kvm crct10dif_pclmul
[ 54.266705][ T705] R13: ffff8887892c4dd0 R14: ffff888802a8ac00 R15: ffff8887892c4dd8
[ 54.274572][ C2] crc32_pclmul crc32c_intel
[ 54.278599][ T705] FS: 00007f1b015ee680(0000) GS:ffff888789380000(0000) knlGS:0000000000000000
[ 54.286469][ C2] drm_buddy ghash_clmulni_intel
[ 54.290934][ T705] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 54.299764][ C2] intel_gtt sha512_ssse3
[ 54.304580][ T705] CR2: 000055e6a99e25f8 CR3: 000000080473e006 CR4: 00000000003706f0
[ 54.311054][ C2] drm_display_helper
[ 54.315255][ T705] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 54.323124][ C2] rapl ttm
[ 54.326976][ T705] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 54.334845][ C2] drm_kms_helper
[ 54.337825][ T705] Call Trace:
[ 54.345696][ C2] ahci mei_wdt
[ 54.349201][ T705] <TASK>
[ 54.352357][ C2] intel_cstate wmi_bmof
[ 54.355687][ T705] ? die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434 arch/x86/kernel/dumpstack.c:447)
[ 54.358493][ C2] intel_uncore
[ 54.362610][ T705] ? do_trap (arch/x86/kernel/traps.c:114 arch/x86/kernel/traps.c:155)
[ 54.366202][ C2] binfmt_misc video
[ 54.369533][ T705] ? __list_add_valid_or_report (lib/list_debug.c:35 (discriminator 1))
[ 54.373650][ C2] libahci mei_me
[ 54.377418][ T705] ? do_error_trap (arch/x86/include/asm/traps.h:58 arch/x86/kernel/traps.c:176)
[ 54.383104][ C2] i2c_i801 wmi
[ 54.386607][ T705] ? __list_add_valid_or_report (lib/list_debug.c:35 (discriminator 1))
[ 54.391070][ C2] intel_pch_thermal i2c_smbus
[ 54.394400][ T705] ? handle_invalid_op (arch/x86/kernel/traps.c:214)
[ 54.400087][ C2] mei libata
[ 54.404727][ T705] ? __list_add_valid_or_report (lib/list_debug.c:35 (discriminator 1))
[ 54.409540][ C2] acpi_pad fuse
[ 54.412697][ T705] ? exc_invalid_op (arch/x86/kernel/traps.c:267)
[ 54.418385][ C2] loop drm
[ 54.421803][ T705] ? asm_exc_invalid_op (arch/x86/include/asm/idtentry.h:621)
[ 54.426355][ C2] dm_mod ip_tables
[ 54.429337][ T705] ? llist_add_batch (lib/llist.c:33 (discriminator 14))
[ 54.434240][ C2]
[ 54.437928][ T705] ? __list_add_valid_or_report (lib/list_debug.c:35 (discriminator 1))
[ 54.442661][ C2] CPU: 2 PID: 53 Comm: khugepaged Tainted: G S 6.10.0-rc1-00010-ged0ed14c2b47 #1
[ 54.444859][ T705] ? __list_add_valid_or_report (lib/list_debug.c:35 (discriminator 1))
[ 54.450557][ C2] Hardware name: Dell Inc. OptiPlex 7040/0Y7WYT, BIOS 1.8.1 12/05/2017
[ 54.460974][ T705] __enqueue_rt_entity (include/linux/list.h:150 (discriminator 1) include/linux/list.h:183 (discriminator 1) kernel/sched/rt.c:1419 (discriminator 1))
[ 54.466661][ C2] RIP: 0010:__enqueue_rt_entity (kernel/sched/rt.c:1415 (discriminator 1))
[ 54.474792][ T705] enqueue_rt_entity (kernel/sched/rt.c:1616)
[ 54.479778][ C2] Code: fa 48 c1 ea 03 80 3c 02 00 0f 85 1f 03 00 00 49 8b bf 40 0a 00 00 44 89 ea 48 81 c7 b8 00 00 00 e8 15 72 05 00 e9 23 fa ff ff <0f> 0b e9 9b f6 ff ff 48 89 ee 48 89 df e8 8e d1 ff ff e9 f6 f5 ff
All code
========
0: fa cli
1: 48 c1 ea 03 shr $0x3,%rdx
5: 80 3c 02 00 cmpb $0x0,(%rdx,%rax,1)
9: 0f 85 1f 03 00 00 jne 0x32e
f: 49 8b bf 40 0a 00 00 mov 0xa40(%r15),%rdi
16: 44 89 ea mov %r13d,%edx
19: 48 81 c7 b8 00 00 00 add $0xb8,%rdi
20: e8 15 72 05 00 callq 0x5723a
25: e9 23 fa ff ff jmpq 0xfffffffffffffa4d
2a:* 0f 0b ud2 <-- trapping instruction
2c: e9 9b f6 ff ff jmpq 0xfffffffffffff6cc
31: 48 89 ee mov %rbp,%rsi
34: 48 89 df mov %rbx,%rdi
37: e8 8e d1 ff ff callq 0xffffffffffffd1ca
3c: e9 .byte 0xe9
3d: f6 f5 div %ch
3f: ff .byte 0xff
Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: e9 9b f6 ff ff jmpq 0xfffffffffffff6a2
7: 48 89 ee mov %rbp,%rsi
a: 48 89 df mov %rbx,%rdi
d: e8 8e d1 ff ff callq 0xffffffffffffd1a0
12: e9 .byte 0xe9
13: f6 f5 div %ch
15: ff .byte 0xff
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240704/202407041644.de55c25-oliver.sang@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH-RT sched v3 0/2] Optimize the RT group scheduling
2024-06-29 11:28 ` [PATCH-RT sched v2 0/2] Optimize the " Xavier
2024-06-29 11:28 ` [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-06-29 11:28 ` [PATCH-RT sched v2 2/2] RT test: Adding test cases for RT group scheduling Xavier
@ 2024-07-16 6:05 ` Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 " Xavier
2 siblings, 2 replies; 20+ messages in thread
From: Xavier @ 2024-07-16 6:05 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
Hi all,
Patch 3 fixed the issue with handling tasks with prio set to 0 during
the execution of blktests.
Kindly review.
Best Regards,
Xavier
Xavier (2):
RT SCHED: Optimize the enqueue and dequeue operations for rt_se
RT test: Adding test cases for RT group scheduling
MAINTAINERS | 7 +
kernel/sched/debug.c | 48 +++
kernel/sched/rt.c | 287 +++++++++++++++---
kernel/sched/sched.h | 1 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 ++++++++
7 files changed, 618 insertions(+), 40 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
--
2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-07-16 6:05 ` [PATCH-RT sched v3 0/2] Optimize the " Xavier
@ 2024-07-16 6:05 ` Xavier
2024-07-16 20:24 ` kernel test robot
2024-07-16 6:05 ` [PATCH-RT sched v3 " Xavier
1 sibling, 1 reply; 20+ messages in thread
From: Xavier @ 2024-07-16 6:05 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
This patch optimizes the enqueue and dequeue of rt_se, the strategy employs
a bottom-up removal approach. Specifically, when removing an rt_se at a
certain level, if it is determined that the highest priority of the rq
associated with that rt_se has not changed, there is no need to continue
removing rt_se at higher levels. At this point, only the total number
of removed rt_se needs to be recorded, and the rt_nr_running count of
higher-level rq should be removed accordingly.
Signed-off-by: Xavier <xavier_qy@163.com>
---
kernel/sched/debug.c | 48 ++++++++
kernel/sched/rt.c | 287 +++++++++++++++++++++++++++++++++++++------
kernel/sched/sched.h | 1 +
3 files changed, 298 insertions(+), 38 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c1eb9a1afd13..bf9edba5e87b 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -712,6 +712,54 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#endif
}
+void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
+{
+ struct task_struct *task;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (rt_se->my_q) {
+ SEQ_printf_task_group_path(m, rt_se->my_q->tg, "%s\n");
+ return;
+ }
+#endif
+ task = container_of(rt_se, struct task_struct, rt);
+ SEQ_printf(m, " prio-%d, pid-%d, %s\n", task->prio, task->pid, task->comm);
+}
+
+/*shall be called in rq lock*/
+void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq)
+{
+ struct rt_prio_array *array = &rt_rq->active;
+ struct sched_rt_entity *rt_se;
+ struct list_head *queue, *head;
+ unsigned long bitmap[2];
+ int idx;
+ int count = 0;
+
+ if (!rt_rq->rt_nr_running)
+ return;
+
+ memcpy(bitmap, array->bitmap, sizeof(unsigned long) * 2);
+ idx = sched_find_first_bit(bitmap);
+ WARN_ON_ONCE(idx >= MAX_RT_PRIO);
+
+ while (1) {
+ clear_bit(idx, bitmap);
+ queue = array->queue + idx;
+ head = queue;
+ queue = queue->next;
+ do {
+ rt_se = list_entry(queue, struct sched_rt_entity, run_list);
+ print_rt_se(m, rt_se);
+ queue = queue->next;
+ count++;
+ } while (queue != head);
+ idx = sched_find_first_bit(bitmap);
+ if (idx >= MAX_RT_PRIO)
+ break;
+ }
+}
+
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index aa4c1c874fa4..b18c424a50d2 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1113,7 +1113,7 @@ void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
#endif /* CONFIG_SMP */
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-static void
+static int
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1122,9 +1122,11 @@ inc_rt_prio(struct rt_rq *rt_rq, int prio)
rt_rq->highest_prio.curr = prio;
inc_rt_prio_smp(rt_rq, prio, prev_prio);
+
+ return prev_prio > prio;
}
-static void
+static int
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1149,12 +1151,22 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
}
dec_rt_prio_smp(rt_rq, prio, prev_prio);
+ if (rt_rq->highest_prio.curr > prio)
+ return prio;
+ else
+ return MAX_RT_PRIO;
}
#else
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
+static inline int inc_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
+static inline int dec_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
@@ -1218,28 +1230,31 @@ unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
}
static inline
-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
int prio = rt_se_prio(rt_se);
+ int prio_change;
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
- inc_rt_prio(rt_rq, prio);
+ prio_change = inc_rt_prio(rt_rq, prio);
inc_rt_group(rt_se, rt_rq);
+ return prio_change;
}
static inline
-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq, int prio)
{
+ int prio_changed;
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
- WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
- dec_rt_prio(rt_rq, rt_se_prio(rt_se));
+ prio_changed = dec_rt_prio(rt_rq, prio);
dec_rt_group(rt_se, rt_rq);
+ return prio_changed;
}
/*
@@ -1255,12 +1270,13 @@ static inline bool move_entity(unsigned int flags)
return true;
}
-static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+static void __delist_rt_entity(struct sched_rt_entity *rt_se,
+ struct rt_prio_array *array, int last_prio)
{
list_del_init(&rt_se->run_list);
- if (list_empty(array->queue + rt_se_prio(rt_se)))
- __clear_bit(rt_se_prio(rt_se), array->bitmap);
+ if (list_empty(array->queue + last_prio))
+ __clear_bit(last_prio, array->bitmap);
rt_se->on_list = 0;
}
@@ -1371,7 +1387,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
}
}
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/*
+ * Returns: -1 indicates that rt_se was not enqueued, 0 indicates that the highest
+ * priority of the rq did not change after enqueue, and 1 indicates that the highest
+ * priority of the rq changed after enqueue.
+ */
+static int __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@@ -1386,8 +1407,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
*/
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
if (rt_se->on_list)
- __delist_rt_entity(rt_se, array);
- return;
+ __delist_rt_entity(rt_se, array, rt_se_prio(rt_se));
+ return -1;
}
if (move_entity(flags)) {
@@ -1402,73 +1423,263 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
}
rt_se->on_rq = 1;
- inc_rt_tasks(rt_se, rt_rq);
+ return inc_rt_tasks(rt_se, rt_rq);
}
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/**
+ * delete rt_se from rt_rq
+ *
+ * @rt_se Nodes to be deleted
+ * @last_prio The highest priority of this rt_se before the previous round
+ * of deletion
+ * @flags operation flags
+ *
+ * Returns: =0 indicates that the highest priority of the current rq did not
+ * change during this deletion. >0 indicates it changed, and it returns the
+ * previous highest priority to use in the next round of deletion.
+ */
+static int __dequeue_rt_entity(struct sched_rt_entity *rt_se, int last_prio,
+ unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
if (move_entity(flags)) {
WARN_ON_ONCE(!rt_se->on_list);
- __delist_rt_entity(rt_se, array);
+ __delist_rt_entity(rt_se, array, last_prio);
}
rt_se->on_rq = 0;
- dec_rt_tasks(rt_se, rt_rq);
+ return dec_rt_tasks(rt_se, rt_rq, last_prio);
+}
+
+static inline void dec_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running -= rt;
+ rt_rq->rr_nr_running -= rr;
+}
+
+static inline void add_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running += rt;
+ rt_rq->rr_nr_running += rr;
+}
+
+static inline bool on_top_rt_rq(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (rt_se->parent)
+ return false;
+#endif
+ return true;
}
/*
- * Because the prio of an upper entry depends on the lower
- * entries, we must remove entries top - down.
+ * To optimize the enqueue and dequeue of rt_se, this strategy employs a
+ * bottom-up removal approach. Specifically, when removing an rt_se at a
+ * certain level, if it is determined that the highest priority of the rq
+ * associated with that rt_se has not changed, there is no need to continue
+ * removing rt_se at higher levels. At this point, only the total number
+ * of removed rt_se needs to be recorded, and the rt_nr_running count of
+ * higher-level rq should be removed accordingly.
+ *
+ * For enqueue operations, if an rt_se at a certain level is in the rq,
+ * it is still necessary to check the priority of the higher-level rq.
+ * If the priority of the higher-level rq is found to be lower than that
+ * of the rt_se to be added, it should be removed, as updating the highest
+ * priority of the rq during addition will cause the rq to be repositioned
+ * in the parent rq.
+ *
+ * Conversely, for dequeue operations, if an rt_se at a certain level is
+ * not in the rq, the operation can be exited immediately to reduce
+ * unnecessary checks and handling.
+ *
+ * The return value refers to the last rt_se that was removed for enqueue
+ * operations. And for dequeue operations, it refers to the last rt_se
+ * that was either removed or had its rt_nr_running updated.
*/
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
+static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
+ unsigned int flags, int for_enqueue)
{
- struct sched_rt_entity *back = NULL;
- unsigned int rt_nr_running;
+ struct sched_rt_entity *last = rt_se;
+ struct sched_rt_entity *origin = rt_se;
+ unsigned int del_rt_nr = 0;
+ unsigned int del_rr_nr = 0;
+ int prio_changed = rt_se_prio(rt_se);
+ int sub_on_rq = 1;
for_each_sched_rt_entity(rt_se) {
- rt_se->back = back;
- back = rt_se;
- }
+ if (on_rt_rq(rt_se)) {
+ if (sub_on_rq) {
+ /*
+ * The number of tasks removed from the sub-level rt_se also needs
+ * to be subtracted from the rq of the current rt_se, as the current
+ * rt_se's rq no longer includes the number of removed tasks.
+ */
+ dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
+ if ((prio_changed != MAX_RT_PRIO) ||
+ (rt_se_prio(rt_se) > rt_se_prio(origin))) {
+ /*
+ * If the removal of the lower-level rt_se causes the
+ * highest priority of the current rq to change, or if the
+ * priority of current rq is lower than the rt_se to be
+ * added, then the current rt_se also needs to be removed
+ * from its parent rq, and the number of deleted tasks
+ * should be accumulated.
+ */
+ if (prio_changed == MAX_RT_PRIO)
+ prio_changed = rt_se_prio(rt_se);
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else if (!for_enqueue) {
+ /* For dequeue, last may only rt_nr_running was modified.*/
+ last = rt_se;
+ }
+ } else {
+ /*
+ * Entering this branch must be for enqueue, as dequeue would break
+ * if an rt_se is not online.
+ * If the sub-level node is not online, and the current rt_se's
+ * priority is lower than the one being added, current rt_se need
+ * to be removed.
+ */
+ prio_changed = rt_se_prio(rt_se);
+ if (prio_changed > rt_se_prio(origin)) {
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else {
+ prio_changed = MAX_RT_PRIO;
+ }
+ }
- rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
+ /*
+ * If the current rt_se is on the top rt_rq, then the already deleted
+ * nodes, plus the count of the rt_rq where current rt_se located,
+ * need to be removed from the top_rt_rq.
+ */
+ if (on_top_rt_rq(rt_se)) {
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
+ }
+ sub_on_rq = 1;
+ } else if (for_enqueue) {
+ struct rt_rq *group_rq = group_rt_rq(rt_se);
- for (rt_se = back; rt_se; rt_se = rt_se->back) {
- if (on_rt_rq(rt_se))
- __dequeue_rt_entity(rt_se, flags);
+ /*
+ * In the case of an enqueue operation, if a certain level is found to be
+ * not online, then the previous counts need to be reset to zero.
+ */
+ prio_changed = MAX_RT_PRIO;
+ sub_on_rq = 0;
+ del_rt_nr = 0;
+ del_rr_nr = 0;
+
+ /*
+ * If the current group is being throttled, then there is no need to check
+ * higher levels since enqueueing will not affect higher-level nodes.
+ */
+ if (group_rq && rt_rq_throttled(group_rq))
+ break;
+
+ if (on_top_rt_rq(rt_se))
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ rt_rq_of_se(rt_se)->rt_nr_running);
+ } else {
+ last = rt_se;
+ break;
+ }
}
- dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
+ return last;
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int enqueue = 1;
+ int prio_change = 1;
update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
- for_each_sched_rt_entity(rt_se)
- __enqueue_rt_entity(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 1);
+
+ for_each_sched_rt_entity(rt_se) {
+ if (enqueue || !on_rt_rq(rt_se) || (prio_change == 1)) {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change >= 0) {
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
+ } else {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ }
+
+ if (rt_se == last)
+ enqueue = 0;
+ }
+
enqueue_top_rt_rq(&rq->rt);
}
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int prio_change = 1;
update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 0);
for_each_sched_rt_entity(rt_se) {
struct rt_rq *rt_rq = group_rt_rq(rt_se);
+ if (rt_rq && rt_rq->rt_nr_running) {
+ if (on_rt_rq(rt_se)) {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ } else {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change == 0) {
+ /*
+ * If enqueue is successful and the priority of the rq has
+ * not changed, then the parent node only needs to add the
+ * count of the current rt_se. Otherwise, the parent node
+ * will also need to enqueue.
+ */
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ }
+ }
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
- if (rt_rq && rt_rq->rt_nr_running)
- __enqueue_rt_entity(rt_se, flags);
+ /*
+ * last is the rt_se of the last deletion or modification of the
+ * count, so the subsequent rt_se does not need to be updated.
+ */
+ if (rt_se == last)
+ break;
}
+
enqueue_top_rt_rq(&rq->rt);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a831af102070..b634153aacf0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2878,6 +2878,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq);
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
extern void resched_latency_warn(int cpu, u64 latency);
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH-RT sched v3 2/2] RT test: Adding test cases for RT group scheduling
2024-07-16 6:05 ` [PATCH-RT sched v3 0/2] Optimize the " Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-07-16 6:05 ` Xavier
1 sibling, 0 replies; 20+ messages in thread
From: Xavier @ 2024-07-16 6:05 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
Adding test cases for RT group scheduling, create some RT infinite loop
processes/threads, then set them to the same or different priorities.
Place them in different RT task groups, run for a period of time,
and finally count the number of infinite loop executions for all tasks.
Signed-off-by: Xavier <xavier_qy@163.com>
---
MAINTAINERS | 7 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 +++++++++++
4 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
diff --git a/MAINTAINERS b/MAINTAINERS
index 43353b705988..d29effe57bf8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19480,6 +19480,13 @@ L: linux-remoteproc@vger.kernel.org
S: Maintained
F: drivers/tty/rpmsg_tty.c
+RT GROUP SCHED TEST
+M: Xavier <xavier_qy@163.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: tools/testing/selftests/sched/deadloop.c
+F: tools/testing/selftests/sched/rt_group_sched_test.sh
+
RTL2830 MEDIA DRIVER
L: linux-media@vger.kernel.org
S: Orphan
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
index 099ee9213557..96decb58bf35 100644
--- a/tools/testing/selftests/sched/Makefile
+++ b/tools/testing/selftests/sched/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
-TEST_GEN_FILES := cs_prctl_test
-TEST_PROGS := cs_prctl_test
+TEST_GEN_FILES := cs_prctl_test deadloop
+TEST_PROGS := cs_prctl_test deadloop
include ../lib.mk
diff --git a/tools/testing/selftests/sched/deadloop.c b/tools/testing/selftests/sched/deadloop.c
new file mode 100644
index 000000000000..d850a3e2a0ab
--- /dev/null
+++ b/tools/testing/selftests/sched/deadloop.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+/*
+ * Create multiple infinite loop threads based on the passed parameters
+ * Usage: deadloop num policy prio
+ * num: the number of child threads
+ * policy: the scheduling policy of the child threads, 0-fair, 1-fifo, 2-rr
+ * prio: the priority
+ * If this process is killed, it will print the loop count of all child threads
+ * to the OUTPUT_FILE
+ *
+ * Date: June 27, 2024
+ * Author: Xavier <xavier_qy@163.com>
+ */
+
+#define OUTPUT_FILE "rt_group_sched_test.log"
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#define do_err(x) \
+do { \
+ if ((x) < 0) { \
+ printf("test BUG_ON func %s, line %d %ld\n", \
+ __func__, __LINE__, (long)(x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+#define do_false(x) \
+do { \
+ if ((x) == 1) { \
+ printf("test BUG_ON func %s, line %d %d\n", \
+ __func__, __LINE__, (x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+
+struct thread_data {
+ pthread_t thread;
+ int index;
+ int pid;
+ unsigned long cnt;
+};
+
+static struct thread_data *pdata;
+static int thread_num = 1;
+
+static void create_thread_posix(void *entry, pthread_t *thread, int *para,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param param;
+ pthread_attr_t attr;
+
+ memset(¶m, 0, sizeof(param));
+ ret = pthread_attr_init(&attr);
+ do_err(ret);
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ do_err(ret);
+
+ param.sched_priority = prio;
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ do_err(ret);
+
+ ret = pthread_attr_setschedparam(&attr, ¶m);
+ do_err(ret);
+
+ ret = pthread_create(thread, &attr, entry, para);
+ do_err(ret);
+}
+
+static void *dead_loop_entry(void *arg)
+{
+ int index = *(int *)arg;
+ struct sched_param param;
+ int cur = gettid();
+
+ sched_getparam(cur, ¶m);
+ pdata[index].pid = cur;
+ printf("cur:%d prio:%d\n", cur, param.sched_priority);
+
+ while (1) {
+ asm volatile("" ::: "memory");
+ pdata[index].cnt++;
+ }
+ return NULL;
+}
+
+static void handle_signal(int signal)
+{
+ int cnt = 0;
+
+ if (signal == SIGTERM) {
+ FILE *file = freopen(OUTPUT_FILE, "a", stdout);
+
+ if (file == NULL) {
+ perror("freopen");
+ exit(0);
+ }
+
+ while (cnt < thread_num) {
+ printf("pid:%d cnt:%ld\n", pdata[cnt].pid, pdata[cnt].cnt);
+ cnt++;
+ }
+ fclose(file);
+ exit(0);
+ }
+}
+
+static int dead_loop_create(int policy, int prio)
+{
+ int cnt = 0;
+ int ret;
+ void *status;
+ struct sched_param param;
+
+ param.sched_priority = prio;
+ pdata = malloc(thread_num * sizeof(struct thread_data));
+ do_false(!pdata);
+
+ if (policy) {
+ ret = sched_setscheduler(0, policy, ¶m);
+ do_err(ret);
+ }
+
+ while (cnt < thread_num) {
+ pdata[cnt].index = cnt;
+ create_thread_posix(dead_loop_entry, &pdata[cnt].thread,
+ &pdata[cnt].index, policy, prio);
+ cnt++;
+ }
+
+ signal(SIGTERM, handle_signal);
+
+ cnt = 0;
+ while (cnt < thread_num) {
+ pthread_join(pdata[cnt].thread, &status);
+ cnt++;
+ }
+
+ free(pdata);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int policy = 2;
+ int prio = 50;
+
+ if (argc == 2)
+ thread_num = atoi(argv[1]);
+
+ if (argc == 3) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ if (policy > 0)
+ prio = 50;
+ }
+
+ if (argc == 4) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ prio = atoi(argv[3]);
+ }
+
+ dead_loop_create(policy, prio);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sched/rt_group_sched_test.sh b/tools/testing/selftests/sched/rt_group_sched_test.sh
new file mode 100755
index 000000000000..9031250a2684
--- /dev/null
+++ b/tools/testing/selftests/sched/rt_group_sched_test.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test for rt group scheduling
+# Date: June 27, 2024
+# Author: Xavier <xavier_qy@163.com>
+
+# Record the list of child process PIDs
+PIDS=()
+
+# File for redirected output
+LOGFILE="rt_group_sched_test.log"
+
+# Cleanup function: kill all recorded child processes and unmount the cgroup
+function cleanup() {
+ echo "Cleaning up..."
+ for pid in "${PIDS[@]}"; do
+ if kill -0 $pid 2>/dev/null; then
+ kill -TERM $pid
+ fi
+ done
+
+ # Sleep for a while to ensure the processes are properly killed
+ sleep 2
+
+ # Unmount the cgroup filesystem
+ umount /sys/fs/cgroup/cpu 2>/dev/null
+ umount /sys/fs/cgroup 2>/dev/null
+ echo "Cleanup completed."
+
+ # Ensure the LOGFILE exists and is correct
+ if [ ! -f "$LOGFILE" ]; then
+ echo "$LOGFILE not found!"
+ exit 1
+ fi
+
+ # Initialize the total count variable
+ total=0
+
+ # Read matching lines and calculate the total sum
+ while IFS= read -r line
+ do
+ # Use grep to match lines containing 'pid:' and 'cnt:', and extract the value of cnt
+ if echo "$line" | grep -q '^pid:[[:digit:]]\+ cnt:[[:digit:]]\+'; then
+ cnt=$(echo "$line" | sed -n \
+ 's/^pid:[[:digit:]]\+ cnt:\([[:digit:]]\+\)/\1/p')
+ total=$((total + cnt))
+ fi
+ done < "$LOGFILE"
+
+ # Print the total sum
+ echo "Total cnt: $total"
+ echo "Finished processing."
+}
+
+# Capture actions when interrupted or terminated by a signal
+trap cleanup EXIT
+
+# Start the cgroup filesystem and create the necessary directories
+function setup_cgroups() {
+ mount -t tmpfs -o mode=755 cgroup_root /sys/fs/cgroup
+ mkdir -p /sys/fs/cgroup/cpu
+ mount -t cgroup -o cpu none /sys/fs/cgroup/cpu
+}
+
+# Create cgroup subdirectories and configure their settings
+function create_child_cgroup() {
+ local base_dir=$1
+ local name=$2
+ local rt_period=$3
+ local rt_runtime=$4
+ mkdir -p "$base_dir/$name"
+ echo $rt_period > "$base_dir/$name/cpu.rt_period_us"
+ echo $rt_runtime > "$base_dir/$name/cpu.rt_runtime_us"
+}
+# Launch a process and add it to the specified cgroup
+function launch_process() {
+ local process_name=$1
+
+ # Three parameters representing the number of child threads, scheduling policy, and priority
+ local args=$2
+ local cgroup_path=$3
+
+ # Launch the process
+ exec -a $process_name ./deadloop $args &
+ local pid=$!
+ PIDS+=($pid)
+
+ # Short sleep to ensure the process starts
+ sleep 1
+
+ # Check if the process started successfully
+ if ! pgrep -x $process_name > /dev/null; then
+ echo "Error: No process found with name $process_name."
+ exit 1
+ fi
+
+ echo $pid > "$cgroup_path/cgroup.procs"
+ echo "Process $process_name with PID $pid added to cgroup $cgroup_path"
+}
+
+# Main function running all tasks
+function main() {
+ echo "The test needs 30 seconds..."
+ rm -f "$LOGFILE"
+ setup_cgroups
+ create_child_cgroup "/sys/fs/cgroup/cpu" "child1" 1000000 800000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1" "child2" 1000000 700000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1/child2" "child3" 1000000 600000
+ launch_process "child1" "3 2 50" "/sys/fs/cgroup/cpu/child1"
+ launch_process "child2" "3 2 50" "/sys/fs/cgroup/cpu/child1/child2"
+ launch_process "child3" "1 2 50" "/sys/fs/cgroup/cpu/child1/child2/child3"
+ launch_process "tg_root" "1 2 50" "/sys/fs/cgroup/cpu"
+
+ # Run for 30 seconds
+ sleep 30
+}
+
+# Execute the main function
+main
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-07-16 6:05 ` [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-07-16 20:24 ` kernel test robot
2024-07-17 3:00 ` [PATCH-RT sched v4 0/2] Optimize the RT group scheduling Xavier
0 siblings, 1 reply; 20+ messages in thread
From: kernel test robot @ 2024-07-16 20:24 UTC (permalink / raw)
To: Xavier, mingo, peterz, juri.lelli, vincent.guittot
Cc: oe-kbuild-all, dietmar.eggemann, rostedt, bsegall, mgorman,
bristot, vschneid, linux-kernel, oliver.sang, Xavier
Hi Xavier,
kernel test robot noticed the following build warnings:
[auto build test WARNING on tip/sched/core]
[also build test WARNING on shuah-kselftest/next shuah-kselftest/fixes peterz-queue/sched/core linus/master v6.10]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Xavier/RT-SCHED-Optimize-the-enqueue-and-dequeue-operations-for-rt_se/20240716-140932
base: tip/sched/core
patch link: https://lore.kernel.org/r/20240716060514.304324-2-xavier_qy%40163.com
patch subject: [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
config: x86_64-randconfig-121-20240716 (https://download.01.org/0day-ci/archive/20240717/202407170411.vRtOCOzx-lkp@intel.com/config)
compiler: gcc-8 (Ubuntu 8.4.0-3ubuntu2) 8.4.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240717/202407170411.vRtOCOzx-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202407170411.vRtOCOzx-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
kernel/sched/build_utility.c: note: in included file:
kernel/sched/debug.c:469:17: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/debug.c:469:17: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/debug.c:469:17: sparse: got struct sched_domain [noderef] __rcu *parent
>> kernel/sched/debug.c:715:6: sparse: sparse: symbol 'print_rt_se' was not declared. Should it be static?
kernel/sched/debug.c:842:9: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct task_struct *tsk @@ got struct task_struct [noderef] __rcu *curr @@
kernel/sched/debug.c:842:9: sparse: expected struct task_struct *tsk
kernel/sched/debug.c:842:9: sparse: got struct task_struct [noderef] __rcu *curr
kernel/sched/debug.c:842:9: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct task_struct *tsk @@ got struct task_struct [noderef] __rcu *curr @@
kernel/sched/debug.c:842:9: sparse: expected struct task_struct *tsk
kernel/sched/debug.c:842:9: sparse: got struct task_struct [noderef] __rcu *curr
kernel/sched/build_utility.c: note: in included file:
kernel/sched/stats.c:148:17: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/stats.c:148:17: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/stats.c:148:17: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/build_utility.c: note: in included file:
kernel/sched/topology.c:107:56: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:107:56: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:107:56: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:126:60: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:126:60: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:126:60: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:149:20: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:149:20: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:149:20: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:454:13: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct perf_domain *[assigned] tmp @@ got struct perf_domain [noderef] __rcu *pd @@
kernel/sched/topology.c:454:13: sparse: expected struct perf_domain *[assigned] tmp
kernel/sched/topology.c:454:13: sparse: got struct perf_domain [noderef] __rcu *pd
kernel/sched/topology.c:463:13: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct perf_domain *[assigned] tmp @@ got struct perf_domain [noderef] __rcu *pd @@
kernel/sched/topology.c:463:13: sparse: expected struct perf_domain *[assigned] tmp
kernel/sched/topology.c:463:13: sparse: got struct perf_domain [noderef] __rcu *pd
kernel/sched/topology.c:484:19: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct perf_domain *[assigned] pd @@ got struct perf_domain [noderef] __rcu *pd @@
kernel/sched/topology.c:484:19: sparse: expected struct perf_domain *[assigned] pd
kernel/sched/topology.c:484:19: sparse: got struct perf_domain [noderef] __rcu *pd
kernel/sched/topology.c:646:49: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:646:49: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:646:49: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:731:50: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:731:50: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:731:50: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:739:55: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain [noderef] __rcu *[noderef] __rcu child @@ got struct sched_domain *[assigned] tmp @@
kernel/sched/topology.c:739:55: sparse: expected struct sched_domain [noderef] __rcu *[noderef] __rcu child
kernel/sched/topology.c:739:55: sparse: got struct sched_domain *[assigned] tmp
kernel/sched/topology.c:752:29: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] tmp @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:752:29: sparse: expected struct sched_domain *[assigned] tmp
kernel/sched/topology.c:752:29: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:757:20: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:757:20: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:757:20: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:778:13: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] tmp @@ got struct sched_domain [noderef] __rcu *sd @@
kernel/sched/topology.c:778:13: sparse: expected struct sched_domain *[assigned] tmp
kernel/sched/topology.c:778:13: sparse: got struct sched_domain [noderef] __rcu *sd
kernel/sched/topology.c:940:70: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:940:70: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:940:70: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:969:59: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:969:59: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:969:59: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1015:57: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1015:57: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:1015:57: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1017:25: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sibling @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1017:25: sparse: expected struct sched_domain *sibling
kernel/sched/topology.c:1017:25: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1025:55: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1025:55: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:1025:55: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1027:25: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sibling @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1027:25: sparse: expected struct sched_domain *sibling
kernel/sched/topology.c:1027:25: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1097:62: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1097:62: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:1097:62: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1201:40: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain *child @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1201:40: sparse: expected struct sched_domain *child
kernel/sched/topology.c:1201:40: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1629:43: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain [noderef] __rcu *child @@ got struct sched_domain *child @@
kernel/sched/topology.c:1629:43: sparse: expected struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1629:43: sparse: got struct sched_domain *child
kernel/sched/topology.c:2328:31: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain [noderef] __rcu *parent @@ got struct sched_domain *sd @@
kernel/sched/topology.c:2328:31: sparse: expected struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2328:31: sparse: got struct sched_domain *sd
kernel/sched/topology.c:2430:57: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2430:57: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/topology.c:2430:57: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2451:56: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain *child @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:2451:56: sparse: expected struct sched_domain *child
kernel/sched/topology.c:2451:56: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:2450:57: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2450:57: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/topology.c:2450:57: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2505:57: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2505:57: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/topology.c:2505:57: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/build_utility.c: note: in included file:
kernel/sched/core_sched.c:276:37: sparse: sparse: incompatible types in conditional expression (different address spaces):
kernel/sched/core_sched.c:276:37: sparse: struct task_struct *
kernel/sched/core_sched.c:276:37: sparse: struct task_struct [noderef] __rcu *
kernel/sched/build_utility.c: note: in included file:
kernel/sched/build_utility.c: note: in included file (through include/linux/mmzone.h, include/linux/topology.h, include/linux/sched/topology.h, ...):
include/linux/page-flags.h:240:46: sparse: sparse: self-comparison always evaluates to false
include/linux/page-flags.h:240:46: sparse: sparse: self-comparison always evaluates to false
kernel/sched/build_utility.c: note: in included file:
kernel/sched/sched.h:2175:25: sparse: sparse: incompatible types in comparison expression (different address spaces):
vim +/print_rt_se +715 kernel/sched/debug.c
714
> 715 void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
716 {
717 struct task_struct *task;
718
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH-RT sched v4 0/2] Optimize the RT group scheduling
2024-07-16 20:24 ` kernel test robot
@ 2024-07-17 3:00 ` Xavier
2024-07-17 3:00 ` [PATCH-RT sched v4 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-07-17 3:00 ` [PATCH-RT sched v4 2/2] RT test: Adding test cases for RT group scheduling Xavier
0 siblings, 2 replies; 20+ messages in thread
From: Xavier @ 2024-07-17 3:00 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
Hi all,
Fix compilation warnings in debug.c
To Peter,
When is the new RT group implementation expected to be proposed?
Do you think my current patch is appropriate for optimization until then?
Xavier (2):
RT SCHED: Optimize the enqueue and dequeue operations for rt_se
RT test: Adding test cases for RT group scheduling
MAINTAINERS | 7 +
kernel/sched/debug.c | 48 +++
kernel/sched/rt.c | 287 +++++++++++++++---
kernel/sched/sched.h | 1 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 ++++++++
7 files changed, 618 insertions(+), 40 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
--
2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH-RT sched v4 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-07-17 3:00 ` [PATCH-RT sched v4 0/2] Optimize the RT group scheduling Xavier
@ 2024-07-17 3:00 ` Xavier
2024-07-25 6:21 ` Xavier
2024-07-17 3:00 ` [PATCH-RT sched v4 2/2] RT test: Adding test cases for RT group scheduling Xavier
1 sibling, 1 reply; 20+ messages in thread
From: Xavier @ 2024-07-17 3:00 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
This patch optimizes the enqueue and dequeue of rt_se, the strategy employs
a bottom-up removal approach. Specifically, when removing an rt_se at a
certain level, if it is determined that the highest priority of the rq
associated with that rt_se has not changed, there is no need to continue
removing rt_se at higher levels. At this point, only the total number
of removed rt_se needs to be recorded, and the rt_nr_running count of
higher-level rq should be removed accordingly.
Signed-off-by: Xavier <xavier_qy@163.com>
---
kernel/sched/debug.c | 48 ++++++++
kernel/sched/rt.c | 287 +++++++++++++++++++++++++++++++++++++------
kernel/sched/sched.h | 1 +
3 files changed, 298 insertions(+), 38 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c1eb9a1afd13..352ee55da25e 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -712,6 +712,54 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
#endif
}
+static void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
+{
+ struct task_struct *task;
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (rt_se->my_q) {
+ SEQ_printf_task_group_path(m, rt_se->my_q->tg, "%s\n");
+ return;
+ }
+#endif
+ task = container_of(rt_se, struct task_struct, rt);
+ SEQ_printf(m, " prio-%d, pid-%d, %s\n", task->prio, task->pid, task->comm);
+}
+
+/*shall be called in rq lock*/
+void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq)
+{
+ struct rt_prio_array *array = &rt_rq->active;
+ struct sched_rt_entity *rt_se;
+ struct list_head *queue, *head;
+ unsigned long bitmap[2];
+ int idx;
+ int count = 0;
+
+ if (!rt_rq->rt_nr_running)
+ return;
+
+ memcpy(bitmap, array->bitmap, sizeof(unsigned long) * 2);
+ idx = sched_find_first_bit(bitmap);
+ WARN_ON_ONCE(idx >= MAX_RT_PRIO);
+
+ while (1) {
+ clear_bit(idx, bitmap);
+ queue = array->queue + idx;
+ head = queue;
+ queue = queue->next;
+ do {
+ rt_se = list_entry(queue, struct sched_rt_entity, run_list);
+ print_rt_se(m, rt_se);
+ queue = queue->next;
+ count++;
+ } while (queue != head);
+ idx = sched_find_first_bit(bitmap);
+ if (idx >= MAX_RT_PRIO)
+ break;
+ }
+}
+
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index aa4c1c874fa4..b18c424a50d2 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1113,7 +1113,7 @@ void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
#endif /* CONFIG_SMP */
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-static void
+static int
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1122,9 +1122,11 @@ inc_rt_prio(struct rt_rq *rt_rq, int prio)
rt_rq->highest_prio.curr = prio;
inc_rt_prio_smp(rt_rq, prio, prev_prio);
+
+ return prev_prio > prio;
}
-static void
+static int
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
@@ -1149,12 +1151,22 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
}
dec_rt_prio_smp(rt_rq, prio, prev_prio);
+ if (rt_rq->highest_prio.curr > prio)
+ return prio;
+ else
+ return MAX_RT_PRIO;
}
#else
-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
+static inline int inc_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
+static inline int dec_rt_prio(struct rt_rq *rt_rq, int prio)
+{
+ return 0;
+}
#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
@@ -1218,28 +1230,31 @@ unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
}
static inline
-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
int prio = rt_se_prio(rt_se);
+ int prio_change;
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
- inc_rt_prio(rt_rq, prio);
+ prio_change = inc_rt_prio(rt_rq, prio);
inc_rt_group(rt_se, rt_rq);
+ return prio_change;
}
static inline
-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+int dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq, int prio)
{
+ int prio_changed;
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
- WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
- dec_rt_prio(rt_rq, rt_se_prio(rt_se));
+ prio_changed = dec_rt_prio(rt_rq, prio);
dec_rt_group(rt_se, rt_rq);
+ return prio_changed;
}
/*
@@ -1255,12 +1270,13 @@ static inline bool move_entity(unsigned int flags)
return true;
}
-static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
+static void __delist_rt_entity(struct sched_rt_entity *rt_se,
+ struct rt_prio_array *array, int last_prio)
{
list_del_init(&rt_se->run_list);
- if (list_empty(array->queue + rt_se_prio(rt_se)))
- __clear_bit(rt_se_prio(rt_se), array->bitmap);
+ if (list_empty(array->queue + last_prio))
+ __clear_bit(last_prio, array->bitmap);
rt_se->on_list = 0;
}
@@ -1371,7 +1387,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
}
}
-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/*
+ * Returns: -1 indicates that rt_se was not enqueued, 0 indicates that the highest
+ * priority of the rq did not change after enqueue, and 1 indicates that the highest
+ * priority of the rq changed after enqueue.
+ */
+static int __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
@@ -1386,8 +1407,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
*/
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
if (rt_se->on_list)
- __delist_rt_entity(rt_se, array);
- return;
+ __delist_rt_entity(rt_se, array, rt_se_prio(rt_se));
+ return -1;
}
if (move_entity(flags)) {
@@ -1402,73 +1423,263 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
}
rt_se->on_rq = 1;
- inc_rt_tasks(rt_se, rt_rq);
+ return inc_rt_tasks(rt_se, rt_rq);
}
-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
+/**
+ * delete rt_se from rt_rq
+ *
+ * @rt_se Nodes to be deleted
+ * @last_prio The highest priority of this rt_se before the previous round
+ * of deletion
+ * @flags operation flags
+ *
+ * Returns: =0 indicates that the highest priority of the current rq did not
+ * change during this deletion. >0 indicates it changed, and it returns the
+ * previous highest priority to use in the next round of deletion.
+ */
+static int __dequeue_rt_entity(struct sched_rt_entity *rt_se, int last_prio,
+ unsigned int flags)
{
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
struct rt_prio_array *array = &rt_rq->active;
if (move_entity(flags)) {
WARN_ON_ONCE(!rt_se->on_list);
- __delist_rt_entity(rt_se, array);
+ __delist_rt_entity(rt_se, array, last_prio);
}
rt_se->on_rq = 0;
- dec_rt_tasks(rt_se, rt_rq);
+ return dec_rt_tasks(rt_se, rt_rq, last_prio);
+}
+
+static inline void dec_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running -= rt;
+ rt_rq->rr_nr_running -= rr;
+}
+
+static inline void add_rq_nr_running(struct sched_rt_entity *rt_se,
+ unsigned int rt, unsigned int rr)
+{
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+
+ rt_rq->rt_nr_running += rt;
+ rt_rq->rr_nr_running += rr;
+}
+
+static inline bool on_top_rt_rq(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_RT_GROUP_SCHED
+ if (rt_se->parent)
+ return false;
+#endif
+ return true;
}
/*
- * Because the prio of an upper entry depends on the lower
- * entries, we must remove entries top - down.
+ * To optimize the enqueue and dequeue of rt_se, this strategy employs a
+ * bottom-up removal approach. Specifically, when removing an rt_se at a
+ * certain level, if it is determined that the highest priority of the rq
+ * associated with that rt_se has not changed, there is no need to continue
+ * removing rt_se at higher levels. At this point, only the total number
+ * of removed rt_se needs to be recorded, and the rt_nr_running count of
+ * higher-level rq should be removed accordingly.
+ *
+ * For enqueue operations, if an rt_se at a certain level is in the rq,
+ * it is still necessary to check the priority of the higher-level rq.
+ * If the priority of the higher-level rq is found to be lower than that
+ * of the rt_se to be added, it should be removed, as updating the highest
+ * priority of the rq during addition will cause the rq to be repositioned
+ * in the parent rq.
+ *
+ * Conversely, for dequeue operations, if an rt_se at a certain level is
+ * not in the rq, the operation can be exited immediately to reduce
+ * unnecessary checks and handling.
+ *
+ * The return value refers to the last rt_se that was removed for enqueue
+ * operations. And for dequeue operations, it refers to the last rt_se
+ * that was either removed or had its rt_nr_running updated.
*/
-static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
+static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
+ unsigned int flags, int for_enqueue)
{
- struct sched_rt_entity *back = NULL;
- unsigned int rt_nr_running;
+ struct sched_rt_entity *last = rt_se;
+ struct sched_rt_entity *origin = rt_se;
+ unsigned int del_rt_nr = 0;
+ unsigned int del_rr_nr = 0;
+ int prio_changed = rt_se_prio(rt_se);
+ int sub_on_rq = 1;
for_each_sched_rt_entity(rt_se) {
- rt_se->back = back;
- back = rt_se;
- }
+ if (on_rt_rq(rt_se)) {
+ if (sub_on_rq) {
+ /*
+ * The number of tasks removed from the sub-level rt_se also needs
+ * to be subtracted from the rq of the current rt_se, as the current
+ * rt_se's rq no longer includes the number of removed tasks.
+ */
+ dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
+ if ((prio_changed != MAX_RT_PRIO) ||
+ (rt_se_prio(rt_se) > rt_se_prio(origin))) {
+ /*
+ * If the removal of the lower-level rt_se causes the
+ * highest priority of the current rq to change, or if the
+ * priority of current rq is lower than the rt_se to be
+ * added, then the current rt_se also needs to be removed
+ * from its parent rq, and the number of deleted tasks
+ * should be accumulated.
+ */
+ if (prio_changed == MAX_RT_PRIO)
+ prio_changed = rt_se_prio(rt_se);
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else if (!for_enqueue) {
+ /* For dequeue, last may only rt_nr_running was modified.*/
+ last = rt_se;
+ }
+ } else {
+ /*
+ * Entering this branch must be for enqueue, as dequeue would break
+ * if an rt_se is not online.
+ * If the sub-level node is not online, and the current rt_se's
+ * priority is lower than the one being added, current rt_se need
+ * to be removed.
+ */
+ prio_changed = rt_se_prio(rt_se);
+ if (prio_changed > rt_se_prio(origin)) {
+ del_rt_nr += rt_se_nr_running(rt_se);
+ del_rr_nr += rt_se_rr_nr_running(rt_se);
+ prio_changed = __dequeue_rt_entity(rt_se,
+ prio_changed, flags);
+ last = rt_se;
+ } else {
+ prio_changed = MAX_RT_PRIO;
+ }
+ }
- rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
+ /*
+ * If the current rt_se is on the top rt_rq, then the already deleted
+ * nodes, plus the count of the rt_rq where current rt_se located,
+ * need to be removed from the top_rt_rq.
+ */
+ if (on_top_rt_rq(rt_se)) {
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
+ }
+ sub_on_rq = 1;
+ } else if (for_enqueue) {
+ struct rt_rq *group_rq = group_rt_rq(rt_se);
- for (rt_se = back; rt_se; rt_se = rt_se->back) {
- if (on_rt_rq(rt_se))
- __dequeue_rt_entity(rt_se, flags);
+ /*
+ * In the case of an enqueue operation, if a certain level is found to be
+ * not online, then the previous counts need to be reset to zero.
+ */
+ prio_changed = MAX_RT_PRIO;
+ sub_on_rq = 0;
+ del_rt_nr = 0;
+ del_rr_nr = 0;
+
+ /*
+ * If the current group is being throttled, then there is no need to check
+ * higher levels since enqueueing will not affect higher-level nodes.
+ */
+ if (group_rq && rt_rq_throttled(group_rq))
+ break;
+
+ if (on_top_rt_rq(rt_se))
+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
+ rt_rq_of_se(rt_se)->rt_nr_running);
+ } else {
+ last = rt_se;
+ break;
+ }
}
- dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
+ return last;
}
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int enqueue = 1;
+ int prio_change = 1;
update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
- for_each_sched_rt_entity(rt_se)
- __enqueue_rt_entity(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 1);
+
+ for_each_sched_rt_entity(rt_se) {
+ if (enqueue || !on_rt_rq(rt_se) || (prio_change == 1)) {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change >= 0) {
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
+ } else {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ }
+
+ if (rt_se == last)
+ enqueue = 0;
+ }
+
enqueue_top_rt_rq(&rq->rt);
}
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
struct rq *rq = rq_of_rt_se(rt_se);
+ struct sched_rt_entity *last;
+ unsigned int add_rt_nr = 0;
+ unsigned int add_rr_nr = 0;
+ int prio_change = 1;
update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
- dequeue_rt_stack(rt_se, flags);
+ last = dequeue_rt_stack(rt_se, flags, 0);
for_each_sched_rt_entity(rt_se) {
struct rt_rq *rt_rq = group_rt_rq(rt_se);
+ if (rt_rq && rt_rq->rt_nr_running) {
+ if (on_rt_rq(rt_se)) {
+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
+ } else {
+ prio_change = __enqueue_rt_entity(rt_se, flags);
+ if (prio_change == 0) {
+ /*
+ * If enqueue is successful and the priority of the rq has
+ * not changed, then the parent node only needs to add the
+ * count of the current rt_se. Otherwise, the parent node
+ * will also need to enqueue.
+ */
+ add_rt_nr = rt_se_nr_running(rt_se);
+ add_rr_nr = rt_se_rr_nr_running(rt_se);
+ }
+ }
+ } else {
+ add_rt_nr = add_rr_nr = 0;
+ }
- if (rt_rq && rt_rq->rt_nr_running)
- __enqueue_rt_entity(rt_se, flags);
+ /*
+ * last is the rt_se of the last deletion or modification of the
+ * count, so the subsequent rt_se does not need to be updated.
+ */
+ if (rt_se == last)
+ break;
}
+
enqueue_top_rt_rq(&rq->rt);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ef20c61004eb..821d65106d13 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2879,6 +2879,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu);
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+extern void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq);
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
extern void resched_latency_warn(int cpu, u64 latency);
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH-RT sched v4 2/2] RT test: Adding test cases for RT group scheduling
2024-07-17 3:00 ` [PATCH-RT sched v4 0/2] Optimize the RT group scheduling Xavier
2024-07-17 3:00 ` [PATCH-RT sched v4 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-07-17 3:00 ` Xavier
1 sibling, 0 replies; 20+ messages in thread
From: Xavier @ 2024-07-17 3:00 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, Xavier
Adding test cases for RT group scheduling, create some RT infinite loop
processes/threads, then set them to the same or different priorities.
Place them in different RT task groups, run for a period of time,
and finally count the number of infinite loop executions for all tasks.
Signed-off-by: Xavier <xavier_qy@163.com>
---
MAINTAINERS | 7 +
tools/testing/selftests/sched/Makefile | 4 +-
tools/testing/selftests/sched/deadloop.c | 192 ++++++++++++++++++
.../selftests/sched/rt_group_sched_test.sh | 119 +++++++++++
4 files changed, 320 insertions(+), 2 deletions(-)
create mode 100644 tools/testing/selftests/sched/deadloop.c
create mode 100755 tools/testing/selftests/sched/rt_group_sched_test.sh
diff --git a/MAINTAINERS b/MAINTAINERS
index 958e935449e5..f5cc821b8510 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19463,6 +19463,13 @@ L: linux-remoteproc@vger.kernel.org
S: Maintained
F: drivers/tty/rpmsg_tty.c
+RT GROUP SCHED TEST
+M: Xavier <xavier_qy@163.com>
+L: linux-kernel@vger.kernel.org
+S: Maintained
+F: tools/testing/selftests/sched/deadloop.c
+F: tools/testing/selftests/sched/rt_group_sched_test.sh
+
RTL2830 MEDIA DRIVER
L: linux-media@vger.kernel.org
S: Orphan
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
index 099ee9213557..96decb58bf35 100644
--- a/tools/testing/selftests/sched/Makefile
+++ b/tools/testing/selftests/sched/Makefile
@@ -8,7 +8,7 @@ CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \
$(CLANG_FLAGS)
LDLIBS += -lpthread
-TEST_GEN_FILES := cs_prctl_test
-TEST_PROGS := cs_prctl_test
+TEST_GEN_FILES := cs_prctl_test deadloop
+TEST_PROGS := cs_prctl_test deadloop
include ../lib.mk
diff --git a/tools/testing/selftests/sched/deadloop.c b/tools/testing/selftests/sched/deadloop.c
new file mode 100644
index 000000000000..d850a3e2a0ab
--- /dev/null
+++ b/tools/testing/selftests/sched/deadloop.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <signal.h>
+
+/*
+ * Create multiple infinite loop threads based on the passed parameters
+ * Usage: deadloop num policy prio
+ * num: the number of child threads
+ * policy: the scheduling policy of the child threads, 0-fair, 1-fifo, 2-rr
+ * prio: the priority
+ * If this process is killed, it will print the loop count of all child threads
+ * to the OUTPUT_FILE
+ *
+ * Date: June 27, 2024
+ * Author: Xavier <xavier_qy@163.com>
+ */
+
+#define OUTPUT_FILE "rt_group_sched_test.log"
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#define do_err(x) \
+do { \
+ if ((x) < 0) { \
+ printf("test BUG_ON func %s, line %d %ld\n", \
+ __func__, __LINE__, (long)(x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+#define do_false(x) \
+do { \
+ if ((x) == 1) { \
+ printf("test BUG_ON func %s, line %d %d\n", \
+ __func__, __LINE__, (x) \
+ ); \
+ while (1) \
+ sleep(1); \
+ } \
+} while (0)
+
+
+struct thread_data {
+ pthread_t thread;
+ int index;
+ int pid;
+ unsigned long cnt;
+};
+
+static struct thread_data *pdata;
+static int thread_num = 1;
+
+static void create_thread_posix(void *entry, pthread_t *thread, int *para,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param param;
+ pthread_attr_t attr;
+
+ memset(¶m, 0, sizeof(param));
+ ret = pthread_attr_init(&attr);
+ do_err(ret);
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ do_err(ret);
+
+ param.sched_priority = prio;
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ do_err(ret);
+
+ ret = pthread_attr_setschedparam(&attr, ¶m);
+ do_err(ret);
+
+ ret = pthread_create(thread, &attr, entry, para);
+ do_err(ret);
+}
+
+static void *dead_loop_entry(void *arg)
+{
+ int index = *(int *)arg;
+ struct sched_param param;
+ int cur = gettid();
+
+ sched_getparam(cur, ¶m);
+ pdata[index].pid = cur;
+ printf("cur:%d prio:%d\n", cur, param.sched_priority);
+
+ while (1) {
+ asm volatile("" ::: "memory");
+ pdata[index].cnt++;
+ }
+ return NULL;
+}
+
+static void handle_signal(int signal)
+{
+ int cnt = 0;
+
+ if (signal == SIGTERM) {
+ FILE *file = freopen(OUTPUT_FILE, "a", stdout);
+
+ if (file == NULL) {
+ perror("freopen");
+ exit(0);
+ }
+
+ while (cnt < thread_num) {
+ printf("pid:%d cnt:%ld\n", pdata[cnt].pid, pdata[cnt].cnt);
+ cnt++;
+ }
+ fclose(file);
+ exit(0);
+ }
+}
+
+static int dead_loop_create(int policy, int prio)
+{
+ int cnt = 0;
+ int ret;
+ void *status;
+ struct sched_param param;
+
+ param.sched_priority = prio;
+ pdata = malloc(thread_num * sizeof(struct thread_data));
+ do_false(!pdata);
+
+ if (policy) {
+ ret = sched_setscheduler(0, policy, ¶m);
+ do_err(ret);
+ }
+
+ while (cnt < thread_num) {
+ pdata[cnt].index = cnt;
+ create_thread_posix(dead_loop_entry, &pdata[cnt].thread,
+ &pdata[cnt].index, policy, prio);
+ cnt++;
+ }
+
+ signal(SIGTERM, handle_signal);
+
+ cnt = 0;
+ while (cnt < thread_num) {
+ pthread_join(pdata[cnt].thread, &status);
+ cnt++;
+ }
+
+ free(pdata);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int policy = 2;
+ int prio = 50;
+
+ if (argc == 2)
+ thread_num = atoi(argv[1]);
+
+ if (argc == 3) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ if (policy > 0)
+ prio = 50;
+ }
+
+ if (argc == 4) {
+ thread_num = atoi(argv[1]);
+ policy = atoi(argv[2]);
+ prio = atoi(argv[3]);
+ }
+
+ dead_loop_create(policy, prio);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sched/rt_group_sched_test.sh b/tools/testing/selftests/sched/rt_group_sched_test.sh
new file mode 100755
index 000000000000..9031250a2684
--- /dev/null
+++ b/tools/testing/selftests/sched/rt_group_sched_test.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test for rt group scheduling
+# Date: June 27, 2024
+# Author: Xavier <xavier_qy@163.com>
+
+# Record the list of child process PIDs
+PIDS=()
+
+# File for redirected output
+LOGFILE="rt_group_sched_test.log"
+
+# Cleanup function: kill all recorded child processes and unmount the cgroup
+function cleanup() {
+ echo "Cleaning up..."
+ for pid in "${PIDS[@]}"; do
+ if kill -0 $pid 2>/dev/null; then
+ kill -TERM $pid
+ fi
+ done
+
+ # Sleep for a while to ensure the processes are properly killed
+ sleep 2
+
+ # Unmount the cgroup filesystem
+ umount /sys/fs/cgroup/cpu 2>/dev/null
+ umount /sys/fs/cgroup 2>/dev/null
+ echo "Cleanup completed."
+
+ # Ensure the LOGFILE exists and is correct
+ if [ ! -f "$LOGFILE" ]; then
+ echo "$LOGFILE not found!"
+ exit 1
+ fi
+
+ # Initialize the total count variable
+ total=0
+
+ # Read matching lines and calculate the total sum
+ while IFS= read -r line
+ do
+ # Use grep to match lines containing 'pid:' and 'cnt:', and extract the value of cnt
+ if echo "$line" | grep -q '^pid:[[:digit:]]\+ cnt:[[:digit:]]\+'; then
+ cnt=$(echo "$line" | sed -n \
+ 's/^pid:[[:digit:]]\+ cnt:\([[:digit:]]\+\)/\1/p')
+ total=$((total + cnt))
+ fi
+ done < "$LOGFILE"
+
+ # Print the total sum
+ echo "Total cnt: $total"
+ echo "Finished processing."
+}
+
+# Capture actions when interrupted or terminated by a signal
+trap cleanup EXIT
+
+# Start the cgroup filesystem and create the necessary directories
+function setup_cgroups() {
+ mount -t tmpfs -o mode=755 cgroup_root /sys/fs/cgroup
+ mkdir -p /sys/fs/cgroup/cpu
+ mount -t cgroup -o cpu none /sys/fs/cgroup/cpu
+}
+
+# Create cgroup subdirectories and configure their settings
+function create_child_cgroup() {
+ local base_dir=$1
+ local name=$2
+ local rt_period=$3
+ local rt_runtime=$4
+ mkdir -p "$base_dir/$name"
+ echo $rt_period > "$base_dir/$name/cpu.rt_period_us"
+ echo $rt_runtime > "$base_dir/$name/cpu.rt_runtime_us"
+}
+# Launch a process and add it to the specified cgroup
+function launch_process() {
+ local process_name=$1
+
+ # Three parameters representing the number of child threads, scheduling policy, and priority
+ local args=$2
+ local cgroup_path=$3
+
+ # Launch the process
+ exec -a $process_name ./deadloop $args &
+ local pid=$!
+ PIDS+=($pid)
+
+ # Short sleep to ensure the process starts
+ sleep 1
+
+ # Check if the process started successfully
+ if ! pgrep -x $process_name > /dev/null; then
+ echo "Error: No process found with name $process_name."
+ exit 1
+ fi
+
+ echo $pid > "$cgroup_path/cgroup.procs"
+ echo "Process $process_name with PID $pid added to cgroup $cgroup_path"
+}
+
+# Main function running all tasks
+function main() {
+ echo "The test needs 30 seconds..."
+ rm -f "$LOGFILE"
+ setup_cgroups
+ create_child_cgroup "/sys/fs/cgroup/cpu" "child1" 1000000 800000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1" "child2" 1000000 700000
+ create_child_cgroup "/sys/fs/cgroup/cpu/child1/child2" "child3" 1000000 600000
+ launch_process "child1" "3 2 50" "/sys/fs/cgroup/cpu/child1"
+ launch_process "child2" "3 2 50" "/sys/fs/cgroup/cpu/child1/child2"
+ launch_process "child3" "1 2 50" "/sys/fs/cgroup/cpu/child1/child2/child3"
+ launch_process "tg_root" "1 2 50" "/sys/fs/cgroup/cpu"
+
+ # Run for 30 seconds
+ sleep 30
+}
+
+# Execute the main function
+main
--
2.45.2
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re:[PATCH-RT sched v4 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-07-17 3:00 ` [PATCH-RT sched v4 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
@ 2024-07-25 6:21 ` Xavier
2024-08-08 11:22 ` Xavier
0 siblings, 1 reply; 20+ messages in thread
From: Xavier @ 2024-07-25 6:21 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang
Hi all,
I would like to ask everyone for your opinions or thoughts on the RT group scheduling
optimization. At this stage, is it ready to be merged into the corresponding branch?
Thanks.
--
Best Regards,
Xavier
At 2024-07-17 11:00:32, "Xavier" <xavier_qy@163.com> wrote:
>This patch optimizes the enqueue and dequeue of rt_se, the strategy employs
>a bottom-up removal approach. Specifically, when removing an rt_se at a
>certain level, if it is determined that the highest priority of the rq
>associated with that rt_se has not changed, there is no need to continue
>removing rt_se at higher levels. At this point, only the total number
>of removed rt_se needs to be recorded, and the rt_nr_running count of
>higher-level rq should be removed accordingly.
>
>Signed-off-by: Xavier <xavier_qy@163.com>
>---
> kernel/sched/debug.c | 48 ++++++++
> kernel/sched/rt.c | 287 +++++++++++++++++++++++++++++++++++++------
> kernel/sched/sched.h | 1 +
> 3 files changed, 298 insertions(+), 38 deletions(-)
>
>diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
>index c1eb9a1afd13..352ee55da25e 100644
>--- a/kernel/sched/debug.c
>+++ b/kernel/sched/debug.c
>@@ -712,6 +712,54 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
> #endif
> }
>
>+static void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
>+{
>+ struct task_struct *task;
>+
>+#ifdef CONFIG_RT_GROUP_SCHED
>+ if (rt_se->my_q) {
>+ SEQ_printf_task_group_path(m, rt_se->my_q->tg, "%s\n");
>+ return;
>+ }
>+#endif
>+ task = container_of(rt_se, struct task_struct, rt);
>+ SEQ_printf(m, " prio-%d, pid-%d, %s\n", task->prio, task->pid, task->comm);
>+}
>+
>+/*shall be called in rq lock*/
>+void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq)
>+{
>+ struct rt_prio_array *array = &rt_rq->active;
>+ struct sched_rt_entity *rt_se;
>+ struct list_head *queue, *head;
>+ unsigned long bitmap[2];
>+ int idx;
>+ int count = 0;
>+
>+ if (!rt_rq->rt_nr_running)
>+ return;
>+
>+ memcpy(bitmap, array->bitmap, sizeof(unsigned long) * 2);
>+ idx = sched_find_first_bit(bitmap);
>+ WARN_ON_ONCE(idx >= MAX_RT_PRIO);
>+
>+ while (1) {
>+ clear_bit(idx, bitmap);
>+ queue = array->queue + idx;
>+ head = queue;
>+ queue = queue->next;
>+ do {
>+ rt_se = list_entry(queue, struct sched_rt_entity, run_list);
>+ print_rt_se(m, rt_se);
>+ queue = queue->next;
>+ count++;
>+ } while (queue != head);
>+ idx = sched_find_first_bit(bitmap);
>+ if (idx >= MAX_RT_PRIO)
>+ break;
>+ }
>+}
>+
> void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
> {
> #ifdef CONFIG_RT_GROUP_SCHED
>diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
>index aa4c1c874fa4..b18c424a50d2 100644
>--- a/kernel/sched/rt.c
>+++ b/kernel/sched/rt.c
>@@ -1113,7 +1113,7 @@ void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
> #endif /* CONFIG_SMP */
>
> #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
>-static void
>+static int
> inc_rt_prio(struct rt_rq *rt_rq, int prio)
> {
> int prev_prio = rt_rq->highest_prio.curr;
>@@ -1122,9 +1122,11 @@ inc_rt_prio(struct rt_rq *rt_rq, int prio)
> rt_rq->highest_prio.curr = prio;
>
> inc_rt_prio_smp(rt_rq, prio, prev_prio);
>+
>+ return prev_prio > prio;
> }
>
>-static void
>+static int
> dec_rt_prio(struct rt_rq *rt_rq, int prio)
> {
> int prev_prio = rt_rq->highest_prio.curr;
>@@ -1149,12 +1151,22 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
> }
>
> dec_rt_prio_smp(rt_rq, prio, prev_prio);
>+ if (rt_rq->highest_prio.curr > prio)
>+ return prio;
>+ else
>+ return MAX_RT_PRIO;
> }
>
> #else
>
>-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
>-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
>+static inline int inc_rt_prio(struct rt_rq *rt_rq, int prio)
>+{
>+ return 0;
>+}
>+static inline int dec_rt_prio(struct rt_rq *rt_rq, int prio)
>+{
>+ return 0;
>+}
>
> #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
>
>@@ -1218,28 +1230,31 @@ unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
> }
>
> static inline
>-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>+int inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
> {
> int prio = rt_se_prio(rt_se);
>+ int prio_change;
>
> WARN_ON(!rt_prio(prio));
> rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
> rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
>
>- inc_rt_prio(rt_rq, prio);
>+ prio_change = inc_rt_prio(rt_rq, prio);
> inc_rt_group(rt_se, rt_rq);
>+ return prio_change;
> }
>
> static inline
>-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>+int dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq, int prio)
> {
>+ int prio_changed;
> WARN_ON(!rt_prio(rt_se_prio(rt_se)));
>- WARN_ON(!rt_rq->rt_nr_running);
> rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
> rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
>
>- dec_rt_prio(rt_rq, rt_se_prio(rt_se));
>+ prio_changed = dec_rt_prio(rt_rq, prio);
> dec_rt_group(rt_se, rt_rq);
>+ return prio_changed;
> }
>
> /*
>@@ -1255,12 +1270,13 @@ static inline bool move_entity(unsigned int flags)
> return true;
> }
>
>-static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
>+static void __delist_rt_entity(struct sched_rt_entity *rt_se,
>+ struct rt_prio_array *array, int last_prio)
> {
> list_del_init(&rt_se->run_list);
>
>- if (list_empty(array->queue + rt_se_prio(rt_se)))
>- __clear_bit(rt_se_prio(rt_se), array->bitmap);
>+ if (list_empty(array->queue + last_prio))
>+ __clear_bit(last_prio, array->bitmap);
>
> rt_se->on_list = 0;
> }
>@@ -1371,7 +1387,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
> }
> }
>
>-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
>+/*
>+ * Returns: -1 indicates that rt_se was not enqueued, 0 indicates that the highest
>+ * priority of the rq did not change after enqueue, and 1 indicates that the highest
>+ * priority of the rq changed after enqueue.
>+ */
>+static int __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
> {
> struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
> struct rt_prio_array *array = &rt_rq->active;
>@@ -1386,8 +1407,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
> */
> if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
> if (rt_se->on_list)
>- __delist_rt_entity(rt_se, array);
>- return;
>+ __delist_rt_entity(rt_se, array, rt_se_prio(rt_se));
>+ return -1;
> }
>
> if (move_entity(flags)) {
>@@ -1402,73 +1423,263 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
> }
> rt_se->on_rq = 1;
>
>- inc_rt_tasks(rt_se, rt_rq);
>+ return inc_rt_tasks(rt_se, rt_rq);
> }
>
>-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
>+/**
>+ * delete rt_se from rt_rq
>+ *
>+ * @rt_se Nodes to be deleted
>+ * @last_prio The highest priority of this rt_se before the previous round
>+ * of deletion
>+ * @flags operation flags
>+ *
>+ * Returns: =0 indicates that the highest priority of the current rq did not
>+ * change during this deletion. >0 indicates it changed, and it returns the
>+ * previous highest priority to use in the next round of deletion.
>+ */
>+static int __dequeue_rt_entity(struct sched_rt_entity *rt_se, int last_prio,
>+ unsigned int flags)
> {
> struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
> struct rt_prio_array *array = &rt_rq->active;
>
> if (move_entity(flags)) {
> WARN_ON_ONCE(!rt_se->on_list);
>- __delist_rt_entity(rt_se, array);
>+ __delist_rt_entity(rt_se, array, last_prio);
> }
> rt_se->on_rq = 0;
>
>- dec_rt_tasks(rt_se, rt_rq);
>+ return dec_rt_tasks(rt_se, rt_rq, last_prio);
>+}
>+
>+static inline void dec_rq_nr_running(struct sched_rt_entity *rt_se,
>+ unsigned int rt, unsigned int rr)
>+{
>+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
>+
>+ rt_rq->rt_nr_running -= rt;
>+ rt_rq->rr_nr_running -= rr;
>+}
>+
>+static inline void add_rq_nr_running(struct sched_rt_entity *rt_se,
>+ unsigned int rt, unsigned int rr)
>+{
>+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
>+
>+ rt_rq->rt_nr_running += rt;
>+ rt_rq->rr_nr_running += rr;
>+}
>+
>+static inline bool on_top_rt_rq(struct sched_rt_entity *rt_se)
>+{
>+#ifdef CONFIG_RT_GROUP_SCHED
>+ if (rt_se->parent)
>+ return false;
>+#endif
>+ return true;
> }
>
> /*
>- * Because the prio of an upper entry depends on the lower
>- * entries, we must remove entries top - down.
>+ * To optimize the enqueue and dequeue of rt_se, this strategy employs a
>+ * bottom-up removal approach. Specifically, when removing an rt_se at a
>+ * certain level, if it is determined that the highest priority of the rq
>+ * associated with that rt_se has not changed, there is no need to continue
>+ * removing rt_se at higher levels. At this point, only the total number
>+ * of removed rt_se needs to be recorded, and the rt_nr_running count of
>+ * higher-level rq should be removed accordingly.
>+ *
>+ * For enqueue operations, if an rt_se at a certain level is in the rq,
>+ * it is still necessary to check the priority of the higher-level rq.
>+ * If the priority of the higher-level rq is found to be lower than that
>+ * of the rt_se to be added, it should be removed, as updating the highest
>+ * priority of the rq during addition will cause the rq to be repositioned
>+ * in the parent rq.
>+ *
>+ * Conversely, for dequeue operations, if an rt_se at a certain level is
>+ * not in the rq, the operation can be exited immediately to reduce
>+ * unnecessary checks and handling.
>+ *
>+ * The return value refers to the last rt_se that was removed for enqueue
>+ * operations. And for dequeue operations, it refers to the last rt_se
>+ * that was either removed or had its rt_nr_running updated.
> */
>-static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
>+static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
>+ unsigned int flags, int for_enqueue)
> {
>- struct sched_rt_entity *back = NULL;
>- unsigned int rt_nr_running;
>+ struct sched_rt_entity *last = rt_se;
>+ struct sched_rt_entity *origin = rt_se;
>+ unsigned int del_rt_nr = 0;
>+ unsigned int del_rr_nr = 0;
>+ int prio_changed = rt_se_prio(rt_se);
>+ int sub_on_rq = 1;
>
> for_each_sched_rt_entity(rt_se) {
>- rt_se->back = back;
>- back = rt_se;
>- }
>+ if (on_rt_rq(rt_se)) {
>+ if (sub_on_rq) {
>+ /*
>+ * The number of tasks removed from the sub-level rt_se also needs
>+ * to be subtracted from the rq of the current rt_se, as the current
>+ * rt_se's rq no longer includes the number of removed tasks.
>+ */
>+ dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
>+ if ((prio_changed != MAX_RT_PRIO) ||
>+ (rt_se_prio(rt_se) > rt_se_prio(origin))) {
>+ /*
>+ * If the removal of the lower-level rt_se causes the
>+ * highest priority of the current rq to change, or if the
>+ * priority of current rq is lower than the rt_se to be
>+ * added, then the current rt_se also needs to be removed
>+ * from its parent rq, and the number of deleted tasks
>+ * should be accumulated.
>+ */
>+ if (prio_changed == MAX_RT_PRIO)
>+ prio_changed = rt_se_prio(rt_se);
>+ del_rt_nr += rt_se_nr_running(rt_se);
>+ del_rr_nr += rt_se_rr_nr_running(rt_se);
>+ prio_changed = __dequeue_rt_entity(rt_se,
>+ prio_changed, flags);
>+ last = rt_se;
>+ } else if (!for_enqueue) {
>+ /* For dequeue, last may only rt_nr_running was modified.*/
>+ last = rt_se;
>+ }
>+ } else {
>+ /*
>+ * Entering this branch must be for enqueue, as dequeue would break
>+ * if an rt_se is not online.
>+ * If the sub-level node is not online, and the current rt_se's
>+ * priority is lower than the one being added, current rt_se need
>+ * to be removed.
>+ */
>+ prio_changed = rt_se_prio(rt_se);
>+ if (prio_changed > rt_se_prio(origin)) {
>+ del_rt_nr += rt_se_nr_running(rt_se);
>+ del_rr_nr += rt_se_rr_nr_running(rt_se);
>+ prio_changed = __dequeue_rt_entity(rt_se,
>+ prio_changed, flags);
>+ last = rt_se;
>+ } else {
>+ prio_changed = MAX_RT_PRIO;
>+ }
>+ }
>
>- rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
>+ /*
>+ * If the current rt_se is on the top rt_rq, then the already deleted
>+ * nodes, plus the count of the rt_rq where current rt_se located,
>+ * need to be removed from the top_rt_rq.
>+ */
>+ if (on_top_rt_rq(rt_se)) {
>+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
>+ del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
>+ }
>+ sub_on_rq = 1;
>+ } else if (for_enqueue) {
>+ struct rt_rq *group_rq = group_rt_rq(rt_se);
>
>- for (rt_se = back; rt_se; rt_se = rt_se->back) {
>- if (on_rt_rq(rt_se))
>- __dequeue_rt_entity(rt_se, flags);
>+ /*
>+ * In the case of an enqueue operation, if a certain level is found to be
>+ * not online, then the previous counts need to be reset to zero.
>+ */
>+ prio_changed = MAX_RT_PRIO;
>+ sub_on_rq = 0;
>+ del_rt_nr = 0;
>+ del_rr_nr = 0;
>+
>+ /*
>+ * If the current group is being throttled, then there is no need to check
>+ * higher levels since enqueueing will not affect higher-level nodes.
>+ */
>+ if (group_rq && rt_rq_throttled(group_rq))
>+ break;
>+
>+ if (on_top_rt_rq(rt_se))
>+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
>+ rt_rq_of_se(rt_se)->rt_nr_running);
>+ } else {
>+ last = rt_se;
>+ break;
>+ }
> }
>
>- dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
>+ return last;
> }
>
> static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
> {
> struct rq *rq = rq_of_rt_se(rt_se);
>+ struct sched_rt_entity *last;
>+ unsigned int add_rt_nr = 0;
>+ unsigned int add_rr_nr = 0;
>+ int enqueue = 1;
>+ int prio_change = 1;
>
> update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
>
>- dequeue_rt_stack(rt_se, flags);
>- for_each_sched_rt_entity(rt_se)
>- __enqueue_rt_entity(rt_se, flags);
>+ last = dequeue_rt_stack(rt_se, flags, 1);
>+
>+ for_each_sched_rt_entity(rt_se) {
>+ if (enqueue || !on_rt_rq(rt_se) || (prio_change == 1)) {
>+ prio_change = __enqueue_rt_entity(rt_se, flags);
>+ if (prio_change >= 0) {
>+ add_rt_nr = rt_se_nr_running(rt_se);
>+ add_rr_nr = rt_se_rr_nr_running(rt_se);
>+ } else {
>+ add_rt_nr = add_rr_nr = 0;
>+ }
>+ } else {
>+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
>+ }
>+
>+ if (rt_se == last)
>+ enqueue = 0;
>+ }
>+
> enqueue_top_rt_rq(&rq->rt);
> }
>
> static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
> {
> struct rq *rq = rq_of_rt_se(rt_se);
>+ struct sched_rt_entity *last;
>+ unsigned int add_rt_nr = 0;
>+ unsigned int add_rr_nr = 0;
>+ int prio_change = 1;
>
> update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
>
>- dequeue_rt_stack(rt_se, flags);
>+ last = dequeue_rt_stack(rt_se, flags, 0);
>
> for_each_sched_rt_entity(rt_se) {
> struct rt_rq *rt_rq = group_rt_rq(rt_se);
>+ if (rt_rq && rt_rq->rt_nr_running) {
>+ if (on_rt_rq(rt_se)) {
>+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
>+ } else {
>+ prio_change = __enqueue_rt_entity(rt_se, flags);
>+ if (prio_change == 0) {
>+ /*
>+ * If enqueue is successful and the priority of the rq has
>+ * not changed, then the parent node only needs to add the
>+ * count of the current rt_se. Otherwise, the parent node
>+ * will also need to enqueue.
>+ */
>+ add_rt_nr = rt_se_nr_running(rt_se);
>+ add_rr_nr = rt_se_rr_nr_running(rt_se);
>+ }
>+ }
>+ } else {
>+ add_rt_nr = add_rr_nr = 0;
>+ }
>
>- if (rt_rq && rt_rq->rt_nr_running)
>- __enqueue_rt_entity(rt_se, flags);
>+ /*
>+ * last is the rt_se of the last deletion or modification of the
>+ * count, so the subsequent rt_se does not need to be updated.
>+ */
>+ if (rt_se == last)
>+ break;
> }
>+
> enqueue_top_rt_rq(&rq->rt);
> }
>
>diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>index ef20c61004eb..821d65106d13 100644
>--- a/kernel/sched/sched.h
>+++ b/kernel/sched/sched.h
>@@ -2879,6 +2879,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
> extern void print_dl_stats(struct seq_file *m, int cpu);
> extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
> extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
>+extern void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq);
> extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
>
> extern void resched_latency_warn(int cpu, u64 latency);
>--
>2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH-RT sched v1 0/2] Optimize the RT group scheduling
2024-06-27 17:21 [PATCH-RT sched v1 0/2] Optimize the RT group scheduling Xavier
` (2 preceding siblings ...)
2024-06-29 11:28 ` [PATCH-RT sched v2 0/2] Optimize the " Xavier
@ 2024-07-29 9:32 ` Michal Koutný
2024-07-31 3:02 ` Xavier
3 siblings, 1 reply; 20+ messages in thread
From: Michal Koutný @ 2024-07-29 9:32 UTC (permalink / raw)
To: Xavier
Cc: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
rostedt, bsegall, mgorman, bristot, vschneid, linux-kernel
[-- Attachment #1: Type: text/plain, Size: 1802 bytes --]
On Fri, Jun 28, 2024 at 01:21:54AM GMT, Xavier <xavier_qy@163.com> wrote:
> The first patch optimizes the enqueue and dequeue of rt_se, the strategy
> employs a bottom-up removal approach.
I haven't read the patches, I only have a remark to the numbers.
> The second patch provides validation for the efficiency improvements made
> by patch 1. The test case count the number of infinite loop executions for
> all threads.
>
> origion optimized
>
> 10242794134 10659512784
> 13650210798 13555924695
> 12953159254 13733609646
> 11888973428 11742656925
> 12791797633 13447598015
> 11451270205 11704847480
> 13335320346 13858155642
> 10682907328 10513565749
> 10173249704 10254224697
> 8309259793 8893668653
^^^ This is fine, that's what you measured.
> avg 11547894262 11836376429
But providing averages with that many significant digit is nonsensical
(most of them are noise).
If I put your columns into D (Octave) and estimate some errors:
(std(D)/sqrt(10)) ./ mean(D)
ans =
0.046626 0.046755
the error itself would be rounded to ~5%, so the averages measured
should be rounded accordingly
avg 11500000000 11800000000
or even more conservatively
avg 12000000000 12000000000
> Run two QEMU emulators simultaneously, one running the original kernel and the
> other running the optimized kernel, and compare the average of the results over
> 10 runs. After optimizing, the number of iterations in the infinite loop increased
> by approximately 2.5%.
Notice that the measure changed is on par with noise in the data (i.e.
it may be accidental). You may need more iterations to get cleaner
result (more convincing data).
HTH,
Michal
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re:Re: [PATCH-RT sched v1 0/2] Optimize the RT group scheduling
2024-07-29 9:32 ` [PATCH-RT sched v1 0/2] Optimize the " Michal Koutný
@ 2024-07-31 3:02 ` Xavier
0 siblings, 0 replies; 20+ messages in thread
From: Xavier @ 2024-07-31 3:02 UTC (permalink / raw)
To: Michal Koutný
Cc: mingo, peterz, juri.lelli, vincent.guittot, dietmar.eggemann,
rostedt, bsegall, mgorman, bristot, vschneid, linux-kernel, tj
Hi Michal,
Your question is good. however, I currently don't have a stable hardware
environment to execute this test case. Running it on QEMU indeed subjects
it to significant random interference. I attempted to make the test cases run
for longer periods, but I found that the results varied significantly each time.
So the previous test data was obtained by running two QEMU instances
simultaneously, one running the unoptimized kernel and the other running
the optimized kernel, this makes the results more convincing.
Nevertheless, from the code logic, it is evident that the optimizations have
indeed resulted in fewer se insert and delete operations, which theoretically
should improve efficiency.
Thanks.
--
Best Regards,
Xavier
At 2024-07-29 17:32:37, "Michal Koutný" <mkoutny@suse.com> wrote:
>On Fri, Jun 28, 2024 at 01:21:54AM GMT, Xavier <xavier_qy@163.com> wrote:
>> The first patch optimizes the enqueue and dequeue of rt_se, the strategy
>> employs a bottom-up removal approach.
>
>I haven't read the patches, I only have a remark to the numbers.
>
>> The second patch provides validation for the efficiency improvements made
>> by patch 1. The test case count the number of infinite loop executions for
>> all threads.
>>
>> origion optimized
>>
>> 10242794134 10659512784
>> 13650210798 13555924695
>> 12953159254 13733609646
>> 11888973428 11742656925
>> 12791797633 13447598015
>> 11451270205 11704847480
>> 13335320346 13858155642
>> 10682907328 10513565749
>> 10173249704 10254224697
>> 8309259793 8893668653
>
>^^^ This is fine, that's what you measured.
>
>> avg 11547894262 11836376429
>
>But providing averages with that many significant digit is nonsensical
>(most of them are noise).
>
>If I put your columns into D (Octave) and estimate some errors:
>
>(std(D)/sqrt(10)) ./ mean(D)
>ans =
>
> 0.046626 0.046755
>
>the error itself would be rounded to ~5%, so the averages measured
>should be rounded accordingly
>
> avg 11500000000 11800000000
>
>or even more conservatively
>
> avg 12000000000 12000000000
>
>> Run two QEMU emulators simultaneously, one running the original kernel and the
>> other running the optimized kernel, and compare the average of the results over
>> 10 runs. After optimizing, the number of iterations in the infinite loop increased
>> by approximately 2.5%.
>
>Notice that the measure changed is on par with noise in the data (i.e.
>it may be accidental). You may need more iterations to get cleaner
>result (more convincing data).
>
>HTH,
>Michal
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re:Re:[PATCH-RT sched v4 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se
2024-07-25 6:21 ` Xavier
@ 2024-08-08 11:22 ` Xavier
0 siblings, 0 replies; 20+ messages in thread
From: Xavier @ 2024-08-08 11:22 UTC (permalink / raw)
To: mingo, peterz, juri.lelli, vincent.guittot
Cc: dietmar.eggemann, rostedt, bsegall, mgorman, bristot, vschneid,
linux-kernel, oliver.sang, tj, mkoutny
Just a reminder, does anyone have any comments or feedback on the patch?
--
Best Regards,
Xavier
At 2024-07-25 14:21:03, "Xavier" <xavier_qy@163.com> wrote:
>
>Hi all,
>
>I would like to ask everyone for your opinions or thoughts on the RT group scheduling
>optimization. At this stage, is it ready to be merged into the corresponding branch?
>Thanks.
>
>--
>Best Regards,
>Xavier
>
>
>
>
>
>At 2024-07-17 11:00:32, "Xavier" <xavier_qy@163.com> wrote:
>>This patch optimizes the enqueue and dequeue of rt_se, the strategy employs
>>a bottom-up removal approach. Specifically, when removing an rt_se at a
>>certain level, if it is determined that the highest priority of the rq
>>associated with that rt_se has not changed, there is no need to continue
>>removing rt_se at higher levels. At this point, only the total number
>>of removed rt_se needs to be recorded, and the rt_nr_running count of
>>higher-level rq should be removed accordingly.
>>
>>Signed-off-by: Xavier <xavier_qy@163.com>
>>---
>> kernel/sched/debug.c | 48 ++++++++
>> kernel/sched/rt.c | 287 +++++++++++++++++++++++++++++++++++++------
>> kernel/sched/sched.h | 1 +
>> 3 files changed, 298 insertions(+), 38 deletions(-)
>>
>>diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
>>index c1eb9a1afd13..352ee55da25e 100644
>>--- a/kernel/sched/debug.c
>>+++ b/kernel/sched/debug.c
>>@@ -712,6 +712,54 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
>> #endif
>> }
>>
>>+static void print_rt_se(struct seq_file *m, struct sched_rt_entity *rt_se)
>>+{
>>+ struct task_struct *task;
>>+
>>+#ifdef CONFIG_RT_GROUP_SCHED
>>+ if (rt_se->my_q) {
>>+ SEQ_printf_task_group_path(m, rt_se->my_q->tg, "%s\n");
>>+ return;
>>+ }
>>+#endif
>>+ task = container_of(rt_se, struct task_struct, rt);
>>+ SEQ_printf(m, " prio-%d, pid-%d, %s\n", task->prio, task->pid, task->comm);
>>+}
>>+
>>+/*shall be called in rq lock*/
>>+void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq)
>>+{
>>+ struct rt_prio_array *array = &rt_rq->active;
>>+ struct sched_rt_entity *rt_se;
>>+ struct list_head *queue, *head;
>>+ unsigned long bitmap[2];
>>+ int idx;
>>+ int count = 0;
>>+
>>+ if (!rt_rq->rt_nr_running)
>>+ return;
>>+
>>+ memcpy(bitmap, array->bitmap, sizeof(unsigned long) * 2);
>>+ idx = sched_find_first_bit(bitmap);
>>+ WARN_ON_ONCE(idx >= MAX_RT_PRIO);
>>+
>>+ while (1) {
>>+ clear_bit(idx, bitmap);
>>+ queue = array->queue + idx;
>>+ head = queue;
>>+ queue = queue->next;
>>+ do {
>>+ rt_se = list_entry(queue, struct sched_rt_entity, run_list);
>>+ print_rt_se(m, rt_se);
>>+ queue = queue->next;
>>+ count++;
>>+ } while (queue != head);
>>+ idx = sched_find_first_bit(bitmap);
>>+ if (idx >= MAX_RT_PRIO)
>>+ break;
>>+ }
>>+}
>>+
>> void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
>> {
>> #ifdef CONFIG_RT_GROUP_SCHED
>>diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
>>index aa4c1c874fa4..b18c424a50d2 100644
>>--- a/kernel/sched/rt.c
>>+++ b/kernel/sched/rt.c
>>@@ -1113,7 +1113,7 @@ void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
>> #endif /* CONFIG_SMP */
>>
>> #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
>>-static void
>>+static int
>> inc_rt_prio(struct rt_rq *rt_rq, int prio)
>> {
>> int prev_prio = rt_rq->highest_prio.curr;
>>@@ -1122,9 +1122,11 @@ inc_rt_prio(struct rt_rq *rt_rq, int prio)
>> rt_rq->highest_prio.curr = prio;
>>
>> inc_rt_prio_smp(rt_rq, prio, prev_prio);
>>+
>>+ return prev_prio > prio;
>> }
>>
>>-static void
>>+static int
>> dec_rt_prio(struct rt_rq *rt_rq, int prio)
>> {
>> int prev_prio = rt_rq->highest_prio.curr;
>>@@ -1149,12 +1151,22 @@ dec_rt_prio(struct rt_rq *rt_rq, int prio)
>> }
>>
>> dec_rt_prio_smp(rt_rq, prio, prev_prio);
>>+ if (rt_rq->highest_prio.curr > prio)
>>+ return prio;
>>+ else
>>+ return MAX_RT_PRIO;
>> }
>>
>> #else
>>
>>-static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
>>-static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
>>+static inline int inc_rt_prio(struct rt_rq *rt_rq, int prio)
>>+{
>>+ return 0;
>>+}
>>+static inline int dec_rt_prio(struct rt_rq *rt_rq, int prio)
>>+{
>>+ return 0;
>>+}
>>
>> #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
>>
>>@@ -1218,28 +1230,31 @@ unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
>> }
>>
>> static inline
>>-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>>+int inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>> {
>> int prio = rt_se_prio(rt_se);
>>+ int prio_change;
>>
>> WARN_ON(!rt_prio(prio));
>> rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
>> rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
>>
>>- inc_rt_prio(rt_rq, prio);
>>+ prio_change = inc_rt_prio(rt_rq, prio);
>> inc_rt_group(rt_se, rt_rq);
>>+ return prio_change;
>> }
>>
>> static inline
>>-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
>>+int dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq, int prio)
>> {
>>+ int prio_changed;
>> WARN_ON(!rt_prio(rt_se_prio(rt_se)));
>>- WARN_ON(!rt_rq->rt_nr_running);
>> rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
>> rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
>>
>>- dec_rt_prio(rt_rq, rt_se_prio(rt_se));
>>+ prio_changed = dec_rt_prio(rt_rq, prio);
>> dec_rt_group(rt_se, rt_rq);
>>+ return prio_changed;
>> }
>>
>> /*
>>@@ -1255,12 +1270,13 @@ static inline bool move_entity(unsigned int flags)
>> return true;
>> }
>>
>>-static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
>>+static void __delist_rt_entity(struct sched_rt_entity *rt_se,
>>+ struct rt_prio_array *array, int last_prio)
>> {
>> list_del_init(&rt_se->run_list);
>>
>>- if (list_empty(array->queue + rt_se_prio(rt_se)))
>>- __clear_bit(rt_se_prio(rt_se), array->bitmap);
>>+ if (list_empty(array->queue + last_prio))
>>+ __clear_bit(last_prio, array->bitmap);
>>
>> rt_se->on_list = 0;
>> }
>>@@ -1371,7 +1387,12 @@ update_stats_dequeue_rt(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se,
>> }
>> }
>>
>>-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
>>+/*
>>+ * Returns: -1 indicates that rt_se was not enqueued, 0 indicates that the highest
>>+ * priority of the rq did not change after enqueue, and 1 indicates that the highest
>>+ * priority of the rq changed after enqueue.
>>+ */
>>+static int __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
>> {
>> struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
>> struct rt_prio_array *array = &rt_rq->active;
>>@@ -1386,8 +1407,8 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
>> */
>> if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
>> if (rt_se->on_list)
>>- __delist_rt_entity(rt_se, array);
>>- return;
>>+ __delist_rt_entity(rt_se, array, rt_se_prio(rt_se));
>>+ return -1;
>> }
>>
>> if (move_entity(flags)) {
>>@@ -1402,73 +1423,263 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
>> }
>> rt_se->on_rq = 1;
>>
>>- inc_rt_tasks(rt_se, rt_rq);
>>+ return inc_rt_tasks(rt_se, rt_rq);
>> }
>>
>>-static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
>>+/**
>>+ * delete rt_se from rt_rq
>>+ *
>>+ * @rt_se Nodes to be deleted
>>+ * @last_prio The highest priority of this rt_se before the previous round
>>+ * of deletion
>>+ * @flags operation flags
>>+ *
>>+ * Returns: =0 indicates that the highest priority of the current rq did not
>>+ * change during this deletion. >0 indicates it changed, and it returns the
>>+ * previous highest priority to use in the next round of deletion.
>>+ */
>>+static int __dequeue_rt_entity(struct sched_rt_entity *rt_se, int last_prio,
>>+ unsigned int flags)
>> {
>> struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
>> struct rt_prio_array *array = &rt_rq->active;
>>
>> if (move_entity(flags)) {
>> WARN_ON_ONCE(!rt_se->on_list);
>>- __delist_rt_entity(rt_se, array);
>>+ __delist_rt_entity(rt_se, array, last_prio);
>> }
>> rt_se->on_rq = 0;
>>
>>- dec_rt_tasks(rt_se, rt_rq);
>>+ return dec_rt_tasks(rt_se, rt_rq, last_prio);
>>+}
>>+
>>+static inline void dec_rq_nr_running(struct sched_rt_entity *rt_se,
>>+ unsigned int rt, unsigned int rr)
>>+{
>>+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
>>+
>>+ rt_rq->rt_nr_running -= rt;
>>+ rt_rq->rr_nr_running -= rr;
>>+}
>>+
>>+static inline void add_rq_nr_running(struct sched_rt_entity *rt_se,
>>+ unsigned int rt, unsigned int rr)
>>+{
>>+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
>>+
>>+ rt_rq->rt_nr_running += rt;
>>+ rt_rq->rr_nr_running += rr;
>>+}
>>+
>>+static inline bool on_top_rt_rq(struct sched_rt_entity *rt_se)
>>+{
>>+#ifdef CONFIG_RT_GROUP_SCHED
>>+ if (rt_se->parent)
>>+ return false;
>>+#endif
>>+ return true;
>> }
>>
>> /*
>>- * Because the prio of an upper entry depends on the lower
>>- * entries, we must remove entries top - down.
>>+ * To optimize the enqueue and dequeue of rt_se, this strategy employs a
>>+ * bottom-up removal approach. Specifically, when removing an rt_se at a
>>+ * certain level, if it is determined that the highest priority of the rq
>>+ * associated with that rt_se has not changed, there is no need to continue
>>+ * removing rt_se at higher levels. At this point, only the total number
>>+ * of removed rt_se needs to be recorded, and the rt_nr_running count of
>>+ * higher-level rq should be removed accordingly.
>>+ *
>>+ * For enqueue operations, if an rt_se at a certain level is in the rq,
>>+ * it is still necessary to check the priority of the higher-level rq.
>>+ * If the priority of the higher-level rq is found to be lower than that
>>+ * of the rt_se to be added, it should be removed, as updating the highest
>>+ * priority of the rq during addition will cause the rq to be repositioned
>>+ * in the parent rq.
>>+ *
>>+ * Conversely, for dequeue operations, if an rt_se at a certain level is
>>+ * not in the rq, the operation can be exited immediately to reduce
>>+ * unnecessary checks and handling.
>>+ *
>>+ * The return value refers to the last rt_se that was removed for enqueue
>>+ * operations. And for dequeue operations, it refers to the last rt_se
>>+ * that was either removed or had its rt_nr_running updated.
>> */
>>-static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
>>+static struct sched_rt_entity *dequeue_rt_stack(struct sched_rt_entity *rt_se,
>>+ unsigned int flags, int for_enqueue)
>> {
>>- struct sched_rt_entity *back = NULL;
>>- unsigned int rt_nr_running;
>>+ struct sched_rt_entity *last = rt_se;
>>+ struct sched_rt_entity *origin = rt_se;
>>+ unsigned int del_rt_nr = 0;
>>+ unsigned int del_rr_nr = 0;
>>+ int prio_changed = rt_se_prio(rt_se);
>>+ int sub_on_rq = 1;
>>
>> for_each_sched_rt_entity(rt_se) {
>>- rt_se->back = back;
>>- back = rt_se;
>>- }
>>+ if (on_rt_rq(rt_se)) {
>>+ if (sub_on_rq) {
>>+ /*
>>+ * The number of tasks removed from the sub-level rt_se also needs
>>+ * to be subtracted from the rq of the current rt_se, as the current
>>+ * rt_se's rq no longer includes the number of removed tasks.
>>+ */
>>+ dec_rq_nr_running(rt_se, del_rt_nr, del_rr_nr);
>>+ if ((prio_changed != MAX_RT_PRIO) ||
>>+ (rt_se_prio(rt_se) > rt_se_prio(origin))) {
>>+ /*
>>+ * If the removal of the lower-level rt_se causes the
>>+ * highest priority of the current rq to change, or if the
>>+ * priority of current rq is lower than the rt_se to be
>>+ * added, then the current rt_se also needs to be removed
>>+ * from its parent rq, and the number of deleted tasks
>>+ * should be accumulated.
>>+ */
>>+ if (prio_changed == MAX_RT_PRIO)
>>+ prio_changed = rt_se_prio(rt_se);
>>+ del_rt_nr += rt_se_nr_running(rt_se);
>>+ del_rr_nr += rt_se_rr_nr_running(rt_se);
>>+ prio_changed = __dequeue_rt_entity(rt_se,
>>+ prio_changed, flags);
>>+ last = rt_se;
>>+ } else if (!for_enqueue) {
>>+ /* For dequeue, last may only rt_nr_running was modified.*/
>>+ last = rt_se;
>>+ }
>>+ } else {
>>+ /*
>>+ * Entering this branch must be for enqueue, as dequeue would break
>>+ * if an rt_se is not online.
>>+ * If the sub-level node is not online, and the current rt_se's
>>+ * priority is lower than the one being added, current rt_se need
>>+ * to be removed.
>>+ */
>>+ prio_changed = rt_se_prio(rt_se);
>>+ if (prio_changed > rt_se_prio(origin)) {
>>+ del_rt_nr += rt_se_nr_running(rt_se);
>>+ del_rr_nr += rt_se_rr_nr_running(rt_se);
>>+ prio_changed = __dequeue_rt_entity(rt_se,
>>+ prio_changed, flags);
>>+ last = rt_se;
>>+ } else {
>>+ prio_changed = MAX_RT_PRIO;
>>+ }
>>+ }
>>
>>- rt_nr_running = rt_rq_of_se(back)->rt_nr_running;
>>+ /*
>>+ * If the current rt_se is on the top rt_rq, then the already deleted
>>+ * nodes, plus the count of the rt_rq where current rt_se located,
>>+ * need to be removed from the top_rt_rq.
>>+ */
>>+ if (on_top_rt_rq(rt_se)) {
>>+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
>>+ del_rt_nr + rt_rq_of_se(rt_se)->rt_nr_running);
>>+ }
>>+ sub_on_rq = 1;
>>+ } else if (for_enqueue) {
>>+ struct rt_rq *group_rq = group_rt_rq(rt_se);
>>
>>- for (rt_se = back; rt_se; rt_se = rt_se->back) {
>>- if (on_rt_rq(rt_se))
>>- __dequeue_rt_entity(rt_se, flags);
>>+ /*
>>+ * In the case of an enqueue operation, if a certain level is found to be
>>+ * not online, then the previous counts need to be reset to zero.
>>+ */
>>+ prio_changed = MAX_RT_PRIO;
>>+ sub_on_rq = 0;
>>+ del_rt_nr = 0;
>>+ del_rr_nr = 0;
>>+
>>+ /*
>>+ * If the current group is being throttled, then there is no need to check
>>+ * higher levels since enqueueing will not affect higher-level nodes.
>>+ */
>>+ if (group_rq && rt_rq_throttled(group_rq))
>>+ break;
>>+
>>+ if (on_top_rt_rq(rt_se))
>>+ dequeue_top_rt_rq(rt_rq_of_se(rt_se),
>>+ rt_rq_of_se(rt_se)->rt_nr_running);
>>+ } else {
>>+ last = rt_se;
>>+ break;
>>+ }
>> }
>>
>>- dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running);
>>+ return last;
>> }
>>
>> static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
>> {
>> struct rq *rq = rq_of_rt_se(rt_se);
>>+ struct sched_rt_entity *last;
>>+ unsigned int add_rt_nr = 0;
>>+ unsigned int add_rr_nr = 0;
>>+ int enqueue = 1;
>>+ int prio_change = 1;
>>
>> update_stats_enqueue_rt(rt_rq_of_se(rt_se), rt_se, flags);
>>
>>- dequeue_rt_stack(rt_se, flags);
>>- for_each_sched_rt_entity(rt_se)
>>- __enqueue_rt_entity(rt_se, flags);
>>+ last = dequeue_rt_stack(rt_se, flags, 1);
>>+
>>+ for_each_sched_rt_entity(rt_se) {
>>+ if (enqueue || !on_rt_rq(rt_se) || (prio_change == 1)) {
>>+ prio_change = __enqueue_rt_entity(rt_se, flags);
>>+ if (prio_change >= 0) {
>>+ add_rt_nr = rt_se_nr_running(rt_se);
>>+ add_rr_nr = rt_se_rr_nr_running(rt_se);
>>+ } else {
>>+ add_rt_nr = add_rr_nr = 0;
>>+ }
>>+ } else {
>>+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
>>+ }
>>+
>>+ if (rt_se == last)
>>+ enqueue = 0;
>>+ }
>>+
>> enqueue_top_rt_rq(&rq->rt);
>> }
>>
>> static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
>> {
>> struct rq *rq = rq_of_rt_se(rt_se);
>>+ struct sched_rt_entity *last;
>>+ unsigned int add_rt_nr = 0;
>>+ unsigned int add_rr_nr = 0;
>>+ int prio_change = 1;
>>
>> update_stats_dequeue_rt(rt_rq_of_se(rt_se), rt_se, flags);
>>
>>- dequeue_rt_stack(rt_se, flags);
>>+ last = dequeue_rt_stack(rt_se, flags, 0);
>>
>> for_each_sched_rt_entity(rt_se) {
>> struct rt_rq *rt_rq = group_rt_rq(rt_se);
>>+ if (rt_rq && rt_rq->rt_nr_running) {
>>+ if (on_rt_rq(rt_se)) {
>>+ add_rq_nr_running(rt_se, add_rt_nr, add_rr_nr);
>>+ } else {
>>+ prio_change = __enqueue_rt_entity(rt_se, flags);
>>+ if (prio_change == 0) {
>>+ /*
>>+ * If enqueue is successful and the priority of the rq has
>>+ * not changed, then the parent node only needs to add the
>>+ * count of the current rt_se. Otherwise, the parent node
>>+ * will also need to enqueue.
>>+ */
>>+ add_rt_nr = rt_se_nr_running(rt_se);
>>+ add_rr_nr = rt_se_rr_nr_running(rt_se);
>>+ }
>>+ }
>>+ } else {
>>+ add_rt_nr = add_rr_nr = 0;
>>+ }
>>
>>- if (rt_rq && rt_rq->rt_nr_running)
>>- __enqueue_rt_entity(rt_se, flags);
>>+ /*
>>+ * last is the rt_se of the last deletion or modification of the
>>+ * count, so the subsequent rt_se does not need to be updated.
>>+ */
>>+ if (rt_se == last)
>>+ break;
>> }
>>+
>> enqueue_top_rt_rq(&rq->rt);
>> }
>>
>>diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
>>index ef20c61004eb..821d65106d13 100644
>>--- a/kernel/sched/sched.h
>>+++ b/kernel/sched/sched.h
>>@@ -2879,6 +2879,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
>> extern void print_dl_stats(struct seq_file *m, int cpu);
>> extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
>> extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
>>+extern void print_rt_rq_task(struct seq_file *m, struct rt_rq *rt_rq);
>> extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
>>
>> extern void resched_latency_warn(int cpu, u64 latency);
>>--
>>2.45.2
^ permalink raw reply [flat|nested] 20+ messages in thread
end of thread, other threads:[~2024-08-08 12:08 UTC | newest]
Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-27 17:21 [PATCH-RT sched v1 0/2] Optimize the RT group scheduling Xavier
2024-06-27 17:21 ` [PATCH-RT sched v1 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-06-28 23:29 ` kernel test robot
2024-06-29 0:20 ` kernel test robot
2024-06-27 17:21 ` [PATCH-RT sched v1 2/2] RT test: Adding test cases for RT group scheduling Xavier
2024-06-29 11:28 ` [PATCH-RT sched v2 0/2] Optimize the " Xavier
2024-06-29 11:28 ` [PATCH-RT sched v2 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-07-04 8:30 ` kernel test robot
2024-06-29 11:28 ` [PATCH-RT sched v2 2/2] RT test: Adding test cases for RT group scheduling Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 0/2] Optimize the " Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-07-16 20:24 ` kernel test robot
2024-07-17 3:00 ` [PATCH-RT sched v4 0/2] Optimize the RT group scheduling Xavier
2024-07-17 3:00 ` [PATCH-RT sched v4 1/2] RT SCHED: Optimize the enqueue and dequeue operations for rt_se Xavier
2024-07-25 6:21 ` Xavier
2024-08-08 11:22 ` Xavier
2024-07-17 3:00 ` [PATCH-RT sched v4 2/2] RT test: Adding test cases for RT group scheduling Xavier
2024-07-16 6:05 ` [PATCH-RT sched v3 " Xavier
2024-07-29 9:32 ` [PATCH-RT sched v1 0/2] Optimize the " Michal Koutný
2024-07-31 3:02 ` Xavier
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox