* [PATCH 1/2] sched: make cfs_rq->throttled_csd_list available on !SMP @ 2023-09-22 23:05 Josh Don 2023-09-22 23:05 ` [PATCH 2/2] sched: fix warning in bandwidth distribution Josh Don 2023-09-24 10:39 ` [tip: sched/core] sched/fair: Make cfs_rq->throttled_csd_list available on !SMP tip-bot2 for Josh Don 0 siblings, 2 replies; 6+ messages in thread From: Josh Don @ 2023-09-22 23:05 UTC (permalink / raw) To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot Cc: Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman, Daniel Bristot de Oliveira, Valentin Schneider, linux-kernel, Josh Don This makes the following patch cleaner by avoiding extra CONFIG_SMP conditionals on the availability of throttled_csd_list. Signed-off-by: Josh Don <joshdon@google.com> --- kernel/sched/fair.c | 4 ---- kernel/sched/sched.h | 2 -- 2 files changed, 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 384900bf87eb..8f4e63fc8900 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5765,11 +5765,9 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) if (!cfs_rq_throttled(cfs_rq)) goto next; -#ifdef CONFIG_SMP /* Already queued for async unthrottle */ if (!list_empty(&cfs_rq->throttled_csd_list)) goto next; -#endif /* By the above checks, this should never be true */ SCHED_WARN_ON(cfs_rq->runtime_remaining > 0); @@ -6136,9 +6134,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) { cfs_rq->runtime_enabled = 0; INIT_LIST_HEAD(&cfs_rq->throttled_list); -#ifdef CONFIG_SMP INIT_LIST_HEAD(&cfs_rq->throttled_csd_list); -#endif } void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 68768f47ccb7..8f6a6b693d10 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -644,9 +644,7 @@ struct cfs_rq { int throttled; int throttle_count; struct list_head throttled_list; -#ifdef CONFIG_SMP struct list_head throttled_csd_list; -#endif #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ }; -- 2.42.0.515.g380fc7ccd1-goog ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH 2/2] sched: fix warning in bandwidth distribution 2023-09-22 23:05 [PATCH 1/2] sched: make cfs_rq->throttled_csd_list available on !SMP Josh Don @ 2023-09-22 23:05 ` Josh Don 2023-09-24 10:12 ` Ingo Molnar 2023-09-24 10:39 ` [tip: sched/core] sched/fair: Fix " tip-bot2 for Josh Don 2023-09-24 10:39 ` [tip: sched/core] sched/fair: Make cfs_rq->throttled_csd_list available on !SMP tip-bot2 for Josh Don 1 sibling, 2 replies; 6+ messages in thread From: Josh Don @ 2023-09-22 23:05 UTC (permalink / raw) To: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot Cc: Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman, Daniel Bristot de Oliveira, Valentin Schneider, linux-kernel, Josh Don We've observed the following warning being hit in distribute_cfs_runtime(): SCHED_WARN_ON(cfs_rq->runtime_remaining > 0) We have the following race: - cpu0: running bandwidth distribution (distribute_cfs_runtime). Inspects the local cfs_rq and makes its runtime_remaining positive. However, we defer unthrottling the local cfs_rq until after considering all remote cfs_rq's. - cpu1: starts running bandwidth distribution from the slack timer. When it finds the cfs_rq for cpu 0 on the throttled list, it observers the that the cfs_rq is throttled, yet is not on the CSD list, and has a positive runtime_remaining, thus triggering the warning in distribute_cfs_runtime. To fix this, we can rework the local unthrottling logic to put the local cfs_rq on a local list, so that any future bandwidth distributions will realize that the cfs_rq is about to be unthrottled. Signed-off-by: Josh Don <joshdon@google.com> --- kernel/sched/fair.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8f4e63fc8900..de002dab28cf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5743,13 +5743,13 @@ static void unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq) static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) { - struct cfs_rq *local_unthrottle = NULL; int this_cpu = smp_processor_id(); u64 runtime, remaining = 1; bool throttled = false; - struct cfs_rq *cfs_rq; + struct cfs_rq *cfs_rq, *tmp; struct rq_flags rf; struct rq *rq; + LIST_HEAD(local_unthrottle); rcu_read_lock(); list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, @@ -5784,11 +5784,17 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) /* we check whether we're throttled above */ if (cfs_rq->runtime_remaining > 0) { - if (cpu_of(rq) != this_cpu || - SCHED_WARN_ON(local_unthrottle)) + if (cpu_of(rq) != this_cpu) { unthrottle_cfs_rq_async(cfs_rq); - else - local_unthrottle = cfs_rq; + } else { + /* + * We currently only expect to be unthrottling + * a single cfs_rq locally. + */ + SCHED_WARN_ON(!list_empty(&local_unthrottle)); + list_add_tail(&cfs_rq->throttled_csd_list, + &local_unthrottle); + } } else { throttled = true; } @@ -5796,15 +5802,23 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) next: rq_unlock_irqrestore(rq, &rf); } - rcu_read_unlock(); - if (local_unthrottle) { - rq = cpu_rq(this_cpu); + list_for_each_entry_safe(cfs_rq, tmp, &local_unthrottle, + throttled_csd_list) { + struct rq *rq = rq_of(cfs_rq); + rq_lock_irqsave(rq, &rf); - if (cfs_rq_throttled(local_unthrottle)) - unthrottle_cfs_rq(local_unthrottle); + + list_del_init(&cfs_rq->throttled_csd_list); + + if (cfs_rq_throttled(cfs_rq)) + unthrottle_cfs_rq(cfs_rq); + rq_unlock_irqrestore(rq, &rf); } + SCHED_WARN_ON(!list_empty(&local_unthrottle)); + + rcu_read_unlock(); return throttled; } -- 2.42.0.515.g380fc7ccd1-goog ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH 2/2] sched: fix warning in bandwidth distribution 2023-09-22 23:05 ` [PATCH 2/2] sched: fix warning in bandwidth distribution Josh Don @ 2023-09-24 10:12 ` Ingo Molnar 2023-09-25 16:38 ` Josh Don 2023-09-24 10:39 ` [tip: sched/core] sched/fair: Fix " tip-bot2 for Josh Don 1 sibling, 1 reply; 6+ messages in thread From: Ingo Molnar @ 2023-09-24 10:12 UTC (permalink / raw) To: Josh Don Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman, Daniel Bristot de Oliveira, Valentin Schneider, linux-kernel * Josh Don <joshdon@google.com> wrote: > We've observed the following warning being hit in > distribute_cfs_runtime(): > SCHED_WARN_ON(cfs_rq->runtime_remaining > 0) > > We have the following race: > > - cpu0: running bandwidth distribution (distribute_cfs_runtime). > Inspects the local cfs_rq and makes its runtime_remaining positive. > However, we defer unthrottling the local cfs_rq until after > considering all remote cfs_rq's. > - cpu1: starts running bandwidth distribution from the slack timer. When > it finds the cfs_rq for cpu 0 on the throttled list, it observers the > that the cfs_rq is throttled, yet is not on the CSD list, and has a > positive runtime_remaining, thus triggering the warning in > distribute_cfs_runtime. > > To fix this, we can rework the local unthrottling logic to put the local > cfs_rq on a local list, so that any future bandwidth distributions will > realize that the cfs_rq is about to be unthrottled. > > Signed-off-by: Josh Don <joshdon@google.com> > --- > kernel/sched/fair.c | 36 +++++++++++++++++++++++++----------- > 1 file changed, 25 insertions(+), 11 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 8f4e63fc8900..de002dab28cf 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -5743,13 +5743,13 @@ static void unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq) > > static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) > { > - struct cfs_rq *local_unthrottle = NULL; > int this_cpu = smp_processor_id(); > u64 runtime, remaining = 1; > bool throttled = false; > - struct cfs_rq *cfs_rq; > + struct cfs_rq *cfs_rq, *tmp; > struct rq_flags rf; > struct rq *rq; > + LIST_HEAD(local_unthrottle); > > rcu_read_lock(); > list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, > @@ -5784,11 +5784,17 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) > > /* we check whether we're throttled above */ > if (cfs_rq->runtime_remaining > 0) { > - if (cpu_of(rq) != this_cpu || > - SCHED_WARN_ON(local_unthrottle)) > + if (cpu_of(rq) != this_cpu) { > unthrottle_cfs_rq_async(cfs_rq); > - else > - local_unthrottle = cfs_rq; > + } else { > + /* > + * We currently only expect to be unthrottling > + * a single cfs_rq locally. > + */ > + SCHED_WARN_ON(!list_empty(&local_unthrottle)); > + list_add_tail(&cfs_rq->throttled_csd_list, > + &local_unthrottle); > + } > } else { > throttled = true; > } > @@ -5796,15 +5802,23 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) > next: > rq_unlock_irqrestore(rq, &rf); > } > - rcu_read_unlock(); > > - if (local_unthrottle) { > - rq = cpu_rq(this_cpu); > + list_for_each_entry_safe(cfs_rq, tmp, &local_unthrottle, > + throttled_csd_list) { > + struct rq *rq = rq_of(cfs_rq); > + > rq_lock_irqsave(rq, &rf); > - if (cfs_rq_throttled(local_unthrottle)) > - unthrottle_cfs_rq(local_unthrottle); > + > + list_del_init(&cfs_rq->throttled_csd_list); > + > + if (cfs_rq_throttled(cfs_rq)) > + unthrottle_cfs_rq(cfs_rq); > + > rq_unlock_irqrestore(rq, &rf); > } > + SCHED_WARN_ON(!list_empty(&local_unthrottle)); > + > + rcu_read_unlock(); Thanks, this looks much cleaner. When the warning hits, we don't have any other side-effects, such as bad behavior or data corruption, correct? Under that assumption I've queued your fix in tip:sched/core, for a v6.7 merge, and not in tip:sched/urgent for a v6.6 merge, but let me know if I'm reading the code wrong... Thanks, Ingo ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 2/2] sched: fix warning in bandwidth distribution 2023-09-24 10:12 ` Ingo Molnar @ 2023-09-25 16:38 ` Josh Don 0 siblings, 0 replies; 6+ messages in thread From: Josh Don @ 2023-09-25 16:38 UTC (permalink / raw) To: Ingo Molnar Cc: Ingo Molnar, Peter Zijlstra, Juri Lelli, Vincent Guittot, Dietmar Eggemann, Steven Rostedt, Ben Segall, Mel Gorman, Daniel Bristot de Oliveira, Valentin Schneider, linux-kernel > When the warning hits, we don't have any other side-effects, > such as bad behavior or data corruption, correct? Correct, the warning is benign. > Under that assumption I've queued your fix in tip:sched/core, > for a v6.7 merge, and not in tip:sched/urgent for a v6.6 merge, > but let me know if I'm reading the code wrong... Thanks Ingo! ^ permalink raw reply [flat|nested] 6+ messages in thread
* [tip: sched/core] sched/fair: Fix warning in bandwidth distribution 2023-09-22 23:05 ` [PATCH 2/2] sched: fix warning in bandwidth distribution Josh Don 2023-09-24 10:12 ` Ingo Molnar @ 2023-09-24 10:39 ` tip-bot2 for Josh Don 1 sibling, 0 replies; 6+ messages in thread From: tip-bot2 for Josh Don @ 2023-09-24 10:39 UTC (permalink / raw) To: linux-tip-commits; +Cc: Josh Don, Ingo Molnar, x86, linux-kernel The following commit has been merged into the sched/core branch of tip: Commit-ID: 2f8c62296b6f656bbfd17e9f1fadd7478003a9d9 Gitweb: https://git.kernel.org/tip/2f8c62296b6f656bbfd17e9f1fadd7478003a9d9 Author: Josh Don <joshdon@google.com> AuthorDate: Fri, 22 Sep 2023 16:05:35 -07:00 Committer: Ingo Molnar <mingo@kernel.org> CommitterDate: Sun, 24 Sep 2023 12:08:29 +02:00 sched/fair: Fix warning in bandwidth distribution We've observed the following warning being hit in distribute_cfs_runtime(): SCHED_WARN_ON(cfs_rq->runtime_remaining > 0) We have the following race: - CPU 0: running bandwidth distribution (distribute_cfs_runtime). Inspects the local cfs_rq and makes its runtime_remaining positive. However, we defer unthrottling the local cfs_rq until after considering all remote cfs_rq's. - CPU 1: starts running bandwidth distribution from the slack timer. When it finds the cfs_rq for CPU 0 on the throttled list, it observers the that the cfs_rq is throttled, yet is not on the CSD list, and has a positive runtime_remaining, thus triggering the warning in distribute_cfs_runtime. To fix this, we can rework the local unthrottling logic to put the local cfs_rq on a local list, so that any future bandwidth distributions will realize that the cfs_rq is about to be unthrottled. Signed-off-by: Josh Don <joshdon@google.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Link: https://lore.kernel.org/r/20230922230535.296350-2-joshdon@google.com --- kernel/sched/fair.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 41c960e..2973173 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5741,13 +5741,13 @@ static void unthrottle_cfs_rq_async(struct cfs_rq *cfs_rq) static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) { - struct cfs_rq *local_unthrottle = NULL; int this_cpu = smp_processor_id(); u64 runtime, remaining = 1; bool throttled = false; - struct cfs_rq *cfs_rq; + struct cfs_rq *cfs_rq, *tmp; struct rq_flags rf; struct rq *rq; + LIST_HEAD(local_unthrottle); rcu_read_lock(); list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq, @@ -5782,11 +5782,17 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) /* we check whether we're throttled above */ if (cfs_rq->runtime_remaining > 0) { - if (cpu_of(rq) != this_cpu || - SCHED_WARN_ON(local_unthrottle)) + if (cpu_of(rq) != this_cpu) { unthrottle_cfs_rq_async(cfs_rq); - else - local_unthrottle = cfs_rq; + } else { + /* + * We currently only expect to be unthrottling + * a single cfs_rq locally. + */ + SCHED_WARN_ON(!list_empty(&local_unthrottle)); + list_add_tail(&cfs_rq->throttled_csd_list, + &local_unthrottle); + } } else { throttled = true; } @@ -5794,15 +5800,23 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) next: rq_unlock_irqrestore(rq, &rf); } - rcu_read_unlock(); - if (local_unthrottle) { - rq = cpu_rq(this_cpu); + list_for_each_entry_safe(cfs_rq, tmp, &local_unthrottle, + throttled_csd_list) { + struct rq *rq = rq_of(cfs_rq); + rq_lock_irqsave(rq, &rf); - if (cfs_rq_throttled(local_unthrottle)) - unthrottle_cfs_rq(local_unthrottle); + + list_del_init(&cfs_rq->throttled_csd_list); + + if (cfs_rq_throttled(cfs_rq)) + unthrottle_cfs_rq(cfs_rq); + rq_unlock_irqrestore(rq, &rf); } + SCHED_WARN_ON(!list_empty(&local_unthrottle)); + + rcu_read_unlock(); return throttled; } ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [tip: sched/core] sched/fair: Make cfs_rq->throttled_csd_list available on !SMP 2023-09-22 23:05 [PATCH 1/2] sched: make cfs_rq->throttled_csd_list available on !SMP Josh Don 2023-09-22 23:05 ` [PATCH 2/2] sched: fix warning in bandwidth distribution Josh Don @ 2023-09-24 10:39 ` tip-bot2 for Josh Don 1 sibling, 0 replies; 6+ messages in thread From: tip-bot2 for Josh Don @ 2023-09-24 10:39 UTC (permalink / raw) To: linux-tip-commits; +Cc: Josh Don, Ingo Molnar, x86, linux-kernel The following commit has been merged into the sched/core branch of tip: Commit-ID: 30797bce8ef0c73f0c388148ffac92458533b10e Gitweb: https://git.kernel.org/tip/30797bce8ef0c73f0c388148ffac92458533b10e Author: Josh Don <joshdon@google.com> AuthorDate: Fri, 22 Sep 2023 16:05:34 -07:00 Committer: Ingo Molnar <mingo@kernel.org> CommitterDate: Sun, 24 Sep 2023 12:08:28 +02:00 sched/fair: Make cfs_rq->throttled_csd_list available on !SMP This makes the following patch cleaner by avoiding extra CONFIG_SMP conditionals on the availability of rq->throttled_csd_list. Signed-off-by: Josh Don <joshdon@google.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Link: https://lore.kernel.org/r/20230922230535.296350-1-joshdon@google.com --- kernel/sched/fair.c | 4 ---- kernel/sched/sched.h | 2 -- 2 files changed, 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7572000..41c960e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5763,11 +5763,9 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b) if (!cfs_rq_throttled(cfs_rq)) goto next; -#ifdef CONFIG_SMP /* Already queued for async unthrottle */ if (!list_empty(&cfs_rq->throttled_csd_list)) goto next; -#endif /* By the above checks, this should never be true */ SCHED_WARN_ON(cfs_rq->runtime_remaining > 0); @@ -6134,9 +6132,7 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) { cfs_rq->runtime_enabled = 0; INIT_LIST_HEAD(&cfs_rq->throttled_list); -#ifdef CONFIG_SMP INIT_LIST_HEAD(&cfs_rq->throttled_csd_list); -#endif } void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 9260120..96f8ab7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -634,9 +634,7 @@ struct cfs_rq { int throttled; int throttle_count; struct list_head throttled_list; -#ifdef CONFIG_SMP struct list_head throttled_csd_list; -#endif #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ }; ^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-09-25 16:38 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2023-09-22 23:05 [PATCH 1/2] sched: make cfs_rq->throttled_csd_list available on !SMP Josh Don 2023-09-22 23:05 ` [PATCH 2/2] sched: fix warning in bandwidth distribution Josh Don 2023-09-24 10:12 ` Ingo Molnar 2023-09-25 16:38 ` Josh Don 2023-09-24 10:39 ` [tip: sched/core] sched/fair: Fix " tip-bot2 for Josh Don 2023-09-24 10:39 ` [tip: sched/core] sched/fair: Make cfs_rq->throttled_csd_list available on !SMP tip-bot2 for Josh Don
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.