From: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
To: Chengming Zhou <zhouchengming-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
Cc: tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
mkoutny-IBi9RG/b67k@public.gmane.org,
surenb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org,
gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org,
corbet-T1hC0tSOHrs@public.gmane.org,
mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org,
songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org,
cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-doc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH v3 09/10] sched/psi: cache parent psi_group to speed up groups iterate
Date: Wed, 24 Aug 2022 06:18:49 -0400 [thread overview]
Message-ID: <YwX7CeeRDDAhV3UH@cmpxchg.org> (raw)
In-Reply-To: <20220824081829.33748-10-zhouchengming-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
Hi Chengming,
This looks generally good to me, but I have one comment:
On Wed, Aug 24, 2022 at 04:18:28PM +0800, Chengming Zhou wrote:
> @@ -772,30 +772,18 @@ static void psi_group_change(struct psi_group *group, int cpu,
> schedule_delayed_work(&group->avgs_work, PSI_FREQ);
> }
>
> -static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
> +static inline struct psi_group *task_psi_group(struct task_struct *task)
> {
> - if (*iter == &psi_system)
> - return NULL;
> -
> #ifdef CONFIG_CGROUPS
> - if (static_branch_likely(&psi_cgroups_enabled)) {
> - struct cgroup *cgroup = NULL;
> -
> - if (!*iter)
> - cgroup = task->cgroups->dfl_cgrp;
> - else
> - cgroup = cgroup_parent(*iter);
> -
> - if (cgroup && cgroup_parent(cgroup)) {
> - *iter = cgroup;
> - return cgroup_psi(cgroup);
> - }
> - }
> + if (static_branch_likely(&psi_cgroups_enabled))
> + return cgroup_psi(task_dfl_cgroup(task));
> #endif
> - *iter = &psi_system;
> return &psi_system;
> }
>
> +#define for_each_psi_group(group) \
> + for (; group; group = group->parent)
It would be better to open-code this. The macro hides that it's walking
ancestors, and the name and single parameter suggest it's walking some
global list - not that the parameter is both the iterator AND the starting point.
This makes for particularly obscure code in the discontiguous loops in
psi_task_switch():
group = task_psi_group(task);
for_each_psi_group(group)
if (group == common)
break;
/* This looks like a second full loop: */
for_each_psi_group(group)
...
> static void psi_flags_change(struct task_struct *task, int clear, int set)
> {
> if (((task->psi_flags & set) ||
> @@ -815,7 +803,6 @@ void psi_task_change(struct task_struct *task, int clear, int set)
> {
> int cpu = task_cpu(task);
> struct psi_group *group;
> - void *iter = NULL;
> u64 now;
>
> if (!task->pid)
> @@ -825,7 +812,8 @@ void psi_task_change(struct task_struct *task, int clear, int set)
>
> now = cpu_clock(cpu);
>
> - while ((group = iterate_groups(task, &iter)))
> + group = task_psi_group(task);
> + for_each_psi_group(group)
> psi_group_change(group, cpu, clear, set, now, true);
task_psi_group() never returns NULL, so this should be a do-while loop:
group = task_psi_group(task);
do {
psi_group_change(group, cpu, clear, set, now, true);
} while ((group = group->parent));
> @@ -834,7 +822,6 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> {
> struct psi_group *group, *common = NULL;
> int cpu = task_cpu(prev);
> - void *iter;
> u64 now = cpu_clock(cpu);
>
> if (next->pid) {
> @@ -845,8 +832,8 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> * we reach the first common ancestor. Iterate @next's
> * ancestors only until we encounter @prev's ONCPU.
> */
> - iter = NULL;
> - while ((group = iterate_groups(next, &iter))) {
> + group = task_psi_group(next);
> + for_each_psi_group(group) {
Ditto.
> if (per_cpu_ptr(group->pcpu, cpu)->state_mask &
> PSI_ONCPU) {
> common = group;
> @@ -887,9 +874,12 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
>
> psi_flags_change(prev, clear, set);
>
> - iter = NULL;
> - while ((group = iterate_groups(prev, &iter)) && group != common)
> + group = task_psi_group(prev);
> + for_each_psi_group(group) {
> + if (group == common)
> + break;
Ditto.
> psi_group_change(group, cpu, clear, set, now, wake_clock);
> + }
>
> /*
> * TSK_ONCPU is handled up to the common ancestor. If we're tasked
> @@ -897,7 +887,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> */
> if (sleep || unlikely(prev->in_memstall != next->in_memstall)) {
> clear &= ~TSK_ONCPU;
> - for (; group; group = iterate_groups(prev, &iter))
> + for_each_psi_group(group)
> psi_group_change(group, cpu, clear, set, now, wake_clock);
This can stay as is (no do-while needed), since group may already be NULL here.
> @@ -907,7 +897,6 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> void psi_account_irqtime(struct task_struct *task, u32 delta)
> {
> int cpu = task_cpu(task);
> - void *iter = NULL;
> struct psi_group *group;
> struct psi_group_cpu *groupc;
> u64 now;
> @@ -917,7 +906,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)
>
> now = cpu_clock(cpu);
>
> - while ((group = iterate_groups(task, &iter))) {
> + group = task_psi_group(task);
> + for_each_psi_group(group) {
> groupc = per_cpu_ptr(group->pcpu, cpu);
do-while again.
With that,
Acked-by: Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
Thanks!
WARNING: multiple messages have this Message-ID (diff)
From: Johannes Weiner <hannes@cmpxchg.org>
To: Chengming Zhou <zhouchengming@bytedance.com>
Cc: tj@kernel.org, mkoutny@suse.com, surenb@google.com,
gregkh@linuxfoundation.org, corbet@lwn.net, mingo@redhat.com,
peterz@infradead.org, songmuchun@bytedance.com,
cgroups@vger.kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 09/10] sched/psi: cache parent psi_group to speed up groups iterate
Date: Wed, 24 Aug 2022 06:18:49 -0400 [thread overview]
Message-ID: <YwX7CeeRDDAhV3UH@cmpxchg.org> (raw)
In-Reply-To: <20220824081829.33748-10-zhouchengming@bytedance.com>
Hi Chengming,
This looks generally good to me, but I have one comment:
On Wed, Aug 24, 2022 at 04:18:28PM +0800, Chengming Zhou wrote:
> @@ -772,30 +772,18 @@ static void psi_group_change(struct psi_group *group, int cpu,
> schedule_delayed_work(&group->avgs_work, PSI_FREQ);
> }
>
> -static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
> +static inline struct psi_group *task_psi_group(struct task_struct *task)
> {
> - if (*iter == &psi_system)
> - return NULL;
> -
> #ifdef CONFIG_CGROUPS
> - if (static_branch_likely(&psi_cgroups_enabled)) {
> - struct cgroup *cgroup = NULL;
> -
> - if (!*iter)
> - cgroup = task->cgroups->dfl_cgrp;
> - else
> - cgroup = cgroup_parent(*iter);
> -
> - if (cgroup && cgroup_parent(cgroup)) {
> - *iter = cgroup;
> - return cgroup_psi(cgroup);
> - }
> - }
> + if (static_branch_likely(&psi_cgroups_enabled))
> + return cgroup_psi(task_dfl_cgroup(task));
> #endif
> - *iter = &psi_system;
> return &psi_system;
> }
>
> +#define for_each_psi_group(group) \
> + for (; group; group = group->parent)
It would be better to open-code this. The macro hides that it's walking
ancestors, and the name and single parameter suggest it's walking some
global list - not that the parameter is both the iterator AND the starting point.
This makes for particularly obscure code in the discontiguous loops in
psi_task_switch():
group = task_psi_group(task);
for_each_psi_group(group)
if (group == common)
break;
/* This looks like a second full loop: */
for_each_psi_group(group)
...
> static void psi_flags_change(struct task_struct *task, int clear, int set)
> {
> if (((task->psi_flags & set) ||
> @@ -815,7 +803,6 @@ void psi_task_change(struct task_struct *task, int clear, int set)
> {
> int cpu = task_cpu(task);
> struct psi_group *group;
> - void *iter = NULL;
> u64 now;
>
> if (!task->pid)
> @@ -825,7 +812,8 @@ void psi_task_change(struct task_struct *task, int clear, int set)
>
> now = cpu_clock(cpu);
>
> - while ((group = iterate_groups(task, &iter)))
> + group = task_psi_group(task);
> + for_each_psi_group(group)
> psi_group_change(group, cpu, clear, set, now, true);
task_psi_group() never returns NULL, so this should be a do-while loop:
group = task_psi_group(task);
do {
psi_group_change(group, cpu, clear, set, now, true);
} while ((group = group->parent));
> @@ -834,7 +822,6 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> {
> struct psi_group *group, *common = NULL;
> int cpu = task_cpu(prev);
> - void *iter;
> u64 now = cpu_clock(cpu);
>
> if (next->pid) {
> @@ -845,8 +832,8 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> * we reach the first common ancestor. Iterate @next's
> * ancestors only until we encounter @prev's ONCPU.
> */
> - iter = NULL;
> - while ((group = iterate_groups(next, &iter))) {
> + group = task_psi_group(next);
> + for_each_psi_group(group) {
Ditto.
> if (per_cpu_ptr(group->pcpu, cpu)->state_mask &
> PSI_ONCPU) {
> common = group;
> @@ -887,9 +874,12 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
>
> psi_flags_change(prev, clear, set);
>
> - iter = NULL;
> - while ((group = iterate_groups(prev, &iter)) && group != common)
> + group = task_psi_group(prev);
> + for_each_psi_group(group) {
> + if (group == common)
> + break;
Ditto.
> psi_group_change(group, cpu, clear, set, now, wake_clock);
> + }
>
> /*
> * TSK_ONCPU is handled up to the common ancestor. If we're tasked
> @@ -897,7 +887,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> */
> if (sleep || unlikely(prev->in_memstall != next->in_memstall)) {
> clear &= ~TSK_ONCPU;
> - for (; group; group = iterate_groups(prev, &iter))
> + for_each_psi_group(group)
> psi_group_change(group, cpu, clear, set, now, wake_clock);
This can stay as is (no do-while needed), since group may already be NULL here.
> @@ -907,7 +897,6 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
> void psi_account_irqtime(struct task_struct *task, u32 delta)
> {
> int cpu = task_cpu(task);
> - void *iter = NULL;
> struct psi_group *group;
> struct psi_group_cpu *groupc;
> u64 now;
> @@ -917,7 +906,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)
>
> now = cpu_clock(cpu);
>
> - while ((group = iterate_groups(task, &iter))) {
> + group = task_psi_group(task);
> + for_each_psi_group(group) {
> groupc = per_cpu_ptr(group->pcpu, cpu);
do-while again.
With that,
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Thanks!
next prev parent reply other threads:[~2022-08-24 10:18 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-24 8:18 [PATCH v3 00/10] sched/psi: some optimization and extension Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 01/10] sched/psi: fix periodic aggregation shut off Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 03/10] sched/psi: save percpu memory when !psi_cgroups_enabled Chengming Zhou
[not found] ` <20220824081829.33748-1-zhouchengming-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
2022-08-24 8:18 ` [PATCH v3 02/10] sched/psi: don't create cgroup PSI files when psi_disabled Chengming Zhou
2022-08-24 8:18 ` Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 04/10] sched/psi: move private helpers to sched/stats.h Chengming Zhou
2022-08-24 8:18 ` Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 05/10] sched/psi: optimize task switch inside shared cgroups again Chengming Zhou
2022-08-24 8:18 ` Chengming Zhou
[not found] ` <20220824081829.33748-6-zhouchengming-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
2022-08-24 14:06 ` Johannes Weiner
2022-08-24 14:06 ` Johannes Weiner
2022-08-24 15:30 ` Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 06/10] sched/psi: remove NR_ONCPU task accounting Chengming Zhou
2022-08-24 8:18 ` Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 10/10] sched/psi: per-cgroup PSI accounting disable/re-enable interface Chengming Zhou
2022-08-24 8:18 ` Chengming Zhou
2022-08-24 9:59 ` Johannes Weiner
[not found] ` <YwX2jC2UQ/zeY2E8-druUgvl0LCNAfugRpC6u6w@public.gmane.org>
2022-08-24 10:55 ` Chengming Zhou
2022-08-24 10:55 ` Chengming Zhou
2022-08-25 12:28 ` Chengming Zhou
2022-08-25 12:28 ` Chengming Zhou
2022-08-25 13:20 ` Johannes Weiner
2022-08-25 13:29 ` Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 07/10] sched/psi: add PSI_IRQ to track IRQ/SOFTIRQ pressure Chengming Zhou
[not found] ` <20220824081829.33748-8-zhouchengming-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
2022-08-24 10:46 ` Johannes Weiner
2022-08-24 10:46 ` Johannes Weiner
2022-08-24 11:53 ` Chengming Zhou
2022-08-24 8:18 ` [PATCH v3 08/10] sched/psi: consolidate cgroup_psi() Chengming Zhou
2022-08-24 10:19 ` Johannes Weiner
2022-08-24 8:18 ` [PATCH v3 09/10] sched/psi: cache parent psi_group to speed up groups iterate Chengming Zhou
[not found] ` <20220824081829.33748-10-zhouchengming-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org>
2022-08-24 10:18 ` Johannes Weiner [this message]
2022-08-24 10:18 ` Johannes Weiner
2022-08-24 10:48 ` Chengming Zhou
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YwX7CeeRDDAhV3UH@cmpxchg.org \
--to=hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org \
--cc=cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=corbet-T1hC0tSOHrs@public.gmane.org \
--cc=gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r@public.gmane.org \
--cc=linux-doc-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
--cc=mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
--cc=mkoutny-IBi9RG/b67k@public.gmane.org \
--cc=peterz-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org \
--cc=songmuchun-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org \
--cc=surenb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
--cc=tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
--cc=zhouchengming-EC8Uxl6Npydl57MIdRCFDg@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.