From: Peter Zijlstra <peterz@infradead.org>
To: Juri Lelli <juri.lelli@redhat.com>
Cc: mingo@redhat.com, rostedt@goodmis.org, tj@kernel.org,
linux-kernel@vger.kernel.org, luca.abeni@santannapisa.it,
claudio@evidence.eu.com, tommaso.cucinotta@santannapisa.it,
bristot@redhat.com, mathieu.poirier@linaro.org,
lizefan@huawei.com, cgroups@vger.kernel.org,
Prateek Sood <prsood@codeaurora.org>
Subject: Re: [PATCH v8 6/8] cgroup/cpuset: Change cpuset_rwsem and hotplug lock order
Date: Fri, 28 Jun 2019 15:03:08 +0200 [thread overview]
Message-ID: <20190628130308.GU3419@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20190628080618.522-7-juri.lelli@redhat.com>
On Fri, Jun 28, 2019 at 10:06:16AM +0200, Juri Lelli wrote:
> cpuset_rwsem is going to be acquired from sched_setscheduler() with a
> following patch. There are however paths (e.g., spawn_ksoftirqd) in
> which sched_setscheduler() is eventually called while holding hotplug lock;
> this creates a dependency between hotplug lock (to be always acquired
> first) and cpuset_rwsem (to be always acquired after hotplug lock).
>
> Fix paths which currently take the two locks in the wrong order (after
> a following patch is applied).
> Signed-off-by: Juri Lelli <juri.lelli@redhat.com>
This all reminds me of this:
https://lkml.kernel.org/r/1510755615-25906-1-git-send-email-prsood@codeaurora.org
Which sadly got reverted again. If we do this now (I've always been a
proponent), then we can make that rebuild synchronous again, which
should also help here IIRC.
> ---
> include/linux/cpuset.h | 8 ++++----
> kernel/cgroup/cpuset.c | 22 +++++++++++++++++-----
> 2 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
> index 934633a05d20..7f1478c26a33 100644
> --- a/include/linux/cpuset.h
> +++ b/include/linux/cpuset.h
> @@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void)
>
> static inline void cpuset_inc(void)
> {
> - static_branch_inc(&cpusets_pre_enable_key);
> - static_branch_inc(&cpusets_enabled_key);
> + static_branch_inc_cpuslocked(&cpusets_pre_enable_key);
> + static_branch_inc_cpuslocked(&cpusets_enabled_key);
> }
>
> static inline void cpuset_dec(void)
> {
> - static_branch_dec(&cpusets_enabled_key);
> - static_branch_dec(&cpusets_pre_enable_key);
> + static_branch_dec_cpuslocked(&cpusets_enabled_key);
> + static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
> }
>
> extern int cpuset_init(void);
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index a7c0c8d8f132..d92b351f89e3 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -1026,8 +1026,8 @@ static void rebuild_sched_domains_locked(void)
> cpumask_var_t *doms;
> int ndoms;
>
> + lockdep_assert_cpus_held();
> percpu_rwsem_assert_held(&cpuset_rwsem);
> - get_online_cpus();
>
> /*
> * We have raced with CPU hotplug. Don't do anything to avoid
> @@ -1036,19 +1036,17 @@ static void rebuild_sched_domains_locked(void)
> */
> if (!top_cpuset.nr_subparts_cpus &&
> !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
> - goto out;
> + return;
>
> if (top_cpuset.nr_subparts_cpus &&
> !cpumask_subset(top_cpuset.effective_cpus, cpu_active_mask))
> - goto out;
> + return;
>
> /* Generate domain masks and attrs */
> ndoms = generate_sched_domains(&doms, &attr);
>
> /* Have scheduler rebuild the domains */
> partition_and_rebuild_sched_domains(ndoms, doms, attr);
> -out:
> - put_online_cpus();
> }
> #else /* !CONFIG_SMP */
> static void rebuild_sched_domains_locked(void)
> @@ -1058,9 +1056,11 @@ static void rebuild_sched_domains_locked(void)
>
> void rebuild_sched_domains(void)
> {
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> rebuild_sched_domains_locked();
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> }
>
> /**
> @@ -2298,6 +2298,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
> cpuset_filetype_t type = cft->private;
> int retval = 0;
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs)) {
> retval = -ENODEV;
> @@ -2335,6 +2336,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
> }
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> return retval;
> }
>
> @@ -2345,6 +2347,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
> cpuset_filetype_t type = cft->private;
> int retval = -ENODEV;
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs))
> goto out_unlock;
> @@ -2359,6 +2362,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
> }
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> return retval;
> }
>
> @@ -2397,6 +2401,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
> kernfs_break_active_protection(of->kn);
> flush_work(&cpuset_hotplug_work);
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs))
> goto out_unlock;
> @@ -2422,6 +2427,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
> free_cpuset(trialcs);
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> kernfs_unbreak_active_protection(of->kn);
> css_put(&cs->css);
> flush_workqueue(cpuset_migrate_mm_wq);
> @@ -2552,6 +2558,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
> return -EINVAL;
>
> css_get(&cs->css);
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
> if (!is_cpuset_online(cs))
> goto out_unlock;
> @@ -2559,6 +2566,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
> retval = update_prstate(cs, val);
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> css_put(&cs->css);
> return retval ?: nbytes;
> }
> @@ -2764,6 +2772,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
> if (!parent)
> return 0;
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
>
> set_bit(CS_ONLINE, &cs->flags);
> @@ -2816,6 +2825,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
> spin_unlock_irq(&callback_lock);
> out_unlock:
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> return 0;
> }
>
> @@ -2834,6 +2844,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
> {
> struct cpuset *cs = css_cs(css);
>
> + get_online_cpus();
> percpu_down_write(&cpuset_rwsem);
>
> if (is_partition_root(cs))
> @@ -2854,6 +2865,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
> clear_bit(CS_ONLINE, &cs->flags);
>
> percpu_up_write(&cpuset_rwsem);
> + put_online_cpus();
> }
>
> static void cpuset_css_free(struct cgroup_subsys_state *css)
> --
> 2.17.2
>
next prev parent reply other threads:[~2019-06-28 13:03 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-28 8:06 [PATCH v8 0/8] sched/deadline: fix cpusets bandwidth accounting Juri Lelli
2019-06-28 8:06 ` [PATCH v8 1/8] sched/topology: Adding function partition_sched_domains_locked() Juri Lelli
2019-06-28 8:06 ` [PATCH v8 2/8] sched/core: Streamlining calls to task_rq_unlock() Juri Lelli
2019-06-28 8:06 ` [PATCH v8 3/8] cpuset: Rebuild root domain deadline accounting information Juri Lelli
2019-06-28 8:06 ` [PATCH v8 4/8] sched/deadline: Fix bandwidth accounting at all levels after offline migration Juri Lelli
2019-06-28 8:06 ` [PATCH v8 5/8] cgroup/cpuset: convert cpuset_mutex to percpu_rwsem Juri Lelli
2019-06-28 12:45 ` Peter Zijlstra
2019-06-28 14:31 ` Juri Lelli
2019-06-28 8:06 ` [PATCH v8 6/8] cgroup/cpuset: Change cpuset_rwsem and hotplug lock order Juri Lelli
2019-06-28 13:03 ` Peter Zijlstra [this message]
2019-07-01 6:52 ` Juri Lelli
2019-07-01 8:27 ` Peter Zijlstra
2019-07-01 14:51 ` Tejun Heo
2019-07-04 8:49 ` Juri Lelli
2019-07-12 14:04 ` Juri Lelli
2019-07-16 15:36 ` Tejun Heo
2019-06-28 8:06 ` [PATCH v8 7/8] sched/core: Prevent race condition between cpuset and __sched_setscheduler() Juri Lelli
2019-07-01 19:11 ` Peter Zijlstra
2019-07-02 7:01 ` Juri Lelli
2019-06-28 8:06 ` [PATCH v8 8/8] rcu/tree: Setschedule gp ktread to SCHED_FIFO outside of atomic region Juri Lelli
2019-07-01 19:13 ` Peter Zijlstra
2019-07-02 7:01 ` Juri Lelli
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190628130308.GU3419@hirez.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=bristot@redhat.com \
--cc=cgroups@vger.kernel.org \
--cc=claudio@evidence.eu.com \
--cc=juri.lelli@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lizefan@huawei.com \
--cc=luca.abeni@santannapisa.it \
--cc=mathieu.poirier@linaro.org \
--cc=mingo@redhat.com \
--cc=prsood@codeaurora.org \
--cc=rostedt@goodmis.org \
--cc=tj@kernel.org \
--cc=tommaso.cucinotta@santannapisa.it \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.