From: Roman Gushchin <roman.gushchin@linux.dev>
To: Muchun Song <songmuchun@bytedance.com>
Cc: hannes@cmpxchg.org, mhocko@kernel.org, shakeelb@google.com,
cgroups@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, duanxiongchun@bytedance.com,
longman@redhat.com
Subject: Re: [PATCH v4 01/11] mm: memcontrol: prepare objcg API for non-kmem usage
Date: Tue, 24 May 2022 19:36:24 -0700 [thread overview]
Message-ID: <Yo2WKADtPy2rekRh@carbon> (raw)
In-Reply-To: <20220524060551.80037-2-songmuchun@bytedance.com>
On Tue, May 24, 2022 at 02:05:41PM +0800, Muchun Song wrote:
> Pagecache pages are charged at allocation time and hold a
> reference to the original memory cgroup until they are reclaimed.
> Depending on the memory pressure, specific patterns of the page
> sharing between different cgroups and the cgroup creation and
> destruction rates, a large number of dying memory cgroups can be
> pinned by pagecache pages. It makes the page reclaim less efficient
> and wastes memory.
>
> We can convert LRU pages and most other raw memcg pins to the objcg
> direction to fix this problem, and then the page->memcg will always
> point to an object cgroup pointer.
>
> Therefore, the infrastructure of objcg no longer only serves
> CONFIG_MEMCG_KMEM. In this patch, we move the infrastructure of the
> objcg out of the scope of the CONFIG_MEMCG_KMEM so that the LRU pages
> can reuse it to charge pages.
>
> We know that the LRU pages are not accounted at the root level. But
> the page->memcg_data points to the root_mem_cgroup. So the
> page->memcg_data of the LRU pages always points to a valid pointer.
> But the root_mem_cgroup does not have an object cgroup. If we use
> obj_cgroup APIs to charge the LRU pages, we should set the
> page->memcg_data to a root object cgroup. So we also allocate an
> object cgroup for the root_mem_cgroup.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> ---
> include/linux/memcontrol.h | 5 ++--
> mm/memcontrol.c | 60 +++++++++++++++++++++++++---------------------
> 2 files changed, 35 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 89b14729d59f..ff1c1dd7e762 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -315,10 +315,10 @@ struct mem_cgroup {
>
> #ifdef CONFIG_MEMCG_KMEM
> int kmemcg_id;
> +#endif
> struct obj_cgroup __rcu *objcg;
> /* list of inherited objcgs, protected by objcg_lock */
> struct list_head objcg_list;
> -#endif
>
> MEMCG_PADDING(_pad2_);
>
> @@ -851,8 +851,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec)
> * parent_mem_cgroup - find the accounting parent of a memcg
> * @memcg: memcg whose parent to find
> *
> - * Returns the parent memcg, or NULL if this is the root or the memory
> - * controller is in legacy no-hierarchy mode.
> + * Returns the parent memcg, or NULL if this is the root.
> */
> static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
> {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 598fece89e2b..6de0d3e53eb1 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -254,9 +254,9 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
> return container_of(vmpr, struct mem_cgroup, vmpressure);
> }
>
> -#ifdef CONFIG_MEMCG_KMEM
> static DEFINE_SPINLOCK(objcg_lock);
>
> +#ifdef CONFIG_MEMCG_KMEM
> bool mem_cgroup_kmem_disabled(void)
> {
> return cgroup_memory_nokmem;
> @@ -265,12 +265,10 @@ bool mem_cgroup_kmem_disabled(void)
> static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
> unsigned int nr_pages);
>
> -static void obj_cgroup_release(struct percpu_ref *ref)
> +static void obj_cgroup_release_bytes(struct obj_cgroup *objcg)
> {
> - struct obj_cgroup *objcg = container_of(ref, struct obj_cgroup, refcnt);
> unsigned int nr_bytes;
> unsigned int nr_pages;
> - unsigned long flags;
>
> /*
> * At this point all allocated objects are freed, and
> @@ -284,9 +282,9 @@ static void obj_cgroup_release(struct percpu_ref *ref)
> * 3) CPU1: a process from another memcg is allocating something,
> * the stock if flushed,
> * objcg->nr_charged_bytes = PAGE_SIZE - 92
> - * 5) CPU0: we do release this object,
> + * 4) CPU0: we do release this object,
> * 92 bytes are added to stock->nr_bytes
> - * 6) CPU0: stock is flushed,
> + * 5) CPU0: stock is flushed,
> * 92 bytes are added to objcg->nr_charged_bytes
> *
> * In the result, nr_charged_bytes == PAGE_SIZE.
> @@ -298,6 +296,19 @@ static void obj_cgroup_release(struct percpu_ref *ref)
>
> if (nr_pages)
> obj_cgroup_uncharge_pages(objcg, nr_pages);
> +}
> +#else
> +static inline void obj_cgroup_release_bytes(struct obj_cgroup *objcg)
> +{
> +}
> +#endif
> +
> +static void obj_cgroup_release(struct percpu_ref *ref)
> +{
> + struct obj_cgroup *objcg = container_of(ref, struct obj_cgroup, refcnt);
> + unsigned long flags;
> +
> + obj_cgroup_release_bytes(objcg);
>
> spin_lock_irqsave(&objcg_lock, flags);
> list_del(&objcg->list);
> @@ -326,10 +337,10 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
> return objcg;
> }
>
> -static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
> - struct mem_cgroup *parent)
> +static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
> {
> struct obj_cgroup *objcg, *iter;
> + struct mem_cgroup *parent = parent_mem_cgroup(memcg);
>
> objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
>
> @@ -348,6 +359,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
> percpu_ref_kill(&objcg->refcnt);
> }
>
> +#ifdef CONFIG_MEMCG_KMEM
> /*
> * A lot of the calls to the cache allocation functions are expected to be
> * inlined by the compiler. Since the calls to memcg_slab_pre_alloc_hook() are
> @@ -3589,21 +3601,12 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
> #ifdef CONFIG_MEMCG_KMEM
> static int memcg_online_kmem(struct mem_cgroup *memcg)
> {
> - struct obj_cgroup *objcg;
> -
> if (cgroup_memory_nokmem)
> return 0;
>
> if (unlikely(mem_cgroup_is_root(memcg)))
> return 0;
>
> - objcg = obj_cgroup_alloc();
> - if (!objcg)
> - return -ENOMEM;
> -
> - objcg->memcg = memcg;
> - rcu_assign_pointer(memcg->objcg, objcg);
> -
> static_branch_enable(&memcg_kmem_enabled_key);
>
> memcg->kmemcg_id = memcg->id.id;
> @@ -3613,27 +3616,19 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
>
> static void memcg_offline_kmem(struct mem_cgroup *memcg)
> {
> - struct mem_cgroup *parent;
> -
> if (cgroup_memory_nokmem)
> return;
>
> if (unlikely(mem_cgroup_is_root(memcg)))
> return;
>
> - parent = parent_mem_cgroup(memcg);
> - if (!parent)
> - parent = root_mem_cgroup;
> -
> - memcg_reparent_objcgs(memcg, parent);
> -
> /*
> * After we have finished memcg_reparent_objcgs(), all list_lrus
> * corresponding to this cgroup are guaranteed to remain empty.
> * The ordering is imposed by list_lru_node->lock taken by
> * memcg_reparent_list_lrus().
> */
This comment doesn't look correct after these changes. Should it
be fixed? Or should the ordering be fixed too?
> - memcg_reparent_list_lrus(memcg, parent);
> + memcg_reparent_list_lrus(memcg, parent_mem_cgroup(memcg));
We effectively dropped this:
if (!parent)
parent = root_mem_cgroup;
Is it safe? (assuming v1 non-hierarchical mode, which is usually where
things get complicated)
The rest of the patch looks good to me.
Thanks!
next prev parent reply other threads:[~2022-05-25 2:36 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-05-24 6:05 [PATCH v4 00/11] Use obj_cgroup APIs to charge the LRU pages Muchun Song
2022-05-24 6:05 ` [PATCH v4 01/11] mm: memcontrol: prepare objcg API for non-kmem usage Muchun Song
2022-05-24 19:01 ` Johannes Weiner
2022-05-25 8:46 ` Muchun Song
2022-05-25 2:36 ` Roman Gushchin [this message]
2022-05-25 7:57 ` Muchun Song
2022-05-25 12:37 ` Johannes Weiner
2022-05-25 13:08 ` Muchun Song
2022-05-24 6:05 ` [PATCH v4 02/11] mm: memcontrol: introduce compact_folio_lruvec_lock_irqsave Muchun Song
2022-05-24 19:22 ` Johannes Weiner
2022-05-25 9:38 ` Muchun Song
2022-05-24 6:05 ` [PATCH v4 03/11] mm: memcontrol: make lruvec lock safe when LRU pages are reparented Muchun Song
2022-05-24 19:23 ` Waiman Long
2022-05-25 10:20 ` Muchun Song
2022-05-25 14:59 ` Waiman Long
2022-05-24 19:27 ` Johannes Weiner
2022-05-25 9:53 ` Muchun Song
2022-05-25 12:30 ` Johannes Weiner
2022-05-25 13:03 ` Muchun Song
2022-05-25 14:48 ` Johannes Weiner
2022-05-25 15:38 ` Muchun Song
2022-05-26 20:17 ` Waiman Long
2022-05-27 2:55 ` Muchun Song
2022-05-24 6:05 ` [PATCH v4 04/11] mm: vmscan: rework move_pages_to_lru() Muchun Song
2022-05-24 19:38 ` Johannes Weiner
2022-05-25 11:38 ` Muchun Song
2022-05-24 19:52 ` Waiman Long
2022-05-25 11:43 ` Muchun Song
2022-05-25 2:43 ` Roman Gushchin
2022-05-25 11:41 ` Muchun Song
2022-05-24 6:05 ` [PATCH v4 05/11] mm: thp: introduce folio_split_queue_lock{_irqsave}() Muchun Song
2022-05-24 6:05 ` [PATCH v4 06/11] mm: thp: make split queue lock safe when LRU pages are reparented Muchun Song
2022-05-25 2:54 ` Roman Gushchin
2022-05-25 11:44 ` Muchun Song
2022-05-24 6:05 ` [PATCH v4 07/11] mm: memcontrol: make all the callers of {folio,page}_memcg() safe Muchun Song
2022-05-25 3:03 ` Roman Gushchin
2022-05-25 11:51 ` Muchun Song
2022-05-24 6:05 ` [PATCH v4 08/11] mm: memcontrol: introduce memcg_reparent_ops Muchun Song
2022-05-24 6:05 ` [PATCH v4 09/11] mm: memcontrol: use obj_cgroup APIs to charge the LRU pages Muchun Song
2022-05-24 12:29 ` kernel test robot
2022-05-24 18:16 ` kernel test robot
2022-05-25 7:14 ` [mm] bec0ae1210: WARNING:possible_recursive_locking_detected kernel test robot
2022-05-24 6:05 ` [PATCH v4 10/11] mm: lru: add VM_BUG_ON_FOLIO to lru maintenance function Muchun Song
2022-05-24 19:44 ` Johannes Weiner
2022-05-25 11:59 ` Muchun Song
2022-05-25 2:40 ` Roman Gushchin
2022-05-25 11:58 ` Muchun Song
2022-05-24 6:05 ` [PATCH v4 11/11] mm: lru: use lruvec lock to serialize memcg changes Muchun Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Yo2WKADtPy2rekRh@carbon \
--to=roman.gushchin@linux.dev \
--cc=cgroups@vger.kernel.org \
--cc=duanxiongchun@bytedance.com \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=longman@redhat.com \
--cc=mhocko@kernel.org \
--cc=shakeelb@google.com \
--cc=songmuchun@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).