* + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
@ 2023-09-29 18:42 Andrew Morton
2023-10-01 16:42 ` Roman Gushchin
0 siblings, 1 reply; 9+ messages in thread
From: Andrew Morton @ 2023-09-29 18:42 UTC (permalink / raw)
To: mm-commits, vbabka, shakeelb, rientjes, muchun.song, mhocko,
hannes, dennis, roman.gushchin, akpm
The patch titled
Subject: mm: kmem: add direct objcg pointer to task_struct
has been added to the -mm mm-unstable branch. Its filename is
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Roman Gushchin <roman.gushchin@linux.dev>
Subject: mm: kmem: add direct objcg pointer to task_struct
Date: Fri, 29 Sep 2023 11:00:52 -0700
To charge a freshly allocated kernel object to a memory cgroup, the kernel
needs to obtain an objcg pointer. Currently it does it indirectly by
obtaining the memcg pointer first and then calling to
__get_obj_cgroup_from_memcg().
Usually tasks spend their entire life belonging to the same object cgroup.
So it makes sense to save the objcg pointer on task_struct directly, so
it can be obtained faster. It requires some work on fork, exit and cgroup
migrate paths, but these paths are way colder.
To avoid any costly synchronization the following rules are applied:
1) A task sets its objcg pointer itself.
2) If a task is being migrated to another cgroup, the least
significant bit of the objcg pointer is set atomically.
3) On the allocation path the objcg pointer is obtained locklessly
using the READ_ONCE() macro and the least significant bit is
checked. If it's set, the following procedure is used to update
it locklessly:
- task->objcg is zeroed using cmpxchg
- new objcg pointer is obtained
- task->objcg is updated using try_cmpxchg
- operation is repeated if try_cmpxchg fails
It guarantees that no updates will be lost if task migration
is racing against objcg pointer update. It also allows to keep
both read and write paths fully lockless.
Because the task is keeping a reference to the objcg, it can't go away
while the task is alive.
This commit doesn't change the way the remote memcg charging works.
Link: https://lkml.kernel.org/r/20230929180056.1122002-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/memcontrol.h | 10 +++
include/linux/sched.h | 4 +
mm/memcontrol.c | 111 ++++++++++++++++++++++++++++++++---
3 files changed, 116 insertions(+), 9 deletions(-)
--- a/include/linux/memcontrol.h~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/include/linux/memcontrol.h
@@ -544,6 +544,16 @@ static inline bool folio_memcg_kmem(stru
return folio->memcg_data & MEMCG_DATA_KMEM;
}
+static inline bool current_objcg_needs_update(struct obj_cgroup *objcg)
+{
+ return (struct obj_cgroup *)((unsigned long)objcg & 0x1);
+}
+
+static inline struct obj_cgroup *
+current_objcg_without_update_flag(struct obj_cgroup *objcg)
+{
+ return (struct obj_cgroup *)((unsigned long)objcg & ~0x1);
+}
#else
static inline bool folio_memcg_kmem(struct folio *folio)
--- a/include/linux/sched.h~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/include/linux/sched.h
@@ -1443,6 +1443,10 @@ struct task_struct {
struct mem_cgroup *active_memcg;
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+#endif
+
#ifdef CONFIG_BLK_CGROUP
struct gendisk *throttle_disk;
#endif
--- a/mm/memcontrol.c~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/mm/memcontrol.c
@@ -3041,6 +3041,47 @@ static struct obj_cgroup *__get_obj_cgro
return objcg;
}
+static struct obj_cgroup *current_objcg_update(struct obj_cgroup *old)
+{
+ struct mem_cgroup *memcg;
+ struct obj_cgroup *objcg = NULL, *tmp = old;
+
+ old = current_objcg_without_update_flag(old);
+ if (old)
+ obj_cgroup_put(old);
+
+ rcu_read_lock();
+ do {
+ /* Atomically drop the update bit, */
+ WARN_ON_ONCE(cmpxchg(&current->objcg, tmp, 0) != tmp);
+
+ /* ...obtain the new objcg pointer */
+ memcg = mem_cgroup_from_task(current);
+ for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+ objcg = rcu_dereference(memcg->objcg);
+ if (objcg && obj_cgroup_tryget(objcg))
+ break;
+ objcg = NULL;
+ }
+
+ /*
+ * ...and try atomically set up a new objcg pointer. If it
+ * fails, it means the update flag was set concurrently, so
+ * the whole procedure should be repeated.
+ */
+ tmp = 0;
+ } while (!try_cmpxchg(&current->objcg, &tmp, objcg));
+ rcu_read_unlock();
+
+ return objcg;
+}
+
+static inline void current_objcg_set_needs_update(struct task_struct *task)
+{
+ /* atomically set the update bit */
+ set_bit(0, (unsigned long *)&current->objcg);
+}
+
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
{
struct mem_cgroup *memcg;
@@ -3048,19 +3089,26 @@ __always_inline struct obj_cgroup *get_o
if (in_task()) {
memcg = current->active_memcg;
+ if (unlikely(memcg))
+ goto from_memcg;
- /* Memcg to charge can't be determined. */
- if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
- return NULL;
+ objcg = READ_ONCE(current->objcg);
+ if (unlikely(current_objcg_needs_update(objcg)))
+ objcg = current_objcg_update(objcg);
+
+ if (objcg) {
+ obj_cgroup_get(objcg);
+ return objcg;
+ }
} else {
memcg = this_cpu_read(int_active_memcg);
- if (likely(!memcg))
- return NULL;
+ if (unlikely(memcg))
+ goto from_memcg;
}
+ return NULL;
+from_memcg:
rcu_read_lock();
- if (!memcg)
- memcg = mem_cgroup_from_task(current);
objcg = __get_obj_cgroup_from_memcg(memcg);
rcu_read_unlock();
return objcg;
@@ -6400,6 +6448,7 @@ static void mem_cgroup_move_task(void)
mem_cgroup_clear_mc();
}
}
+
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
@@ -6413,8 +6462,27 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_fork(struct task_struct *task)
+{
+ /*
+ * Set the update flag to cause task->objcg to be initialized lazily
+ * on the first allocation.
+ */
+ task->objcg = (struct obj_cgroup *)0x1;
+}
+
+static void mem_cgroup_exit(struct task_struct *task)
+{
+ struct obj_cgroup *objcg = current_objcg_without_update_flag(task->objcg);
+
+ if (objcg)
+ obj_cgroup_put(objcg);
+}
+#endif
+
#ifdef CONFIG_LRU_GEN
-static void mem_cgroup_attach(struct cgroup_taskset *tset)
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
@@ -6432,10 +6500,29 @@ static void mem_cgroup_attach(struct cgr
task_unlock(task);
}
#else
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_LRU_GEN */
+
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(task, css, tset)
+ current_objcg_set_needs_update(task);
+}
+#else
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_MEMCG_KMEM */
+
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
+ mem_cgroup_lru_gen_attach(tset);
+ mem_cgroup_kmem_attach(tset);
}
-#endif /* CONFIG_LRU_GEN */
+#endif
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
@@ -6845,9 +6932,15 @@ struct cgroup_subsys memory_cgrp_subsys
.css_reset = mem_cgroup_css_reset,
.css_rstat_flush = mem_cgroup_css_rstat_flush,
.can_attach = mem_cgroup_can_attach,
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
.attach = mem_cgroup_attach,
+#endif
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
+#ifdef CONFIG_MEMCG_KMEM
+ .fork = mem_cgroup_fork,
+ .exit = mem_cgroup_exit,
+#endif
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
.early_init = 0,
_
Patches currently in -mm which might be from roman.gushchin@linux.dev are
mm-kmem-optimize-get_obj_cgroup_from_current.patch
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
mm-kmem-make-memcg-keep-a-reference-to-the-original-objcg.patch
mm-kmem-scoped-objcg-protection.patch
percpu-scoped-objcg-protection.patch
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
2023-09-29 18:42 Andrew Morton
@ 2023-10-01 16:42 ` Roman Gushchin
0 siblings, 0 replies; 9+ messages in thread
From: Roman Gushchin @ 2023-10-01 16:42 UTC (permalink / raw)
To: Andrew Morton
Cc: mm-commits, vbabka, shakeelb, rientjes, muchun.song, mhocko,
hannes, dennis
On Fri, Sep 29, 2023 at 11:42:48AM -0700, Andrew Morton wrote:
>
> The patch titled
> Subject: mm: kmem: add direct objcg pointer to task_struct
> has been added to the -mm mm-unstable branch. Its filename is
> mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
>
> This patch will shortly appear at
> https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
>
> This patch will later appear in the mm-unstable branch at
> git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>
> Before you just go and hit "reply", please:
> a) Consider who else should be cc'ed
> b) Prefer to cc a suitable mailing list as well
> c) Ideally: find the original patch on the mailing list and do a
> reply-to-all to that, adding suitable additional cc's
>
> *** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
>
> The -mm tree is included into linux-next via the mm-everything
> branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> and is updated there every 2-3 working days
>
> ------------------------------------------------------
> From: Roman Gushchin <roman.gushchin@linux.dev>
> Subject: mm: kmem: add direct objcg pointer to task_struct
> Date: Fri, 29 Sep 2023 11:00:52 -0700
>
> To charge a freshly allocated kernel object to a memory cgroup, the kernel
> needs to obtain an objcg pointer. Currently it does it indirectly by
> obtaining the memcg pointer first and then calling to
> __get_obj_cgroup_from_memcg().
>
> Usually tasks spend their entire life belonging to the same object cgroup.
> So it makes sense to save the objcg pointer on task_struct directly, so
> it can be obtained faster. It requires some work on fork, exit and cgroup
> migrate paths, but these paths are way colder.
>
> To avoid any costly synchronization the following rules are applied:
> 1) A task sets it's objcg pointer itself.
>
> 2) If a task is being migrated to another cgroup, the least
> significant bit of the objcg pointer is set atomically.
>
> 3) On the allocation path the objcg pointer is obtained locklessly
> using the READ_ONCE() macro and the least significant bit is
> checked. If it's set, the following procedure is used to update
> it locklessly:
> - task->objcg is zeroed using cmpxcg
> - new objcg pointer is obtained
> - task->objcg is updated using try_cmpxchg
> - operation is repeated if try_cmpxcg fails
> It guarantees that no updates will be lost if task migration
> is racing against objcg pointer update. It also allows to keep
> both read and write paths fully lockless.
>
> Because the task is keeping a reference to the objcg, it can't go away
> while the task is alive.
>
> This commit doesn't change the way the remote memcg charging works.
>
> Link: https://lkml.kernel.org/r/20230929180056.1122002-3-roman.gushchin@linux.dev
> Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Dennis Zhou <dennis@kernel.org>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Muchun Song <muchun.song@linux.dev>
> Cc: Shakeel Butt <shakeelb@google.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
> ---
>
> include/linux/memcontrol.h | 10 +++
> include/linux/sched.h | 4 +
> mm/memcontrol.c | 111 ++++++++++++++++++++++++++++++++---
> 3 files changed, 116 insertions(+), 9 deletions(-)
>
> --- a/include/linux/memcontrol.h~mm-kmem-add-direct-objcg-pointer-to-task_struct
> +++ a/include/linux/memcontrol.h
> @@ -544,6 +544,16 @@ static inline bool folio_memcg_kmem(stru
> return folio->memcg_data & MEMCG_DATA_KMEM;
> }
>
> +static inline bool current_objcg_needs_update(struct obj_cgroup *objcg)
> +{
> + return (struct obj_cgroup *)((unsigned long)objcg & 0x1);
> +}
> +
> +static inline struct obj_cgroup *
> +current_objcg_without_update_flag(struct obj_cgroup *objcg)
> +{
> + return (struct obj_cgroup *)((unsigned long)objcg & ~0x1);
> +}
>
> #else
> static inline bool folio_memcg_kmem(struct folio *folio)
> --- a/include/linux/sched.h~mm-kmem-add-direct-objcg-pointer-to-task_struct
> +++ a/include/linux/sched.h
> @@ -1443,6 +1443,10 @@ struct task_struct {
> struct mem_cgroup *active_memcg;
> #endif
>
> +#ifdef CONFIG_MEMCG_KMEM
> + struct obj_cgroup *objcg;
> +#endif
> +
> #ifdef CONFIG_BLK_CGROUP
> struct gendisk *throttle_disk;
> #endif
> --- a/mm/memcontrol.c~mm-kmem-add-direct-objcg-pointer-to-task_struct
> +++ a/mm/memcontrol.c
> @@ -3041,6 +3041,47 @@ static struct obj_cgroup *__get_obj_cgro
> return objcg;
> }
>
> +static struct obj_cgroup *current_objcg_update(struct obj_cgroup *old)
> +{
> + struct mem_cgroup *memcg;
> + struct obj_cgroup *objcg = NULL, *tmp = old;
> +
> + old = current_objcg_without_update_flag(old);
> + if (old)
> + obj_cgroup_put(old);
> +
> + rcu_read_lock();
> + do {
> + /* Atomically drop the update bit, */
> + WARN_ON_ONCE(cmpxchg(¤t->objcg, tmp, 0) != tmp);
Hi Andrew,
can you please merge a small fixup here?
I've got a sparse complaint from Intel's kernel test robot.
Thanks!
--
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 78ab36b5899f..4c762a04a689 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3013,7 +3013,7 @@ static struct obj_cgroup *current_objcg_update(struct obj_cgroup *old)
rcu_read_lock();
do {
/* Atomically drop the update bit, */
- WARN_ON_ONCE(cmpxchg(&current->objcg, tmp, 0) != tmp);
+ WARN_ON_ONCE(cmpxchg(&current->objcg, tmp, NULL) != tmp);
/* ...obtain the new objcg pointer */
memcg = mem_cgroup_from_task(current);
^ permalink raw reply related [flat|nested] 9+ messages in thread
* + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
@ 2023-10-10 18:20 Andrew Morton
2023-10-10 22:15 ` Roman Gushchin
0 siblings, 1 reply; 9+ messages in thread
From: Andrew Morton @ 2023-10-10 18:20 UTC (permalink / raw)
To: mm-commits, vbabka, shakeelb, rientjes, naresh.kamboju,
muchun.song, mhocko, hannes, dennis, roman.gushchin, akpm
The patch titled
Subject: mm: kmem: add direct objcg pointer to task_struct
has been added to the -mm mm-unstable branch. Its filename is
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Roman Gushchin <roman.gushchin@linux.dev>
Subject: mm: kmem: add direct objcg pointer to task_struct
Date: Mon, 9 Oct 2023 17:09:26 -0700
To charge a freshly allocated kernel object to a memory cgroup, the kernel
needs to obtain an objcg pointer. Currently it does it indirectly by
obtaining the memcg pointer first and then calling to
__get_obj_cgroup_from_memcg().
Usually tasks spend their entire life belonging to the same object cgroup.
So it makes sense to save the objcg pointer on task_struct directly, so
it can be obtained faster. It requires some work on fork, exit and cgroup
migrate paths, but these paths are way colder.
To avoid any costly synchronization the following rules are applied:
1) A task sets its objcg pointer itself.
2) If a task is being migrated to another cgroup, the least
significant bit of the objcg pointer is set atomically.
3) On the allocation path the objcg pointer is obtained locklessly
using the READ_ONCE() macro and the least significant bit is
checked. If it's set, the following procedure is used to update
it locklessly:
- task->objcg is zeroed using cmpxchg
- new objcg pointer is obtained
- task->objcg is updated using try_cmpxchg
- operation is repeated if try_cmpxchg fails
It guarantees that no updates will be lost if task migration
is racing against objcg pointer update. It also allows to keep
both read and write paths fully lockless.
Because the task is keeping a reference to the objcg, it can't go away
while the task is alive.
This commit doesn't change the way the remote memcg charging works.
Link: https://lkml.kernel.org/r/20231010000929.450702-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/sched.h | 4 +
mm/memcontrol.c | 130 +++++++++++++++++++++++++++++++++++++---
2 files changed, 125 insertions(+), 9 deletions(-)
--- a/include/linux/sched.h~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/include/linux/sched.h
@@ -1443,6 +1443,10 @@ struct task_struct {
struct mem_cgroup *active_memcg;
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+#endif
+
#ifdef CONFIG_BLK_CGROUP
struct gendisk *throttle_disk;
#endif
--- a/mm/memcontrol.c~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/mm/memcontrol.c
@@ -249,6 +249,8 @@ struct mem_cgroup *vmpressure_to_memcg(s
return container_of(vmpr, struct mem_cgroup, vmpressure);
}
+#define CURRENT_OBJCG_UPDATE_FLAG 0x1UL
+
#ifdef CONFIG_MEMCG_KMEM
static DEFINE_SPINLOCK(objcg_lock);
@@ -3107,6 +3109,50 @@ static struct obj_cgroup *__get_obj_cgro
return objcg;
}
+static struct obj_cgroup *current_objcg_update(void)
+{
+ struct mem_cgroup *memcg;
+ struct obj_cgroup *old, *objcg = NULL;
+
+ do {
+ /* Atomically drop the update bit. */
+ old = xchg(&current->objcg, NULL);
+ if (old) {
+ old = (struct obj_cgroup *)
+ ((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (old)
+ obj_cgroup_put(old);
+
+ old = NULL;
+ }
+
+ /* Obtain the new objcg pointer. */
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+ /*
+ * The current task can be asynchronously moved to another
+ * memcg and the previous memcg can be offlined. So let's
+ * get the memcg pointer and try get a reference to objcg
+ * under a rcu read lock.
+ */
+ for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+ objcg = rcu_dereference(memcg->objcg);
+ if (likely(objcg && obj_cgroup_tryget(objcg)))
+ break;
+ objcg = NULL;
+ }
+ rcu_read_unlock();
+
+ /*
+ * Try set up a new objcg pointer atomically. If it
+ * fails, it means the update flag was set concurrently, so
+ * the whole procedure should be repeated.
+ */
+ } while (!try_cmpxchg(&current->objcg, &old, objcg));
+
+ return objcg;
+}
+
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
{
struct mem_cgroup *memcg;
@@ -3114,19 +3160,26 @@ __always_inline struct obj_cgroup *get_o
if (in_task()) {
memcg = current->active_memcg;
+ if (unlikely(memcg))
+ goto from_memcg;
- /* Memcg to charge can't be determined. */
- if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
- return NULL;
+ objcg = READ_ONCE(current->objcg);
+ if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
+ objcg = current_objcg_update();
+
+ if (objcg) {
+ obj_cgroup_get(objcg);
+ return objcg;
+ }
} else {
memcg = this_cpu_read(int_active_memcg);
- if (likely(!memcg))
- return NULL;
+ if (unlikely(memcg))
+ goto from_memcg;
}
+ return NULL;
+from_memcg:
rcu_read_lock();
- if (!memcg)
- memcg = mem_cgroup_from_task(current);
objcg = __get_obj_cgroup_from_memcg(memcg);
rcu_read_unlock();
return objcg;
@@ -6469,6 +6522,7 @@ static void mem_cgroup_move_task(void)
mem_cgroup_clear_mc();
}
}
+
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
@@ -6482,8 +6536,39 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_fork(struct task_struct *task)
+{
+ /*
+ * Set the update flag to cause task->objcg to be initialized lazily
+ * on the first allocation. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG;
+}
+
+static void mem_cgroup_exit(struct task_struct *task)
+{
+ struct obj_cgroup *objcg = task->objcg;
+
+ objcg = (struct obj_cgroup *)
+ ((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (objcg)
+ obj_cgroup_put(objcg);
+
+ /*
+ * Some kernel allocations can happen after this point,
+ * but let's ignore them. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = NULL;
+}
+#endif
+
#ifdef CONFIG_LRU_GEN
-static void mem_cgroup_attach(struct cgroup_taskset *tset)
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
@@ -6501,10 +6586,31 @@ static void mem_cgroup_attach(struct cgr
task_unlock(task);
}
#else
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_LRU_GEN */
+
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(task, css, tset) {
+ /* atomically set the update bit */
+ set_bit(0, (unsigned long *)&current->objcg);
+ }
+}
+#else
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_MEMCG_KMEM */
+
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
+ mem_cgroup_lru_gen_attach(tset);
+ mem_cgroup_kmem_attach(tset);
}
-#endif /* CONFIG_LRU_GEN */
+#endif
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
@@ -6914,9 +7020,15 @@ struct cgroup_subsys memory_cgrp_subsys
.css_reset = mem_cgroup_css_reset,
.css_rstat_flush = mem_cgroup_css_rstat_flush,
.can_attach = mem_cgroup_can_attach,
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
.attach = mem_cgroup_attach,
+#endif
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
+#ifdef CONFIG_MEMCG_KMEM
+ .fork = mem_cgroup_fork,
+ .exit = mem_cgroup_exit,
+#endif
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
.early_init = 0,
_
Patches currently in -mm which might be from roman.gushchin@linux.dev are
mm-kmem-optimize-get_obj_cgroup_from_current.patch
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
mm-kmem-make-memcg-keep-a-reference-to-the-original-objcg.patch
mm-kmem-scoped-objcg-protection.patch
percpu-scoped-objcg-protection.patch
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
2023-10-10 18:20 + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch Andrew Morton
@ 2023-10-10 22:15 ` Roman Gushchin
2023-10-10 23:02 ` Andrew Morton
0 siblings, 1 reply; 9+ messages in thread
From: Roman Gushchin @ 2023-10-10 22:15 UTC (permalink / raw)
To: Andrew Morton
Cc: mm-commits, vbabka, shakeelb, rientjes, naresh.kamboju,
muchun.song, mhocko, hannes, dennis
On Tue, Oct 10, 2023 at 11:20:00AM -0700, Andrew Morton wrote:
>
> The patch titled
> Subject: mm: kmem: add direct objcg pointer to task_struct
> has been added to the -mm mm-unstable branch. Its filename is
> mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
>
> This patch will shortly appear at
> https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
>
> This patch will later appear in the mm-unstable branch at
> git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
>
> Before you just go and hit "reply", please:
> a) Consider who else should be cc'ed
> b) Prefer to cc a suitable mailing list as well
> c) Ideally: find the original patch on the mailing list and do a
> reply-to-all to that, adding suitable additional cc's
>
> *** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
>
> The -mm tree is included into linux-next via the mm-everything
> branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
> and is updated there every 2-3 working days
>
> ------------------------------------------------------
> From: Roman Gushchin <roman.gushchin@linux.dev>
> Subject: mm: kmem: add direct objcg pointer to task_struct
> Date: Mon, 9 Oct 2023 17:09:26 -0700
>
> To charge a freshly allocated kernel object to a memory cgroup, the kernel
> needs to obtain an objcg pointer. Currently it does it indirectly by
> obtaining the memcg pointer first and then calling to
> __get_obj_cgroup_from_memcg().
>
> Usually tasks spend their entire life belonging to the same object cgroup.
> So it makes sense to save the objcg pointer on task_struct directly, so
> it can be obtained faster. It requires some work on fork, exit and cgroup
> migrate paths, but these paths are way colder.
>
> To avoid any costly synchronization the following rules are applied:
> 1) A task sets it's objcg pointer itself.
>
> 2) If a task is being migrated to another cgroup, the least
> significant bit of the objcg pointer is set atomically.
>
> 3) On the allocation path the objcg pointer is obtained locklessly
> using the READ_ONCE() macro and the least significant bit is
> checked. If it's set, the following procedure is used to update
> it locklessly:
> - task->objcg is zeroed using cmpxcg
> - new objcg pointer is obtained
> - task->objcg is updated using try_cmpxchg
> - operation is repeated if try_cmpxcg fails
> It guarantees that no updates will be lost if task migration
> is racing against objcg pointer update. It also allows to keep
> both read and write paths fully lockless.
>
> Because the task is keeping a reference to the objcg, it can't go away
> while the task is alive.
>
> This commit doesn't change the way the remote memcg charging works.
>
> Link: https://lkml.kernel.org/r/20231010000929.450702-3-roman.gushchin@linux.dev
> Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> Cc: David Rientjes <rientjes@google.com>
> Cc: Dennis Zhou <dennis@kernel.org>
> Cc: Michal Hocko <mhocko@kernel.org>
> Cc: Muchun Song <muchun.song@linux.dev>
> Cc: Naresh Kamboju <naresh.kamboju@linaro.org>
> Cc: Shakeel Butt <shakeelb@google.com>
> Cc: Vlastimil Babka <vbabka@suse.cz>
> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Andrew, can you, please, add
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
here?
Thanks!
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
2023-10-10 22:15 ` Roman Gushchin
@ 2023-10-10 23:02 ` Andrew Morton
0 siblings, 0 replies; 9+ messages in thread
From: Andrew Morton @ 2023-10-10 23:02 UTC (permalink / raw)
To: Roman Gushchin
Cc: mm-commits, vbabka, shakeelb, rientjes, naresh.kamboju,
muchun.song, mhocko, hannes, dennis
On Tue, 10 Oct 2023 15:15:03 -0700 Roman Gushchin <roman.gushchin@linux.dev> wrote:
> Andrew, can you, please, add
> Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
> here?
yup, I've updated all five patches.
^ permalink raw reply [flat|nested] 9+ messages in thread
* + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
@ 2023-10-16 22:58 Andrew Morton
2023-10-18 18:08 ` Roman Gushchin
0 siblings, 1 reply; 9+ messages in thread
From: Andrew Morton @ 2023-10-16 22:58 UTC (permalink / raw)
To: mm-commits, vbabka, shakeelb, rientjes, naresh.kamboju,
muchun.song, mhocko, hannes, dennis, roman.gushchin, akpm
The patch titled
Subject: mm: kmem: add direct objcg pointer to task_struct
has been added to the -mm mm-unstable branch. Its filename is
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Roman Gushchin <roman.gushchin@linux.dev>
Subject: mm: kmem: add direct objcg pointer to task_struct
Date: Mon, 16 Oct 2023 15:18:57 -0700
To charge a freshly allocated kernel object to a memory cgroup, the kernel
needs to obtain an objcg pointer. Currently it does it indirectly by
obtaining the memcg pointer first and then calling to
__get_obj_cgroup_from_memcg().
Usually tasks spend their entire life belonging to the same object cgroup.
So it makes sense to save the objcg pointer on task_struct directly, so
it can be obtained faster. It requires some work on fork, exit and cgroup
migrate paths, but these paths are way colder.
To avoid any costly synchronization the following rules are applied:
1) A task sets its objcg pointer itself.
2) If a task is being migrated to another cgroup, the least
significant bit of the objcg pointer is set atomically.
3) On the allocation path the objcg pointer is obtained locklessly
using the READ_ONCE() macro and the least significant bit is
checked. If it's set, the following procedure is used to update
it locklessly:
- task->objcg is zeroed using xchg
- new objcg pointer is obtained
- task->objcg is updated using try_cmpxchg
- operation is repeated if try_cmpxchg fails
It guarantees that no updates will be lost if task migration
is racing against objcg pointer update. It also allows to keep
both read and write paths fully lockless.
Because the task is keeping a reference to the objcg, it can't go away
while the task is alive.
This commit doesn't change the way the remote memcg charging works.
Link: https://lkml.kernel.org/r/20231016221900.4031141-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeelb@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/sched.h | 4 +
mm/memcontrol.c | 130 +++++++++++++++++++++++++++++++++++++---
2 files changed, 125 insertions(+), 9 deletions(-)
--- a/include/linux/sched.h~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/include/linux/sched.h
@@ -1443,6 +1443,10 @@ struct task_struct {
struct mem_cgroup *active_memcg;
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+#endif
+
#ifdef CONFIG_BLK_CGROUP
struct gendisk *throttle_disk;
#endif
--- a/mm/memcontrol.c~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/mm/memcontrol.c
@@ -249,6 +249,8 @@ struct mem_cgroup *vmpressure_to_memcg(s
return container_of(vmpr, struct mem_cgroup, vmpressure);
}
+#define CURRENT_OBJCG_UPDATE_FLAG 0x1UL
+
#ifdef CONFIG_MEMCG_KMEM
static DEFINE_SPINLOCK(objcg_lock);
@@ -3107,6 +3109,50 @@ static struct obj_cgroup *__get_obj_cgro
return objcg;
}
+static struct obj_cgroup *current_objcg_update(void)
+{
+ struct mem_cgroup *memcg;
+ struct obj_cgroup *old, *objcg = NULL;
+
+ do {
+ /* Atomically drop the update bit. */
+ old = xchg(&current->objcg, NULL);
+ if (old) {
+ old = (struct obj_cgroup *)
+ ((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (old)
+ obj_cgroup_put(old);
+
+ old = NULL;
+ }
+
+ /* Obtain the new objcg pointer. */
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+ /*
+ * The current task can be asynchronously moved to another
+ * memcg and the previous memcg can be offlined. So let's
+ * get the memcg pointer and try get a reference to objcg
+ * under a rcu read lock.
+ */
+ for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+ objcg = rcu_dereference(memcg->objcg);
+ if (likely(objcg && obj_cgroup_tryget(objcg)))
+ break;
+ objcg = NULL;
+ }
+ rcu_read_unlock();
+
+ /*
+ * Try set up a new objcg pointer atomically. If it
+ * fails, it means the update flag was set concurrently, so
+ * the whole procedure should be repeated.
+ */
+ } while (!try_cmpxchg(&current->objcg, &old, objcg));
+
+ return objcg;
+}
+
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
{
struct mem_cgroup *memcg;
@@ -3114,19 +3160,26 @@ __always_inline struct obj_cgroup *get_o
if (in_task()) {
memcg = current->active_memcg;
+ if (unlikely(memcg))
+ goto from_memcg;
- /* Memcg to charge can't be determined. */
- if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
- return NULL;
+ objcg = READ_ONCE(current->objcg);
+ if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
+ objcg = current_objcg_update();
+
+ if (objcg) {
+ obj_cgroup_get(objcg);
+ return objcg;
+ }
} else {
memcg = this_cpu_read(int_active_memcg);
- if (likely(!memcg))
- return NULL;
+ if (unlikely(memcg))
+ goto from_memcg;
}
+ return NULL;
+from_memcg:
rcu_read_lock();
- if (!memcg)
- memcg = mem_cgroup_from_task(current);
objcg = __get_obj_cgroup_from_memcg(memcg);
rcu_read_unlock();
return objcg;
@@ -6469,6 +6522,7 @@ static void mem_cgroup_move_task(void)
mem_cgroup_clear_mc();
}
}
+
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
@@ -6482,8 +6536,39 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_fork(struct task_struct *task)
+{
+ /*
+ * Set the update flag to cause task->objcg to be initialized lazily
+ * on the first allocation. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG;
+}
+
+static void mem_cgroup_exit(struct task_struct *task)
+{
+ struct obj_cgroup *objcg = task->objcg;
+
+ objcg = (struct obj_cgroup *)
+ ((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (objcg)
+ obj_cgroup_put(objcg);
+
+ /*
+ * Some kernel allocations can happen after this point,
+ * but let's ignore them. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = NULL;
+}
+#endif
+
#ifdef CONFIG_LRU_GEN
-static void mem_cgroup_attach(struct cgroup_taskset *tset)
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
@@ -6501,10 +6586,31 @@ static void mem_cgroup_attach(struct cgr
task_unlock(task);
}
#else
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_LRU_GEN */
+
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(task, css, tset) {
+ /* atomically set the update bit */
+ set_bit(0, (unsigned long *)&task->objcg);
+ }
+}
+#else
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_MEMCG_KMEM */
+
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
+ mem_cgroup_lru_gen_attach(tset);
+ mem_cgroup_kmem_attach(tset);
}
-#endif /* CONFIG_LRU_GEN */
+#endif
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
@@ -6914,9 +7020,15 @@ struct cgroup_subsys memory_cgrp_subsys
.css_reset = mem_cgroup_css_reset,
.css_rstat_flush = mem_cgroup_css_rstat_flush,
.can_attach = mem_cgroup_can_attach,
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
.attach = mem_cgroup_attach,
+#endif
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
+#ifdef CONFIG_MEMCG_KMEM
+ .fork = mem_cgroup_fork,
+ .exit = mem_cgroup_exit,
+#endif
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
.early_init = 0,
_
Patches currently in -mm which might be from roman.gushchin@linux.dev are
mm-kmem-optimize-get_obj_cgroup_from_current.patch
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
mm-kmem-make-memcg-keep-a-reference-to-the-original-objcg.patch
mm-kmem-scoped-objcg-protection.patch
percpu-scoped-objcg-protection.patch
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
2023-10-16 22:58 Andrew Morton
@ 2023-10-18 18:08 ` Roman Gushchin
2023-10-19 7:14 ` Vlastimil Babka
0 siblings, 1 reply; 9+ messages in thread
From: Roman Gushchin @ 2023-10-18 18:08 UTC (permalink / raw)
To: Andrew Morton
Cc: mm-commits, vbabka, shakeelb, rientjes, naresh.kamboju,
muchun.song, mhocko, hannes, dennis
From 14f998a9235fdfa88a4ebfad5802bdde6195bfae Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Mon, 19 Dec 2022 15:46:18 -0800
Subject: [PATCH v4 2/5] mm: kmem: add direct objcg pointer to task_struct
To charge a freshly allocated kernel object to a memory cgroup, the
kernel needs to obtain an objcg pointer. Currently it does it
indirectly by obtaining the memcg pointer first and then calling to
__get_obj_cgroup_from_memcg().
Usually tasks spend their entire life belonging to the same object
cgroup. So it makes sense to save the objcg pointer on task_struct
directly, so it can be obtained faster. It requires some work on fork,
exit and cgroup migrate paths, but these paths are way colder.
To avoid any costly synchronization the following rules are applied:
1) A task sets its objcg pointer itself.
2) If a task is being migrated to another cgroup, the least
significant bit of the objcg pointer is set atomically.
3) On the allocation path the objcg pointer is obtained locklessly
using the READ_ONCE() macro and the least significant bit is
checked. If it's set, the following procedure is used to update
it locklessly:
- task->objcg is zeroed using xchg
- new objcg pointer is obtained
- task->objcg is updated using try_cmpxchg
- operation is repeated if try_cmpxchg fails
It guarantees that no updates will be lost if task migration
is racing against objcg pointer update. It also allows to keep
both read and write paths fully lockless.
Because the task is keeping a reference to the objcg, it can't go away
while the task is alive.
This commit doesn't change the way the remote memcg charging works.
Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
include/linux/sched.h | 4 ++
mm/memcontrol.c | 138 +++++++++++++++++++++++++++++++++++++++---
2 files changed, 133 insertions(+), 9 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 77f01ac385f7..60de42715b56 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1443,6 +1443,10 @@ struct task_struct {
struct mem_cgroup *active_memcg;
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+#endif
+
#ifdef CONFIG_BLK_CGROUP
struct gendisk *throttle_disk;
#endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 16ac2a5838fb..d51b87cc8d97 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -249,6 +249,9 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
return container_of(vmpr, struct mem_cgroup, vmpressure);
}
+#define CURRENT_OBJCG_UPDATE_BIT 0
+#define CURRENT_OBJCG_UPDATE_FLAG (1UL << CURRENT_OBJCG_UPDATE_BIT)
+
#ifdef CONFIG_MEMCG_KMEM
static DEFINE_SPINLOCK(objcg_lock);
@@ -3001,6 +3004,57 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
return objcg;
}
+static struct obj_cgroup *current_objcg_update(void)
+{
+ struct mem_cgroup *memcg;
+ struct obj_cgroup *old, *objcg = NULL;
+
+ do {
+ /* Atomically drop the update bit. */
+ old = xchg(&current->objcg, NULL);
+ if (old) {
+ old = (struct obj_cgroup *)
+ ((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (old)
+ obj_cgroup_put(old);
+
+ old = NULL;
+ }
+
+ /*
+ * Release the objcg pointer from the previous iteration,
+ * if try_cmpxchg() below fails.
+ */
+ if (unlikely(objcg))
+ obj_cgroup_put(objcg);
+
+ /* Obtain the new objcg pointer. */
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+ /*
+ * The current task can be asynchronously moved to another
+ * memcg and the previous memcg can be offlined. So let's
+ * get the memcg pointer and try get a reference to objcg
+ * under a rcu read lock.
+ */
+ for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+ objcg = rcu_dereference(memcg->objcg);
+ if (likely(objcg && obj_cgroup_tryget(objcg)))
+ break;
+ objcg = NULL;
+ }
+ rcu_read_unlock();
+
+ /*
+ * Try set up a new objcg pointer atomically. If it
+ * fails, it means the update flag was set concurrently, so
+ * the whole procedure should be repeated.
+ */
+ } while (!try_cmpxchg(&current->objcg, &old, objcg));
+
+ return objcg;
+}
+
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
{
struct mem_cgroup *memcg;
@@ -3008,19 +3062,26 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
if (in_task()) {
memcg = current->active_memcg;
+ if (unlikely(memcg))
+ goto from_memcg;
- /* Memcg to charge can't be determined. */
- if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
- return NULL;
+ objcg = READ_ONCE(current->objcg);
+ if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
+ objcg = current_objcg_update();
+
+ if (objcg) {
+ obj_cgroup_get(objcg);
+ return objcg;
+ }
} else {
memcg = this_cpu_read(int_active_memcg);
- if (likely(!memcg))
- return NULL;
+ if (unlikely(memcg))
+ goto from_memcg;
}
+ return NULL;
+from_memcg:
rcu_read_lock();
- if (!memcg)
- memcg = mem_cgroup_from_task(current);
objcg = __get_obj_cgroup_from_memcg(memcg);
rcu_read_unlock();
return objcg;
@@ -6345,6 +6406,7 @@ static void mem_cgroup_move_task(void)
mem_cgroup_clear_mc();
}
}
+
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
@@ -6358,8 +6420,39 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_fork(struct task_struct *task)
+{
+ /*
+ * Set the update flag to cause task->objcg to be initialized lazily
+ * on the first allocation. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG;
+}
+
+static void mem_cgroup_exit(struct task_struct *task)
+{
+ struct obj_cgroup *objcg = task->objcg;
+
+ objcg = (struct obj_cgroup *)
+ ((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (objcg)
+ obj_cgroup_put(objcg);
+
+ /*
+ * Some kernel allocations can happen after this point,
+ * but let's ignore them. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = NULL;
+}
+#endif
+
#ifdef CONFIG_LRU_GEN
-static void mem_cgroup_attach(struct cgroup_taskset *tset)
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
@@ -6377,10 +6470,31 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
task_unlock(task);
}
#else
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_LRU_GEN */
+
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(task, css, tset) {
+ /* atomically set the update bit */
+ set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg);
+ }
+}
+#else
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_MEMCG_KMEM */
+
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
+ mem_cgroup_lru_gen_attach(tset);
+ mem_cgroup_kmem_attach(tset);
}
-#endif /* CONFIG_LRU_GEN */
+#endif
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
@@ -6824,9 +6938,15 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_reset = mem_cgroup_css_reset,
.css_rstat_flush = mem_cgroup_css_rstat_flush,
.can_attach = mem_cgroup_can_attach,
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
.attach = mem_cgroup_attach,
+#endif
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
+#ifdef CONFIG_MEMCG_KMEM
+ .fork = mem_cgroup_fork,
+ .exit = mem_cgroup_exit,
+#endif
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
.early_init = 0,
--
2.42.0
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
2023-10-18 18:08 ` Roman Gushchin
@ 2023-10-19 7:14 ` Vlastimil Babka
0 siblings, 0 replies; 9+ messages in thread
From: Vlastimil Babka @ 2023-10-19 7:14 UTC (permalink / raw)
To: Roman Gushchin, Andrew Morton
Cc: mm-commits, shakeelb, rientjes, naresh.kamboju, muchun.song,
mhocko, hannes, dennis
On 10/18/23 20:08, Roman Gushchin wrote:
> From 14f998a9235fdfa88a4ebfad5802bdde6195bfae Mon Sep 17 00:00:00 2001
> From: Roman Gushchin <roman.gushchin@linux.dev>
> Date: Mon, 19 Dec 2022 15:46:18 -0800
> Subject: [PATCH v4 2/5] mm: kmem: add direct objcg pointer to task_struct
>
> To charge a freshly allocated kernel object to a memory cgroup, the
> kernel needs to obtain an objcg pointer. Currently it does it
> indirectly by obtaining the memcg pointer first and then calling to
> __get_obj_cgroup_from_memcg().
>
> Usually tasks spend their entire life belonging to the same object
> cgroup. So it makes sense to save the objcg pointer on task_struct
> directly, so it can be obtained faster. It requires some work on fork,
> exit and cgroup migrate paths, but these paths are way colder.
>
> To avoid any costly synchronization the following rules are applied:
> 1) A task sets its objcg pointer itself.
>
> 2) If a task is being migrated to another cgroup, the least
> significant bit of the objcg pointer is set atomically.
>
> 3) On the allocation path the objcg pointer is obtained locklessly
> using the READ_ONCE() macro and the least significant bit is
> checked. If it's set, the following procedure is used to update
> it locklessly:
> - task->objcg is zeroed using xchg
> - new objcg pointer is obtained
> - task->objcg is updated using try_cmpxchg
> - operation is repeated if try_cmpxchg fails
> It guarantees that no updates will be lost if task migration
> is racing against objcg pointer update. It also allows to keep
> both read and write paths fully lockless.
>
> Because the task is keeping a reference to the objcg, it can't go away
> while the task is alive.
>
> This commit doesn't change the way the remote memcg charging works.
>
> Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
> Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
> include/linux/sched.h | 4 ++
> mm/memcontrol.c | 138 +++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 133 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 77f01ac385f7..60de42715b56 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1443,6 +1443,10 @@ struct task_struct {
> struct mem_cgroup *active_memcg;
> #endif
>
> +#ifdef CONFIG_MEMCG_KMEM
> + struct obj_cgroup *objcg;
> +#endif
> +
> #ifdef CONFIG_BLK_CGROUP
> struct gendisk *throttle_disk;
> #endif
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 16ac2a5838fb..d51b87cc8d97 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -249,6 +249,9 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
> return container_of(vmpr, struct mem_cgroup, vmpressure);
> }
>
> +#define CURRENT_OBJCG_UPDATE_BIT 0
> +#define CURRENT_OBJCG_UPDATE_FLAG (1UL << CURRENT_OBJCG_UPDATE_BIT)
> +
> #ifdef CONFIG_MEMCG_KMEM
> static DEFINE_SPINLOCK(objcg_lock);
>
> @@ -3001,6 +3004,57 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
> return objcg;
> }
>
> +static struct obj_cgroup *current_objcg_update(void)
> +{
> + struct mem_cgroup *memcg;
> + struct obj_cgroup *old, *objcg = NULL;
> +
> + do {
> + /* Atomically drop the update bit. */
> + old = xchg(&current->objcg, NULL);
> + if (old) {
> + old = (struct obj_cgroup *)
> + ((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
> + if (old)
> + obj_cgroup_put(old);
> +
> + old = NULL;
> + }
> +
> + /*
> + * Release the objcg pointer from the previous iteration,
> + * if try_cmpxchg() below fails.
> + */
> + if (unlikely(objcg))
> + obj_cgroup_put(objcg);
> +
> + /* Obtain the new objcg pointer. */
> + rcu_read_lock();
> + memcg = mem_cgroup_from_task(current);
Btw, can this return the root_mem_cgroup? If yes, then the for loop below
doesn't do even a single iteration, and we might have a stale pointer in
objcg? Should we set it to NULL after dropping the reference above?
(But even before the series, the similar loop in
__get_obj_cgroup_from_memcg() would mean no objcg is obtained in such case,
I guess that's just a part of the design that root memcg doesn't have an objcg).
> + /*
> + * The current task can be asynchronously moved to another
> + * memcg and the previous memcg can be offlined. So let's
> + * get the memcg pointer and try get a reference to objcg
> + * under a rcu read lock.
> + */
> + for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
> + objcg = rcu_dereference(memcg->objcg);
> + if (likely(objcg && obj_cgroup_tryget(objcg)))
> + break;
> + objcg = NULL;
> + }
> + rcu_read_unlock();
> +
> + /*
> + * Try set up a new objcg pointer atomically. If it
> + * fails, it means the update flag was set concurrently, so
> + * the whole procedure should be repeated.
> + */
> + } while (!try_cmpxchg(&current->objcg, &old, objcg));
> +
> + return objcg;
> +}
> +
> __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
> {
> struct mem_cgroup *memcg;
> @@ -3008,19 +3062,26 @@ __always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
>
> if (in_task()) {
> memcg = current->active_memcg;
> + if (unlikely(memcg))
> + goto from_memcg;
>
> - /* Memcg to charge can't be determined. */
> - if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
> - return NULL;
> + objcg = READ_ONCE(current->objcg);
> + if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
> + objcg = current_objcg_update();
> +
> + if (objcg) {
> + obj_cgroup_get(objcg);
> + return objcg;
> + }
> } else {
> memcg = this_cpu_read(int_active_memcg);
> - if (likely(!memcg))
> - return NULL;
> + if (unlikely(memcg))
> + goto from_memcg;
> }
> + return NULL;
>
> +from_memcg:
> rcu_read_lock();
> - if (!memcg)
> - memcg = mem_cgroup_from_task(current);
> objcg = __get_obj_cgroup_from_memcg(memcg);
> rcu_read_unlock();
> return objcg;
> @@ -6345,6 +6406,7 @@ static void mem_cgroup_move_task(void)
> mem_cgroup_clear_mc();
> }
> }
> +
> #else /* !CONFIG_MMU */
> static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
> {
> @@ -6358,8 +6420,39 @@ static void mem_cgroup_move_task(void)
> }
> #endif
>
> +#ifdef CONFIG_MEMCG_KMEM
> +static void mem_cgroup_fork(struct task_struct *task)
> +{
> + /*
> + * Set the update flag to cause task->objcg to be initialized lazily
> + * on the first allocation. It can be done without any synchronization
> + * because it's always performed on the current task, so does
> + * current_objcg_update().
> + */
> + task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG;
> +}
> +
> +static void mem_cgroup_exit(struct task_struct *task)
> +{
> + struct obj_cgroup *objcg = task->objcg;
> +
> + objcg = (struct obj_cgroup *)
> + ((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
> + if (objcg)
> + obj_cgroup_put(objcg);
> +
> + /*
> + * Some kernel allocations can happen after this point,
> + * but let's ignore them. It can be done without any synchronization
> + * because it's always performed on the current task, so does
> + * current_objcg_update().
> + */
> + task->objcg = NULL;
> +}
> +#endif
> +
> #ifdef CONFIG_LRU_GEN
> -static void mem_cgroup_attach(struct cgroup_taskset *tset)
> +static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
> {
> struct task_struct *task;
> struct cgroup_subsys_state *css;
> @@ -6377,10 +6470,31 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
> task_unlock(task);
> }
> #else
> +static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
> +#endif /* CONFIG_LRU_GEN */
> +
> +#ifdef CONFIG_MEMCG_KMEM
> +static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
> +{
> + struct task_struct *task;
> + struct cgroup_subsys_state *css;
> +
> + cgroup_taskset_for_each(task, css, tset) {
> + /* atomically set the update bit */
> + set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg);
> + }
> +}
> +#else
> +static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
> +#endif /* CONFIG_MEMCG_KMEM */
> +
> +#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
> static void mem_cgroup_attach(struct cgroup_taskset *tset)
> {
> + mem_cgroup_lru_gen_attach(tset);
> + mem_cgroup_kmem_attach(tset);
> }
> -#endif /* CONFIG_LRU_GEN */
> +#endif
>
> static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
> {
> @@ -6824,9 +6938,15 @@ struct cgroup_subsys memory_cgrp_subsys = {
> .css_reset = mem_cgroup_css_reset,
> .css_rstat_flush = mem_cgroup_css_rstat_flush,
> .can_attach = mem_cgroup_can_attach,
> +#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
> .attach = mem_cgroup_attach,
> +#endif
> .cancel_attach = mem_cgroup_cancel_attach,
> .post_attach = mem_cgroup_move_task,
> +#ifdef CONFIG_MEMCG_KMEM
> + .fork = mem_cgroup_fork,
> + .exit = mem_cgroup_exit,
> +#endif
> .dfl_cftypes = memory_files,
> .legacy_cftypes = mem_cgroup_legacy_files,
> .early_init = 0,
^ permalink raw reply [flat|nested] 9+ messages in thread
* + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch
@ 2023-10-20 17:11 Andrew Morton
0 siblings, 0 replies; 9+ messages in thread
From: Andrew Morton @ 2023-10-20 17:11 UTC (permalink / raw)
To: mm-commits, vbabka, shakeelb, rientjes, naresh.kamboju,
muchun.song, mhocko, hannes, dennis, roman.gushchin, akpm
The patch titled
Subject: mm: kmem: add direct objcg pointer to task_struct
has been added to the -mm mm-unstable branch. Its filename is
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will shortly appear at
https://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new.git/tree/patches/mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
This patch will later appear in the mm-unstable branch at
git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Before you just go and hit "reply", please:
a) Consider who else should be cc'ed
b) Prefer to cc a suitable mailing list as well
c) Ideally: find the original patch on the mailing list and do a
reply-to-all to that, adding suitable additional cc's
*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***
The -mm tree is included into linux-next via the mm-everything
branch at git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
and is updated there every 2-3 working days
------------------------------------------------------
From: Roman Gushchin <roman.gushchin@linux.dev>
Subject: mm: kmem: add direct objcg pointer to task_struct
Date: Thu, 19 Oct 2023 15:53:42 -0700
To charge a freshly allocated kernel object to a memory cgroup, the kernel
needs to obtain an objcg pointer. Currently it does it indirectly by
obtaining the memcg pointer first and then calling to
__get_obj_cgroup_from_memcg().
Usually tasks spend their entire life belonging to the same object cgroup.
So it makes sense to save the objcg pointer on task_struct directly, so
it can be obtained faster. It requires some work on fork, exit and cgroup
migrate paths, but these paths are way colder.
To avoid any costly synchronization the following rules are applied:
1) A task sets its objcg pointer itself.
2) If a task is being migrated to another cgroup, the least
significant bit of the objcg pointer is set atomically.
3) On the allocation path the objcg pointer is obtained locklessly
using the READ_ONCE() macro and the least significant bit is
checked. If it's set, the following procedure is used to update
it locklessly:
- task->objcg is zeroed using xchg
- new objcg pointer is obtained
- task->objcg is updated using try_cmpxchg
- operation is repeated if try_cmpxchg fails
It guarantees that no updates will be lost if task migration
is racing against objcg pointer update. It also allows to keep
both read and write paths fully lockless.
Because the task is keeping a reference to the objcg, it can't go away
while the task is alive.
This commit doesn't change the way the remote memcg charging works.
Link: https://lkml.kernel.org/r/20231019225346.1822282-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Tested-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeelb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Dennis Zhou <dennis@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/sched.h | 4 +
mm/memcontrol.c | 139 +++++++++++++++++++++++++++++++++++++---
2 files changed, 134 insertions(+), 9 deletions(-)
--- a/include/linux/sched.h~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/include/linux/sched.h
@@ -1443,6 +1443,10 @@ struct task_struct {
struct mem_cgroup *active_memcg;
#endif
+#ifdef CONFIG_MEMCG_KMEM
+ struct obj_cgroup *objcg;
+#endif
+
#ifdef CONFIG_BLK_CGROUP
struct gendisk *throttle_disk;
#endif
--- a/mm/memcontrol.c~mm-kmem-add-direct-objcg-pointer-to-task_struct
+++ a/mm/memcontrol.c
@@ -249,6 +249,9 @@ struct mem_cgroup *vmpressure_to_memcg(s
return container_of(vmpr, struct mem_cgroup, vmpressure);
}
+#define CURRENT_OBJCG_UPDATE_BIT 0
+#define CURRENT_OBJCG_UPDATE_FLAG (1UL << CURRENT_OBJCG_UPDATE_BIT)
+
#ifdef CONFIG_MEMCG_KMEM
static DEFINE_SPINLOCK(objcg_lock);
@@ -3107,6 +3110,58 @@ static struct obj_cgroup *__get_obj_cgro
return objcg;
}
+static struct obj_cgroup *current_objcg_update(void)
+{
+ struct mem_cgroup *memcg;
+ struct obj_cgroup *old, *objcg = NULL;
+
+ do {
+ /* Atomically drop the update bit. */
+ old = xchg(&current->objcg, NULL);
+ if (old) {
+ old = (struct obj_cgroup *)
+ ((unsigned long)old & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (old)
+ obj_cgroup_put(old);
+
+ old = NULL;
+ }
+
+ /* If new objcg is NULL, no reason for the second atomic update. */
+ if (!current->mm || (current->flags & PF_KTHREAD))
+ return NULL;
+
+ /*
+ * Release the objcg pointer from the previous iteration,
+ * if try_cmpxchg() below fails.
+ */
+ if (unlikely(objcg)) {
+ obj_cgroup_put(objcg);
+ objcg = NULL;
+ }
+
+ /*
+ * Obtain the new objcg pointer. The current task can be
+ * asynchronously moved to another memcg and the previous
+ * memcg can be offlined. So let's get the memcg pointer
+ * and try get a reference to objcg under a rcu read lock.
+ */
+
+ rcu_read_lock();
+ memcg = mem_cgroup_from_task(current);
+ objcg = __get_obj_cgroup_from_memcg(memcg);
+ rcu_read_unlock();
+
+ /*
+ * Try set up a new objcg pointer atomically. If it
+ * fails, it means the update flag was set concurrently, so
+ * the whole procedure should be repeated.
+ */
+ } while (!try_cmpxchg(&current->objcg, &old, objcg));
+
+ return objcg;
+}
+
__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void)
{
struct mem_cgroup *memcg;
@@ -3114,19 +3169,26 @@ __always_inline struct obj_cgroup *get_o
if (in_task()) {
memcg = current->active_memcg;
+ if (unlikely(memcg))
+ goto from_memcg;
- /* Memcg to charge can't be determined. */
- if (likely(!memcg) && (!current->mm || (current->flags & PF_KTHREAD)))
- return NULL;
+ objcg = READ_ONCE(current->objcg);
+ if (unlikely((unsigned long)objcg & CURRENT_OBJCG_UPDATE_FLAG))
+ objcg = current_objcg_update();
+
+ if (objcg) {
+ obj_cgroup_get(objcg);
+ return objcg;
+ }
} else {
memcg = this_cpu_read(int_active_memcg);
- if (likely(!memcg))
- return NULL;
+ if (unlikely(memcg))
+ goto from_memcg;
}
+ return NULL;
+from_memcg:
rcu_read_lock();
- if (!memcg)
- memcg = mem_cgroup_from_task(current);
objcg = __get_obj_cgroup_from_memcg(memcg);
rcu_read_unlock();
return objcg;
@@ -6468,6 +6530,7 @@ static void mem_cgroup_move_task(void)
mem_cgroup_clear_mc();
}
}
+
#else /* !CONFIG_MMU */
static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
{
@@ -6481,8 +6544,39 @@ static void mem_cgroup_move_task(void)
}
#endif
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_fork(struct task_struct *task)
+{
+ /*
+ * Set the update flag to cause task->objcg to be initialized lazily
+ * on the first allocation. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = (struct obj_cgroup *)CURRENT_OBJCG_UPDATE_FLAG;
+}
+
+static void mem_cgroup_exit(struct task_struct *task)
+{
+ struct obj_cgroup *objcg = task->objcg;
+
+ objcg = (struct obj_cgroup *)
+ ((unsigned long)objcg & ~CURRENT_OBJCG_UPDATE_FLAG);
+ if (objcg)
+ obj_cgroup_put(objcg);
+
+ /*
+ * Some kernel allocations can happen after this point,
+ * but let's ignore them. It can be done without any synchronization
+ * because it's always performed on the current task, so does
+ * current_objcg_update().
+ */
+ task->objcg = NULL;
+}
+#endif
+
#ifdef CONFIG_LRU_GEN
-static void mem_cgroup_attach(struct cgroup_taskset *tset)
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *css;
@@ -6500,10 +6594,31 @@ static void mem_cgroup_attach(struct cgr
task_unlock(task);
}
#else
+static void mem_cgroup_lru_gen_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_LRU_GEN */
+
+#ifdef CONFIG_MEMCG_KMEM
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+
+ cgroup_taskset_for_each(task, css, tset) {
+ /* atomically set the update bit */
+ set_bit(CURRENT_OBJCG_UPDATE_BIT, (unsigned long *)&task->objcg);
+ }
+}
+#else
+static void mem_cgroup_kmem_attach(struct cgroup_taskset *tset) {}
+#endif /* CONFIG_MEMCG_KMEM */
+
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
static void mem_cgroup_attach(struct cgroup_taskset *tset)
{
+ mem_cgroup_lru_gen_attach(tset);
+ mem_cgroup_kmem_attach(tset);
}
-#endif /* CONFIG_LRU_GEN */
+#endif
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
@@ -6913,9 +7028,15 @@ struct cgroup_subsys memory_cgrp_subsys
.css_reset = mem_cgroup_css_reset,
.css_rstat_flush = mem_cgroup_css_rstat_flush,
.can_attach = mem_cgroup_can_attach,
+#if defined(CONFIG_LRU_GEN) || defined(CONFIG_MEMCG_KMEM)
.attach = mem_cgroup_attach,
+#endif
.cancel_attach = mem_cgroup_cancel_attach,
.post_attach = mem_cgroup_move_task,
+#ifdef CONFIG_MEMCG_KMEM
+ .fork = mem_cgroup_fork,
+ .exit = mem_cgroup_exit,
+#endif
.dfl_cftypes = memory_files,
.legacy_cftypes = mem_cgroup_legacy_files,
.early_init = 0,
_
Patches currently in -mm which might be from roman.gushchin@linux.dev are
mm-kmem-optimize-get_obj_cgroup_from_current.patch
mm-kmem-add-direct-objcg-pointer-to-task_struct.patch
mm-kmem-make-memcg-keep-a-reference-to-the-original-objcg.patch
mm-kmem-scoped-objcg-protection.patch
percpu-scoped-objcg-protection.patch
mm-kmem-reimplement-get_obj_cgroup_from_current.patch
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2023-10-20 17:11 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-10 18:20 + mm-kmem-add-direct-objcg-pointer-to-task_struct.patch added to mm-unstable branch Andrew Morton
2023-10-10 22:15 ` Roman Gushchin
2023-10-10 23:02 ` Andrew Morton
-- strict thread matches above, loose matches on Subject: below --
2023-10-20 17:11 Andrew Morton
2023-10-16 22:58 Andrew Morton
2023-10-18 18:08 ` Roman Gushchin
2023-10-19 7:14 ` Vlastimil Babka
2023-09-29 18:42 Andrew Morton
2023-10-01 16:42 ` Roman Gushchin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.