From: akpm@linux-foundation.org
To: bigeasy@linutronix.de, brouer@redhat.com, cl@linux.com,
efault@gmx.de, iamjoonsoo.kim@lge.com, jannh@google.com,
mgorman@techsingularity.net, mm-commits@vger.kernel.org,
penberg@kernel.org, quic_qiancai@quicinc.com,
rientjes@google.com, tglx@linutronix.de, vbabka@suse.cz
Subject: [merged] mm-slub-move-flush_cpu_slab-invocations-__free_slab-invocations-out-of-irq-context.patch removed from -mm tree
Date: Thu, 09 Sep 2021 14:01:09 -0700 [thread overview]
Message-ID: <20210909210109.9qO2aDyXL%akpm@linux-foundation.org> (raw)
The patch titled
Subject: mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context
has been removed from the -mm tree. Its filename was
mm-slub-move-flush_cpu_slab-invocations-__free_slab-invocations-out-of-irq-context.patch
This patch was dropped because it was merged into mainline or a subsystem tree
------------------------------------------------------
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Subject: mm: slub: move flush_cpu_slab() invocations __free_slab() invocations out of IRQ context
flush_all() flushes a specific SLAB cache on each CPU (where the cache is
present). The deactivate_slab()/__free_slab() invocation happens within
IPI handler and is problematic for PREEMPT_RT.
The flush operation is not a frequent operation or a hot path. The
per-CPU flush operation can be moved to within a workqueue.
Because a workqueue handler, unlike IPI handler, does not disable irqs,
flush_slab() now has to disable them for working with the kmem_cache_cpu
fields. deactivate_slab() is safe to call with irqs enabled.
[vbabka@suse.cz: adapt to new SLUB changes]
Link: https://lkml.kernel.org/r/20210904105003.11688-29-vbabka@suse.cz
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Qian Cai <quic_qiancai@quicinc.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/slab_common.c | 2
mm/slub.c | 94 +++++++++++++++++++++++++++++++++++++--------
2 files changed, 80 insertions(+), 16 deletions(-)
--- a/mm/slab_common.c~mm-slub-move-flush_cpu_slab-invocations-__free_slab-invocations-out-of-irq-context
+++ a/mm/slab_common.c
@@ -502,6 +502,7 @@ void kmem_cache_destroy(struct kmem_cach
if (unlikely(!s))
return;
+ cpus_read_lock();
mutex_lock(&slab_mutex);
s->refcount--;
@@ -516,6 +517,7 @@ void kmem_cache_destroy(struct kmem_cach
}
out_unlock:
mutex_unlock(&slab_mutex);
+ cpus_read_unlock();
}
EXPORT_SYMBOL(kmem_cache_destroy);
--- a/mm/slub.c~mm-slub-move-flush_cpu_slab-invocations-__free_slab-invocations-out-of-irq-context
+++ a/mm/slub.c
@@ -2496,16 +2496,25 @@ static void put_cpu_partial(struct kmem_
static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
{
- void *freelist = c->freelist;
- struct page *page = c->page;
+ unsigned long flags;
+ struct page *page;
+ void *freelist;
+
+ local_irq_save(flags);
+
+ page = c->page;
+ freelist = c->freelist;
c->page = NULL;
c->freelist = NULL;
c->tid = next_tid(c->tid);
- deactivate_slab(s, page, freelist);
+ local_irq_restore(flags);
- stat(s, CPUSLAB_FLUSH);
+ if (page) {
+ deactivate_slab(s, page, freelist);
+ stat(s, CPUSLAB_FLUSH);
+ }
}
static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
@@ -2526,15 +2535,27 @@ static inline void __flush_cpu_slab(stru
unfreeze_partials_cpu(s, c);
}
+struct slub_flush_work {
+ struct work_struct work;
+ struct kmem_cache *s;
+ bool skip;
+};
+
/*
* Flush cpu slab.
*
- * Called from IPI handler with interrupts disabled.
+ * Called from CPU work handler with migration disabled.
*/
-static void flush_cpu_slab(void *d)
+static void flush_cpu_slab(struct work_struct *w)
{
- struct kmem_cache *s = d;
- struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
+ struct kmem_cache *s;
+ struct kmem_cache_cpu *c;
+ struct slub_flush_work *sfw;
+
+ sfw = container_of(w, struct slub_flush_work, work);
+
+ s = sfw->s;
+ c = this_cpu_ptr(s->cpu_slab);
if (c->page)
flush_slab(s, c);
@@ -2542,17 +2563,51 @@ static void flush_cpu_slab(void *d)
unfreeze_partials(s);
}
-static bool has_cpu_slab(int cpu, void *info)
+static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
- struct kmem_cache *s = info;
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
return c->page || slub_percpu_partial(c);
}
+static DEFINE_MUTEX(flush_lock);
+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
+
+static void flush_all_cpus_locked(struct kmem_cache *s)
+{
+ struct slub_flush_work *sfw;
+ unsigned int cpu;
+
+ lockdep_assert_cpus_held();
+ mutex_lock(&flush_lock);
+
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
+ if (!has_cpu_slab(cpu, s)) {
+ sfw->skip = true;
+ continue;
+ }
+ INIT_WORK(&sfw->work, flush_cpu_slab);
+ sfw->skip = false;
+ sfw->s = s;
+ schedule_work_on(cpu, &sfw->work);
+ }
+
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
+ if (sfw->skip)
+ continue;
+ flush_work(&sfw->work);
+ }
+
+ mutex_unlock(&flush_lock);
+}
+
static void flush_all(struct kmem_cache *s)
{
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
+ cpus_read_lock();
+ flush_all_cpus_locked(s);
+ cpus_read_unlock();
}
/*
@@ -4097,7 +4152,7 @@ int __kmem_cache_shutdown(struct kmem_ca
int node;
struct kmem_cache_node *n;
- flush_all(s);
+ flush_all_cpus_locked(s);
/* Attempt to free all objects */
for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
@@ -4373,7 +4428,7 @@ EXPORT_SYMBOL(kfree);
* being allocated from last increasing the chance that the last objects
* are freed in them.
*/
-int __kmem_cache_shrink(struct kmem_cache *s)
+static int __kmem_cache_do_shrink(struct kmem_cache *s)
{
int node;
int i;
@@ -4385,7 +4440,6 @@ int __kmem_cache_shrink(struct kmem_cach
unsigned long flags;
int ret = 0;
- flush_all(s);
for_each_kmem_cache_node(s, node, n) {
INIT_LIST_HEAD(&discard);
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
@@ -4435,13 +4489,21 @@ int __kmem_cache_shrink(struct kmem_cach
return ret;
}
+int __kmem_cache_shrink(struct kmem_cache *s)
+{
+ flush_all(s);
+ return __kmem_cache_do_shrink(s);
+}
+
static int slab_mem_going_offline_callback(void *arg)
{
struct kmem_cache *s;
mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list)
- __kmem_cache_shrink(s);
+ list_for_each_entry(s, &slab_caches, list) {
+ flush_all_cpus_locked(s);
+ __kmem_cache_do_shrink(s);
+ }
mutex_unlock(&slab_mutex);
return 0;
_
Patches currently in -mm which might be from bigeasy@linutronix.de are
reply other threads:[~2021-09-09 21:01 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210909210109.9qO2aDyXL%akpm@linux-foundation.org \
--to=akpm@linux-foundation.org \
--cc=bigeasy@linutronix.de \
--cc=brouer@redhat.com \
--cc=cl@linux.com \
--cc=efault@gmx.de \
--cc=iamjoonsoo.kim@lge.com \
--cc=jannh@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mgorman@techsingularity.net \
--cc=mm-commits@vger.kernel.org \
--cc=penberg@kernel.org \
--cc=quic_qiancai@quicinc.com \
--cc=rientjes@google.com \
--cc=tglx@linutronix.de \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.