From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Andy Lutomirski <luto@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-ia64@vger.kernel.org,
Ben Segall <bsegall@google.com>,
Daniel Bristot de Oliveira <bristot@redhat.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Ingo Molnar <mingo@redhat.com>,
Juri Lelli <juri.lelli@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Steven Rostedt <rostedt@goodmis.org>,
Thomas Gleixner <tglx@linutronix.de>,
Vincent Guittot <vincent.guittot@linaro.org>
Subject: [PATCH v2 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch().
Date: Mon, 14 Feb 2022 18:15:16 +0000 [thread overview]
Message-ID: <YgqcNFrCsXi/XCCh@linutronix.de> (raw)
In-Reply-To: <YgqV1BLbCx5V+6tq@linutronix.de>
The task stack could be deallocated later. For fork()/exec() kind of
workloads (say a shell script executing several commands) it is
important that the stack is released in finish_task_switch() so that in
VMAP_STACK case it can be cached and reused in the new task.
For PREEMPT_RT it would be good if the wake-up in vfree_atomic() could
be avoided in the scheduling path. Far worse are the other
free_thread_stack() implementations which invoke __free_pages()/
kmem_cache_free() with disabled preemption.
Cache the stack in free_thread_stack() in the VMAP_STACK case and
RCU-delay the free path otherwise. Free the stack in the RCU callback.
In the VMAP_STACK case this is another opportunity to fill the cache.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
If that works and there are no other objection then I'm going to repost
the complete series.
kernel/fork.c | 76 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 63 insertions(+), 13 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 984f69d6f211f..aa17ed2a2afc7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,41 @@ static inline void free_task_struct(struct task_struct *tsk)
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+struct vm_stack {
+ struct rcu_head rcu;
+ struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+ unsigned int i;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+ continue;
+ return true;
+ }
+ return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+ if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+ return;
+
+ vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct vm_stack *vm_stack = tsk->stack;
+
+ vm_stack->stack_vm_area = tsk->stack_vm_area;
+ call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+}
+
static int free_vm_stack_cache(unsigned int cpu)
{
struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -296,24 +331,27 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- int i;
+ if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
+ thread_stack_delayed_free(tsk);
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i], NULL,
- tsk->stack_vm_area) != NULL)
- continue;
-
- tsk->stack = NULL;
- tsk->stack_vm_area = NULL;
- return;
- }
- vfree_atomic(tsk->stack);
tsk->stack = NULL;
tsk->stack_vm_area = NULL;
}
# else /* !CONFIG_VMAP_STACK */
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
@@ -328,7 +366,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
@@ -337,6 +375,18 @@ static void free_thread_stack(struct task_struct *tsk)
static struct kmem_cache *thread_stack_cache;
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ kmem_cache_free(thread_stack_cache, rh);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
@@ -348,7 +398,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- kmem_cache_free(thread_stack_cache, tsk->stack);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
--
2.34.1
WARNING: multiple messages have this Message-ID (diff)
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
To: Andy Lutomirski <luto@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-ia64@vger.kernel.org,
Ben Segall <bsegall@google.com>,
Daniel Bristot de Oliveira <bristot@redhat.com>,
Dietmar Eggemann <dietmar.eggemann@arm.com>,
Ingo Molnar <mingo@redhat.com>,
Juri Lelli <juri.lelli@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Steven Rostedt <rostedt@goodmis.org>,
Thomas Gleixner <tglx@linutronix.de>,
Vincent Guittot <vincent.guittot@linaro.org>
Subject: [PATCH v2 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch().
Date: Mon, 14 Feb 2022 19:15:16 +0100 [thread overview]
Message-ID: <YgqcNFrCsXi/XCCh@linutronix.de> (raw)
In-Reply-To: <YgqV1BLbCx5V+6tq@linutronix.de>
The task stack could be deallocated later. For fork()/exec() kind of
workloads (say a shell script executing several commands) it is
important that the stack is released in finish_task_switch() so that in
VMAP_STACK case it can be cached and reused in the new task.
For PREEMPT_RT it would be good if the wake-up in vfree_atomic() could
be avoided in the scheduling path. Far worse are the other
free_thread_stack() implementations which invoke __free_pages()/
kmem_cache_free() with disabled preemption.
Cache the stack in free_thread_stack() in the VMAP_STACK case and
RCU-delay the free path otherwise. Free the stack in the RCU callback.
In the VMAP_STACK case this is another opportunity to fill the cache.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
If that works and there are no other objection then I'm going to repost
the complete series.
kernel/fork.c | 76 ++++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 63 insertions(+), 13 deletions(-)
diff --git a/kernel/fork.c b/kernel/fork.c
index 984f69d6f211f..aa17ed2a2afc7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -193,6 +193,41 @@ static inline void free_task_struct(struct task_struct *tsk)
#define NR_CACHED_STACKS 2
static DEFINE_PER_CPU(struct vm_struct *, cached_stacks[NR_CACHED_STACKS]);
+struct vm_stack {
+ struct rcu_head rcu;
+ struct vm_struct *stack_vm_area;
+};
+
+static bool try_release_thread_stack_to_cache(struct vm_struct *vm)
+{
+ unsigned int i;
+
+ for (i = 0; i < NR_CACHED_STACKS; i++) {
+ if (this_cpu_cmpxchg(cached_stacks[i], NULL, vm) != NULL)
+ continue;
+ return true;
+ }
+ return false;
+}
+
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ struct vm_stack *vm_stack = container_of(rh, struct vm_stack, rcu);
+
+ if (try_release_thread_stack_to_cache(vm_stack->stack_vm_area))
+ return;
+
+ vfree(vm_stack);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct vm_stack *vm_stack = tsk->stack;
+
+ vm_stack->stack_vm_area = tsk->stack_vm_area;
+ call_rcu(&vm_stack->rcu, thread_stack_free_rcu);
+}
+
static int free_vm_stack_cache(unsigned int cpu)
{
struct vm_struct **cached_vm_stacks = per_cpu_ptr(cached_stacks, cpu);
@@ -296,24 +331,27 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- int i;
+ if (!try_release_thread_stack_to_cache(tsk->stack_vm_area))
+ thread_stack_delayed_free(tsk);
- for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_cmpxchg(cached_stacks[i], NULL,
- tsk->stack_vm_area) != NULL)
- continue;
-
- tsk->stack = NULL;
- tsk->stack_vm_area = NULL;
- return;
- }
- vfree_atomic(tsk->stack);
tsk->stack = NULL;
tsk->stack_vm_area = NULL;
}
# else /* !CONFIG_VMAP_STACK */
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ __free_pages(virt_to_page(rh), THREAD_SIZE_ORDER);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
@@ -328,7 +366,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- __free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
@@ -337,6 +375,18 @@ static void free_thread_stack(struct task_struct *tsk)
static struct kmem_cache *thread_stack_cache;
+static void thread_stack_free_rcu(struct rcu_head *rh)
+{
+ kmem_cache_free(thread_stack_cache, rh);
+}
+
+static void thread_stack_delayed_free(struct task_struct *tsk)
+{
+ struct rcu_head *rh = tsk->stack;
+
+ call_rcu(rh, thread_stack_free_rcu);
+}
+
static int alloc_thread_stack_node(struct task_struct *tsk, int node)
{
unsigned long *stack;
@@ -348,7 +398,7 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
static void free_thread_stack(struct task_struct *tsk)
{
- kmem_cache_free(thread_stack_cache, tsk->stack);
+ thread_stack_delayed_free(tsk);
tsk->stack = NULL;
}
--
2.34.1
next prev parent reply other threads:[~2022-02-14 18:15 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-25 15:26 [PATCH REPOST 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 1/8] kernel/fork: Redo ifdefs around task's handling Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 2/8] kernel/fork: Duplicate task_struct before stack allocation Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-02-11 23:42 ` Andy Lutomirski
2022-02-11 23:42 ` Andy Lutomirski
2022-02-14 11:39 ` Sebastian Andrzej Siewior
2022-02-14 11:39 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 3/8] kernel/fork, IA64: Provide a alloc_thread_stack_node() for IA64 Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-02-14 18:00 ` Sebastian Andrzej Siewior
2022-02-14 18:00 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 4/8] kernel/fork: Don't assign the stack pointer in dup_task_struct() Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 5/8] kernel/fork: Move memcg_charge_kernel_stack() into CONFIG_VMAP_STACK Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 6/8] kernel/fork: Move task stack account to do_exit() Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-02-11 23:43 ` Andy Lutomirski
2022-02-11 23:43 ` Andy Lutomirski
2022-01-25 15:26 ` [PATCH 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch() Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-02-11 23:55 ` Andy Lutomirski
2022-02-11 23:55 ` Andy Lutomirski
2022-02-14 12:10 ` Sebastian Andrzej Siewior
2022-02-14 12:10 ` Sebastian Andrzej Siewior
2022-02-14 12:24 ` Sebastian Andrzej Siewior
2022-02-14 12:24 ` Sebastian Andrzej Siewior
2022-02-14 16:54 ` Sebastian Andrzej Siewior
2022-02-14 16:54 ` Sebastian Andrzej Siewior
2022-02-14 17:48 ` Sebastian Andrzej Siewior
2022-02-14 17:48 ` Sebastian Andrzej Siewior
2022-02-14 18:15 ` Sebastian Andrzej Siewior [this message]
2022-02-14 18:15 ` [PATCH v2 " Sebastian Andrzej Siewior
2022-01-25 15:26 ` [PATCH 8/8] kernel/fork: Use IS_ENABLED() in account_kernel_stack() Sebastian Andrzej Siewior
2022-01-25 15:26 ` Sebastian Andrzej Siewior
2022-02-08 17:10 ` [PATCH REPOST 0/8] kernel/fork: Move thread stack free otu of the scheduler path Sebastian Andrzej Siewior
2022-02-08 17:10 ` Sebastian Andrzej Siewior
-- strict thread matches above, loose matches on Subject: below --
2022-02-17 10:23 [PATCH v2 " Sebastian Andrzej Siewior
2022-02-17 10:24 ` [PATCH v2 7/8] kernel/fork: Only cache the VMAP stack in finish_task_switch() Sebastian Andrzej Siewior
2022-02-17 10:24 ` Sebastian Andrzej Siewior
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=YgqcNFrCsXi/XCCh@linutronix.de \
--to=bigeasy@linutronix.de \
--cc=bristot@redhat.com \
--cc=bsegall@google.com \
--cc=dietmar.eggemann@arm.com \
--cc=juri.lelli@redhat.com \
--cc=linux-ia64@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
--cc=vincent.guittot@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.