From: Peter Zijlstra <peterz@infradead.org>
To: Rik van Riel <riel@surriel.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
kernel-team@meta.com, dave.hansen@linux.intel.com,
luto@kernel.org, tglx@linutronix.de, mingo@redhat.com,
bp@alien8.de, hpa@zytor.com, akpm@linux-foundation.org
Subject: Re: [PATCH 07/10] x86,mm: enable broadcast TLB invalidation for multi-threaded processes
Date: Sun, 22 Dec 2024 12:36:01 +0100 [thread overview]
Message-ID: <20241222113601.GX11133@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20241222040717.3096835-8-riel@surriel.com>
On Sat, Dec 21, 2024 at 11:06:39PM -0500, Rik van Riel wrote:
> +#ifdef CONFIG_CPU_SUP_AMD
> +/*
> + * Logic for AMD INVLPGB support.
> + */
> +static DEFINE_SPINLOCK(broadcast_asid_lock);
RAW_SPINLOCK ?
> +static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS;
> +static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 };
> +static LIST_HEAD(broadcast_asid_list);
> +static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
> +
> +static void reset_broadcast_asid_space(void)
> +{
> + mm_context_t *context;
> +
> + assert_spin_locked(&broadcast_asid_lock);
lockdep_assert_held(&broadcast_asid_lock);
> +
> + /*
> + * Flush once when we wrap around the ASID space, so we won't need
> + * to flush every time we allocate an ASID for broadcast flushing.
> + */
> + invlpgb_flush_all_nonglobals();
> + tlbsync();
> +
> + /*
> + * Leave the currently used broadcast ASIDs set in the bitmap, since
> + * those cannot be reused before the next wraparound and flush.
> + */
> + bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE);
> + list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list)
> + __set_bit(context->broadcast_asid, broadcast_asid_used);
> +
> + last_broadcast_asid = TLB_NR_DYN_ASIDS;
> +}
> +
> +static u16 get_broadcast_asid(void)
> +{
> + assert_spin_locked(&broadcast_asid_lock);
lockdep_assert_held
> +
> + do {
> + u16 start = last_broadcast_asid;
> + u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start);
> +
> + if (asid >= MAX_ASID_AVAILABLE) {
> + reset_broadcast_asid_space();
> + continue;
> + }
> +
> + /* Try claiming this broadcast ASID. */
> + if (!test_and_set_bit(asid, broadcast_asid_used)) {
> + last_broadcast_asid = asid;
> + return asid;
> + }
> + } while (1);
> +}
> +
> +/*
> + * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast
> + * (INVLPGB) ASID, or the other way around.
> + */
> +static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid)
> +{
> + u16 broadcast_asid = next->context.broadcast_asid;
> +
> + if (broadcast_asid && prev_asid != broadcast_asid) {
> + return true;
> + }
> +
> + if (!broadcast_asid && is_broadcast_asid(prev_asid)) {
> + return true;
> + }
> +
> + return false;
> +}
Those return statements don't really need {} on.
> +
> +void destroy_context_free_broadcast_asid(struct mm_struct *mm) {
{ goes on a new line.
> + unsigned long flags;
> +
> + if (!mm->context.broadcast_asid)
> + return;
> +
> + spin_lock_irqsave(&broadcast_asid_lock, flags);
guard(raw_spinlock_irqsave)(&broadcast_asid_lock);
> + mm->context.broadcast_asid = 0;
> + list_del(&mm->context.broadcast_asid_list);
> + broadcast_asid_available++;
> + spin_unlock_irqrestore(&broadcast_asid_lock, flags);
> +}
> +
> +static int mm_active_cpus(struct mm_struct *mm)
> +{
> + int count = 0;
> + int cpu;
> +
> + for_each_cpu(cpu, mm_cpumask(mm)) {
> + /* Skip the CPUs that aren't really running this process. */
> + if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
> + continue;
> +
> + if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
> + continue;
> +
> + count++;
> + }
> + return count;
> +}
> +
> +/*
> + * Assign a broadcast ASID to the current process, protecting against
> + * races between multiple threads in the process.
> + */
> +static void use_broadcast_asid(struct mm_struct *mm)
> +{
> + unsigned long flags;
> +
> + spin_lock_irqsave(&broadcast_asid_lock, flags);
guard(raw_spinlock_irqsave)(&broadcast_asid_lock);
> +
> + /* This process is already using broadcast TLB invalidation. */
> + if (mm->context.broadcast_asid)
> + goto out_unlock;
return;
> + mm->context.broadcast_asid = get_broadcast_asid();
> + mm->context.asid_transition = true;
> + list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list);
> + broadcast_asid_available--;
> +
> +out_unlock:
Notably we're really wanting to get away from the whole goto unlock
pattern.
> + spin_unlock_irqrestore(&broadcast_asid_lock, flags);
> +}
> +
> +/*
> + * Figure out whether to assign a broadcast (global) ASID to a process.
> + * We vary the threshold by how empty or full broadcast ASID space is.
> + * 1/4 full: >= 4 active threads
> + * 1/2 full: >= 8 active threads
> + * 3/4 full: >= 16 active threads
> + * 7/8 full: >= 32 active threads
> + * etc
> + *
> + * This way we should never exhaust the broadcast ASID space, even on very
> + * large systems, and the processes with the largest number of active
> + * threads should be able to use broadcast TLB invalidation.
I'm a little confused, at most we need one ASID per CPU, IIRC we have
something like 4k ASIDs (page-offset bits in the physical address bits)
so for anything with less than 4K CPUs we're good, but with anything
having more CPUs we're up a creek irrespective of the above scheme, no?
> + */
> +#define HALFFULL_THRESHOLD 8
> +static bool meets_broadcast_asid_threshold(struct mm_struct *mm)
> +{
> + int avail = broadcast_asid_available;
> + int threshold = HALFFULL_THRESHOLD;
> + int mm_active_threads;
> +
> + if (!avail)
> + return false;
> +
> + mm_active_threads = mm_active_cpus(mm);
> +
> + /* Small processes can just use IPI TLB flushing. */
> + if (mm_active_threads < 3)
> + return false;
> +
> + if (avail > MAX_ASID_AVAILABLE * 3 / 4) {
> + threshold = HALFFULL_THRESHOLD / 4;
> + } else if (avail > MAX_ASID_AVAILABLE / 2) {
> + threshold = HALFFULL_THRESHOLD / 2;
> + } else if (avail < MAX_ASID_AVAILABLE / 3) {
> + do {
> + avail *= 2;
> + threshold *= 2;
> + } while ((avail + threshold ) < MAX_ASID_AVAILABLE / 2);
> + }
> +
> + return mm_active_threads > threshold;
> +}
next prev parent reply other threads:[~2024-12-22 11:36 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-12-22 4:06 [RFC PATCH 00/10] AMD broadcast TLB invalidation Rik van Riel
2024-12-22 4:06 ` [PATCH 01/10] Add X86_FEATURE_INVLPGB definition Rik van Riel
2024-12-22 4:06 ` [PATCH 02/10] x86,tlb: get INVLPGB count max from CPUID Rik van Riel
2024-12-22 11:46 ` Borislav Petkov
2024-12-22 14:39 ` Rik van Riel
2024-12-22 4:06 ` [PATCH 03/10] x86,mm: add INVLPGB support code Rik van Riel
2024-12-22 11:05 ` Peter Zijlstra
2024-12-22 21:24 ` Rik van Riel
2024-12-22 4:06 ` [PATCH 04/10] x86,mm: use INVLPGB for kernel TLB flushes Rik van Riel
2024-12-22 11:16 ` Peter Zijlstra
2024-12-22 15:12 ` Rik van Riel
2024-12-24 18:13 ` Peter Zijlstra
2024-12-22 11:47 ` Borislav Petkov
2024-12-22 4:06 ` [PATCH 05/10] x86,tlb: use INVLPGB in flush_tlb_all Rik van Riel
2024-12-22 11:17 ` Peter Zijlstra
2024-12-22 4:06 ` [PATCH 06/10] x86,mm: use broadcast TLB flushing for page reclaim TLB flushing Rik van Riel
2024-12-22 11:18 ` Peter Zijlstra
2024-12-22 4:06 ` [PATCH 07/10] x86,mm: enable broadcast TLB invalidation for multi-threaded processes Rik van Riel
2024-12-22 11:36 ` Peter Zijlstra [this message]
2024-12-22 22:45 ` Rik van Riel
2024-12-22 4:06 ` [PATCH 08/10] x86,tlb: do targeted broadcast flushing from tlbbatch code Rik van Riel
2024-12-22 4:06 ` [PATCH 09/10] x86/mm: enable AMD translation cache extensions Rik van Riel
2024-12-22 11:38 ` Peter Zijlstra
2024-12-22 15:37 ` Rik van Riel
2024-12-24 18:25 ` Peter Zijlstra
2024-12-22 4:06 ` [PATCH 10/10] x86/mm: only invalidate final translations with INVLPGB Rik van Riel
2024-12-22 11:11 ` [RFC PATCH 00/10] AMD broadcast TLB invalidation Peter Zijlstra
2024-12-22 15:06 ` Rik van Riel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241222113601.GX11133@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=akpm@linux-foundation.org \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=kernel-team@meta.com \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=riel@surriel.com \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox