public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: Rik van Riel <riel@surriel.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	kernel-team@meta.com, dave.hansen@linux.intel.com,
	luto@kernel.org, tglx@linutronix.de, mingo@redhat.com,
	bp@alien8.de, hpa@zytor.com, akpm@linux-foundation.org
Subject: Re: [PATCH 07/10] x86,mm: enable broadcast TLB invalidation for multi-threaded processes
Date: Sun, 22 Dec 2024 12:36:01 +0100	[thread overview]
Message-ID: <20241222113601.GX11133@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <20241222040717.3096835-8-riel@surriel.com>

On Sat, Dec 21, 2024 at 11:06:39PM -0500, Rik van Riel wrote:

> +#ifdef CONFIG_CPU_SUP_AMD
> +/*
> + * Logic for AMD INVLPGB support.
> + */
> +static DEFINE_SPINLOCK(broadcast_asid_lock);

RAW_SPINLOCK ?

> +static u16 last_broadcast_asid = TLB_NR_DYN_ASIDS;
> +static DECLARE_BITMAP(broadcast_asid_used, MAX_ASID_AVAILABLE) = { 0 };
> +static LIST_HEAD(broadcast_asid_list);
> +static int broadcast_asid_available = MAX_ASID_AVAILABLE - TLB_NR_DYN_ASIDS - 1;
> +
> +static void reset_broadcast_asid_space(void)
> +{
> +	mm_context_t *context;
> +
> +	assert_spin_locked(&broadcast_asid_lock);

	lockdep_assert_held(&broadcast_asid_lock);

> +
> +	/*
> +	 * Flush once when we wrap around the ASID space, so we won't need
> +	 * to flush every time we allocate an ASID for broadcast flushing.
> +	 */
> +	invlpgb_flush_all_nonglobals();
> +	tlbsync();
> +
> +	/*
> +	 * Leave the currently used broadcast ASIDs set in the bitmap, since
> +	 * those cannot be reused before the next wraparound and flush.
> +	 */
> +	bitmap_clear(broadcast_asid_used, 0, MAX_ASID_AVAILABLE);
> +	list_for_each_entry(context, &broadcast_asid_list, broadcast_asid_list)
> +		__set_bit(context->broadcast_asid, broadcast_asid_used);
> +
> +	last_broadcast_asid = TLB_NR_DYN_ASIDS;
> +}
> +
> +static u16 get_broadcast_asid(void)
> +{
> +	assert_spin_locked(&broadcast_asid_lock);

	lockdep_assert_held

> +
> +	do {
> +		u16 start = last_broadcast_asid;
> +		u16 asid = find_next_zero_bit(broadcast_asid_used, MAX_ASID_AVAILABLE, start);
> +
> +		if (asid >= MAX_ASID_AVAILABLE) {
> +			reset_broadcast_asid_space();
> +			continue;
> +		}
> +
> +		/* Try claiming this broadcast ASID. */
> +		if (!test_and_set_bit(asid, broadcast_asid_used)) {
> +			last_broadcast_asid = asid;
> +			return asid;
> +		}
> +	} while (1);
> +}
> +
> +/*
> + * Returns true if the mm is transitioning from a CPU-local ASID to a broadcast
> + * (INVLPGB) ASID, or the other way around.
> + */
> +static bool needs_broadcast_asid_reload(struct mm_struct *next, u16 prev_asid)
> +{
> +	u16 broadcast_asid = next->context.broadcast_asid;
> +
> +	if (broadcast_asid && prev_asid != broadcast_asid) {
> +		return true;
> +	}
> +
> +	if (!broadcast_asid && is_broadcast_asid(prev_asid)) {
> +		return true;
> +	}
> +
> +	return false;
> +}

Those return statements don't really need {} on.

> +
> +void destroy_context_free_broadcast_asid(struct mm_struct *mm) {

{ goes on a new line.

> +	unsigned long flags;
> +
> +	if (!mm->context.broadcast_asid)
> +		return;
> +
> +	spin_lock_irqsave(&broadcast_asid_lock, flags);

	guard(raw_spin_lock_irqsave)(&broadcast_asid_lock);

> +	mm->context.broadcast_asid = 0;
> +	list_del(&mm->context.broadcast_asid_list);
> +	broadcast_asid_available++;
> +	spin_unlock_irqrestore(&broadcast_asid_lock, flags);
> +}
> +
> +static int mm_active_cpus(struct mm_struct *mm)
> +{
> +	int count = 0;
> +	int cpu;
> +
> +	for_each_cpu(cpu, mm_cpumask(mm)) {
> +		/* Skip the CPUs that aren't really running this process. */
> +		if (per_cpu(cpu_tlbstate.loaded_mm, cpu) != mm)
> +			continue;
> +
> +		if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
> +			continue;
> +
> +		count++;
> +	}
> +	return count;
> +}
> +
> +/*
> + * Assign a broadcast ASID to the current process, protecting against
> + * races between multiple threads in the process.
> + */
> +static void use_broadcast_asid(struct mm_struct *mm)
> +{
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&broadcast_asid_lock, flags);

	guard(raw_spin_lock_irqsave)(&broadcast_asid_lock);

> +
> +	/* This process is already using broadcast TLB invalidation. */
> +	if (mm->context.broadcast_asid)
> +		goto out_unlock;

		return;

> +	mm->context.broadcast_asid = get_broadcast_asid();
> +	mm->context.asid_transition = true;
> +	list_add(&mm->context.broadcast_asid_list, &broadcast_asid_list);
> +	broadcast_asid_available--;
> +
> +out_unlock:

Notably we're really wanting to get away from the whole goto unlock
pattern.

> +	spin_unlock_irqrestore(&broadcast_asid_lock, flags);
> +}
> +
> +/*
> + * Figure out whether to assign a broadcast (global) ASID to a process.
> + * We vary the threshold by how empty or full broadcast ASID space is.
> + * 1/4 full: >= 4 active threads
> + * 1/2 full: >= 8 active threads
> + * 3/4 full: >= 16 active threads
> + * 7/8 full: >= 32 active threads
> + * etc
> + *
> + * This way we should never exhaust the broadcast ASID space, even on very
> + * large systems, and the processes with the largest number of active
> + * threads should be able to use broadcast TLB invalidation.

I'm a little confused, at most we need one ASID per CPU, IIRC we have
something like 4k ASIDs (page-offset bits in the physical address bits)
so for anything with less than 4K CPUs we're good, but with anything
having more CPUs we're up a creek irrespective of the above scheme, no?

> + */
> +#define HALFFULL_THRESHOLD 8
> +static bool meets_broadcast_asid_threshold(struct mm_struct *mm)
> +{
> +	int avail = broadcast_asid_available;
> +	int threshold = HALFFULL_THRESHOLD;
> +	int mm_active_threads;
> +
> +	if (!avail)
> +		return false;
> +
> +	mm_active_threads = mm_active_cpus(mm);
> +
> +	/* Small processes can just use IPI TLB flushing. */
> +	if (mm_active_threads < 3)
> +		return false;
> +
> +	if (avail > MAX_ASID_AVAILABLE * 3 / 4) {
> +		threshold = HALFFULL_THRESHOLD / 4;
> +	} else if (avail > MAX_ASID_AVAILABLE / 2) {
> +		threshold = HALFFULL_THRESHOLD / 2;
> +	} else if (avail < MAX_ASID_AVAILABLE / 3) {
> +		do {
> +			avail *= 2;
> +			threshold *= 2;
> +		} while ((avail + threshold ) < MAX_ASID_AVAILABLE / 2);
> +	}
> +
> +	return mm_active_threads > threshold;
> +}

  reply	other threads:[~2024-12-22 11:36 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-22  4:06 [RFC PATCH 00/10] AMD broadcast TLB invalidation Rik van Riel
2024-12-22  4:06 ` [PATCH 01/10] Add X86_FEATURE_INVLPGB definition Rik van Riel
2024-12-22  4:06 ` [PATCH 02/10] x86,tlb: get INVLPGB count max from CPUID Rik van Riel
2024-12-22 11:46   ` Borislav Petkov
2024-12-22 14:39     ` Rik van Riel
2024-12-22  4:06 ` [PATCH 03/10] x86,mm: add INVLPGB support code Rik van Riel
2024-12-22 11:05   ` Peter Zijlstra
2024-12-22 21:24     ` Rik van Riel
2024-12-22  4:06 ` [PATCH 04/10] x86,mm: use INVLPGB for kernel TLB flushes Rik van Riel
2024-12-22 11:16   ` Peter Zijlstra
2024-12-22 15:12     ` Rik van Riel
2024-12-24 18:13       ` Peter Zijlstra
2024-12-22 11:47   ` Borislav Petkov
2024-12-22  4:06 ` [PATCH 05/10] x86,tlb: use INVLPGB in flush_tlb_all Rik van Riel
2024-12-22 11:17   ` Peter Zijlstra
2024-12-22  4:06 ` [PATCH 06/10] x86,mm: use broadcast TLB flushing for page reclaim TLB flushing Rik van Riel
2024-12-22 11:18   ` Peter Zijlstra
2024-12-22  4:06 ` [PATCH 07/10] x86,mm: enable broadcast TLB invalidation for multi-threaded processes Rik van Riel
2024-12-22 11:36   ` Peter Zijlstra [this message]
2024-12-22 22:45     ` Rik van Riel
2024-12-22  4:06 ` [PATCH 08/10] x86,tlb: do targeted broadcast flushing from tlbbatch code Rik van Riel
2024-12-22  4:06 ` [PATCH 09/10] x86/mm: enable AMD translation cache extensions Rik van Riel
2024-12-22 11:38   ` Peter Zijlstra
2024-12-22 15:37     ` Rik van Riel
2024-12-24 18:25       ` Peter Zijlstra
2024-12-22  4:06 ` [PATCH 10/10] x86/mm: only invalidate final translations with INVLPGB Rik van Riel
2024-12-22 11:11 ` [RFC PATCH 00/10] AMD broadcast TLB invalidation Peter Zijlstra
2024-12-22 15:06   ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241222113601.GX11133@noisy.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=akpm@linux-foundation.org \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mingo@redhat.com \
    --cc=riel@surriel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox