All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jamie Heilman <jamie@audible.transient.net>
To: Thomas Gleixner <tglx@linutronix.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	linux-kernel@vger.kernel.org, peterz@infradead.org,
	stable@vger.kernel.org
Subject: Re: regression in 6.6.46; arch/x86/mm/pti.c
Date: Mon, 9 Sep 2024 07:28:46 +0000	[thread overview]
Message-ID: <Zt6jru89n7DBECCr@audible.transient.net> (raw)
In-Reply-To: <87h6apcp9x.ffs@tglx>

Thomas Gleixner wrote:
> On Mon, Sep 09 2024 at 05:03, Jamie Heilman wrote:
> > 3db03fb4995e ("x86/mm: Fix pti_clone_entry_text() for i386") which got
> > landed in 6.6.46, has introduced two back to back warnings on boot on
> > my 32bit system (found on 6.6.50):
> 
> Right.
> 
> > Reverting that commit removes the warnings (tested against 6.6.50).
> > The follow-on commit of c48b5a4cf312 ("x86/mm: Fix PTI for i386 some
> > more") doesn't apply cleanly to 6.6.50, but I did try out a build of
> > 6.11-rc7 and that works fine too with no warnings on boot.
> 
> See backport below.

Yep, that tests out fine for me too.  Thanks!

> ---
> From: Thomas Gleixner <tglx@linutronix.de>
> Date:   Tue Aug 6 20:48:43 2024 +0200
> Subject: [PATCH 6.6.y, 6.1.y, 5.10.y] x86/mm: Fix PTI for i386 some more
> 
> commit c48b5a4cf3125adb679e28ef093f66ff81368d05 upstream.
> 
> So it turns out that we have to do two passes of
> pti_clone_entry_text(), once before initcalls, such that device and
> late initcalls can use user-mode-helper / modprobe and once after
> free_initmem() / mark_readonly().
> 
> Now obviously mark_readonly() can cause PMD splits, and
> pti_clone_pgtable() doesn't like that much.
> 
> Allow the late clone to split PMDs so that pagetables stay in sync.
> 
> [peterz: Changelog and comments]
> Reported-by: Guenter Roeck <linux@roeck-us.net>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: Guenter Roeck <linux@roeck-us.net>
> Link: https://lkml.kernel.org/r/20240806184843.GX37996@noisy.programming.kicks-ass.net
> ---
>  arch/x86/mm/pti.c |   45 +++++++++++++++++++++++++++++----------------
>  1 file changed, 29 insertions(+), 16 deletions(-)
> 
> --- a/arch/x86/mm/pti.c
> +++ b/arch/x86/mm/pti.c
> @@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pm
>   *
>   * Returns a pointer to a PTE on success, or NULL on failure.
>   */
> -static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
> +static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text)
>  {
>  	gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
>  	pmd_t *pmd;
> @@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pt
>  	if (!pmd)
>  		return NULL;
>  
> -	/* We can't do anything sensible if we hit a large mapping. */
> +	/* Large PMD mapping found */
>  	if (pmd_large(*pmd)) {
> -		WARN_ON(1);
> -		return NULL;
> +		/* Clear the PMD if we hit a large mapping from the first round */
> +		if (late_text) {
> +			set_pmd(pmd, __pmd(0));
> +		} else {
> +			WARN_ON_ONCE(1);
> +			return NULL;
> +		}
>  	}
>  
>  	if (pmd_none(*pmd)) {
> @@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(vo
>  	if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
>  		return;
>  
> -	target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
> +	target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false);
>  	if (WARN_ON(!target_pte))
>  		return;
>  
> @@ -301,7 +306,7 @@ enum pti_clone_level {
>  
>  static void
>  pti_clone_pgtable(unsigned long start, unsigned long end,
> -		  enum pti_clone_level level)
> +		  enum pti_clone_level level, bool late_text)
>  {
>  	unsigned long addr;
>  
> @@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, u
>  				return;
>  
>  			/* Allocate PTE in the user page-table */
> -			target_pte = pti_user_pagetable_walk_pte(addr);
> +			target_pte = pti_user_pagetable_walk_pte(addr, late_text);
>  			if (WARN_ON(!target_pte))
>  				return;
>  
> @@ -453,7 +458,7 @@ static void __init pti_clone_user_shared
>  		phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
>  		pte_t *target_pte;
>  
> -		target_pte = pti_user_pagetable_walk_pte(va);
> +		target_pte = pti_user_pagetable_walk_pte(va, false);
>  		if (WARN_ON(!target_pte))
>  			return;
>  
> @@ -476,7 +481,7 @@ static void __init pti_clone_user_shared
>  	start = CPU_ENTRY_AREA_BASE;
>  	end   = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);
>  
> -	pti_clone_pgtable(start, end, PTI_CLONE_PMD);
> +	pti_clone_pgtable(start, end, PTI_CLONE_PMD, false);
>  }
>  #endif /* CONFIG_X86_64 */
>  
> @@ -493,11 +498,11 @@ static void __init pti_setup_espfix64(vo
>  /*
>   * Clone the populated PMDs of the entry text and force it RO.
>   */
> -static void pti_clone_entry_text(void)
> +static void pti_clone_entry_text(bool late)
>  {
>  	pti_clone_pgtable((unsigned long) __entry_text_start,
>  			  (unsigned long) __entry_text_end,
> -			  PTI_LEVEL_KERNEL_IMAGE);
> +			  PTI_LEVEL_KERNEL_IMAGE, late);
>  }
>  
>  /*
> @@ -572,7 +577,7 @@ static void pti_clone_kernel_text(void)
>  	 * pti_set_kernel_image_nonglobal() did to clear the
>  	 * global bit.
>  	 */
> -	pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
> +	pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false);
>  
>  	/*
>  	 * pti_clone_pgtable() will set the global bit in any PMDs
> @@ -639,8 +644,15 @@ void __init pti_init(void)
>  
>  	/* Undo all global bits from the init pagetables in head_64.S: */
>  	pti_set_kernel_image_nonglobal();
> +
>  	/* Replace some of the global bits just for shared entry text: */
> -	pti_clone_entry_text();
> +	/*
> +	 * This is very early in boot. Device and Late initcalls can do
> +	 * modprobe before free_initmem() and mark_readonly(). This
> +	 * pti_clone_entry_text() allows those user-mode-helpers to function,
> +	 * but notably the text is still RW.
> +	 */
> +	pti_clone_entry_text(false);
>  	pti_setup_espfix64();
>  	pti_setup_vsyscall();
>  }
> @@ -657,10 +669,11 @@ void pti_finalize(void)
>  	if (!boot_cpu_has(X86_FEATURE_PTI))
>  		return;
>  	/*
> -	 * We need to clone everything (again) that maps parts of the
> -	 * kernel image.
> +	 * This is after free_initmem() (all initcalls are done) and we've done
> +	 * mark_readonly(). Text is now NX which might've split some PMDs
> +	 * relative to the early clone.
>  	 */
> -	pti_clone_entry_text();
> +	pti_clone_entry_text(true);
>  	pti_clone_kernel_text();
>  
>  	debug_checkwx_user();
> 

-- 
Jamie Heilman                     http://audible.transient.net/~jamie/

  reply	other threads:[~2024-09-09  7:28 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-09  5:03 regression in 6.6.46; arch/x86/mm/pti.c Jamie Heilman
2024-09-09  6:30 ` Thomas Gleixner
2024-09-09  7:28   ` Jamie Heilman [this message]
2024-09-10  7:35   ` Greg Kroah-Hartman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Zt6jru89n7DBECCr@audible.transient.net \
    --to=jamie@audible.transient.net \
    --cc=gregkh@linuxfoundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.