public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Baoquan He <bhe@redhat.com>
To: Heiko Carstens <hca@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Alexander Gordeev <agordeev@linux.ibm.com>,
	Christian Borntraeger <borntraeger@linux.ibm.com>,
	Alexander Potapenko <glider@google.com>,
	Marco Elver <elver@google.com>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	Michael Ellerman <mpe@ellerman.id.au>,
	linux-s390@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH] s390/kfence: fix page fault reporting
Date: Tue, 14 Feb 2023 09:15:00 +0800	[thread overview]
Message-ID: <Y+rglIOjA7kascpE@MiWiFi-R3L-srv> (raw)
In-Reply-To: <20230213183858.1473681-1-hca@linux.ibm.com>

On 02/13/23 at 07:38pm, Heiko Carstens wrote:
> Baoquan He reported lots of KFENCE reports when /proc/kcore is read,
> e.g. with crash or even simpler with dd:
> 
>  BUG: KFENCE: invalid read in copy_from_kernel_nofault+0x5e/0x120
>  Invalid read at 0x00000000f4f5149f:
>   copy_from_kernel_nofault+0x5e/0x120
>   read_kcore+0x6b2/0x870
>   proc_reg_read+0x9a/0xf0
>   vfs_read+0x94/0x270
>   ksys_read+0x70/0x100
>   __do_syscall+0x1d0/0x200
>   system_call+0x82/0xb0
> 
> The reason for this is that read_kcore() simply reads memory that might
> have been unmapped by KFENCE with copy_from_kernel_nofault(). Any fault due
> to pages being unmapped by KFENCE would be handled gracefully by the fault
> handler (exception table fixup).
> 
> However the s390 fault handler first reports the fault, and only afterwards
> would perform the exception table fixup. Most architectures have this in
> reversed order, which also avoids the false positive KFENCE reports when an
> unmapped page is accessed.
> 
> Therefore change the s390 fault handler so it handles exception table
> fixups before KFENCE page faults are reported.
> 
> Reported-by: Baoquan He <bhe@redhat.com>
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>

Applied this patch on the latest kernel 6.2-rc8 and tested it; it fixes the
problem perfectly. Thanks a lot. Please feel free to add:

Tested-by: Baoquan He <bhe@redhat.com>

> ---
>  arch/s390/mm/fault.c | 49 +++++++++++++++++++++++++++++++-------------
>  1 file changed, 35 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
> index 9649d9382e0a..8e84ed2bb944 100644
> --- a/arch/s390/mm/fault.c
> +++ b/arch/s390/mm/fault.c
> @@ -96,6 +96,20 @@ static enum fault_type get_fault_type(struct pt_regs *regs)
>  	return KERNEL_FAULT;
>  }
>  
> +static unsigned long get_fault_address(struct pt_regs *regs)
> +{
> +	unsigned long trans_exc_code = regs->int_parm_long;
> +
> +	return trans_exc_code & __FAIL_ADDR_MASK;
> +}
> +
> +static bool fault_is_write(struct pt_regs *regs)
> +{
> +	unsigned long trans_exc_code = regs->int_parm_long;
> +
> +	return (trans_exc_code & store_indication) == 0x400;
> +}
> +
>  static int bad_address(void *p)
>  {
>  	unsigned long dummy;
> @@ -228,15 +242,26 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
>  			(void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK));
>  }
>  
> -static noinline void do_no_context(struct pt_regs *regs)
> +static noinline void do_no_context(struct pt_regs *regs, vm_fault_t fault)
>  {
> +	enum fault_type fault_type;
> +	unsigned long address;
> +	bool is_write;
> +
>  	if (fixup_exception(regs))
>  		return;
> +	fault_type = get_fault_type(regs);
> +	if ((fault_type == KERNEL_FAULT) && (fault == VM_FAULT_BADCONTEXT)) {
> +		address = get_fault_address(regs);
> +		is_write = fault_is_write(regs);
> +		if (kfence_handle_page_fault(address, is_write, regs))
> +			return;
> +	}
>  	/*
>  	 * Oops. The kernel tried to access some bad page. We'll have to
>  	 * terminate things with extreme prejudice.
>  	 */
> -	if (get_fault_type(regs) == KERNEL_FAULT)
> +	if (fault_type == KERNEL_FAULT)
>  		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
>  		       " in virtual kernel address space\n");
>  	else
> @@ -255,7 +280,7 @@ static noinline void do_low_address(struct pt_regs *regs)
>  		die (regs, "Low-address protection");
>  	}
>  
> -	do_no_context(regs);
> +	do_no_context(regs, VM_FAULT_BADACCESS);
>  }
>  
>  static noinline void do_sigbus(struct pt_regs *regs)
> @@ -286,28 +311,28 @@ static noinline void do_fault_error(struct pt_regs *regs, vm_fault_t fault)
>  		fallthrough;
>  	case VM_FAULT_BADCONTEXT:
>  	case VM_FAULT_PFAULT:
> -		do_no_context(regs);
> +		do_no_context(regs, fault);
>  		break;
>  	case VM_FAULT_SIGNAL:
>  		if (!user_mode(regs))
> -			do_no_context(regs);
> +			do_no_context(regs, fault);
>  		break;
>  	default: /* fault & VM_FAULT_ERROR */
>  		if (fault & VM_FAULT_OOM) {
>  			if (!user_mode(regs))
> -				do_no_context(regs);
> +				do_no_context(regs, fault);
>  			else
>  				pagefault_out_of_memory();
>  		} else if (fault & VM_FAULT_SIGSEGV) {
>  			/* Kernel mode? Handle exceptions or die */
>  			if (!user_mode(regs))
> -				do_no_context(regs);
> +				do_no_context(regs, fault);
>  			else
>  				do_sigsegv(regs, SEGV_MAPERR);
>  		} else if (fault & VM_FAULT_SIGBUS) {
>  			/* Kernel mode? Handle exceptions or die */
>  			if (!user_mode(regs))
> -				do_no_context(regs);
> +				do_no_context(regs, fault);
>  			else
>  				do_sigbus(regs);
>  		} else
> @@ -334,7 +359,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  	struct mm_struct *mm;
>  	struct vm_area_struct *vma;
>  	enum fault_type type;
> -	unsigned long trans_exc_code;
>  	unsigned long address;
>  	unsigned int flags;
>  	vm_fault_t fault;
> @@ -351,9 +375,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  		return 0;
>  
>  	mm = tsk->mm;
> -	trans_exc_code = regs->int_parm_long;
> -	address = trans_exc_code & __FAIL_ADDR_MASK;
> -	is_write = (trans_exc_code & store_indication) == 0x400;
> +	address = get_fault_address(regs);
> +	is_write = fault_is_write(regs);
>  
>  	/*
>  	 * Verify that the fault happened in user space, that
> @@ -364,8 +387,6 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
>  	type = get_fault_type(regs);
>  	switch (type) {
>  	case KERNEL_FAULT:
> -		if (kfence_handle_page_fault(address, is_write, regs))
> -			return 0;
>  		goto out;
>  	case USER_FAULT:
>  	case GMAP_FAULT:
> -- 
> 2.37.2
> 


  reply	other threads:[~2023-02-14  1:16 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-13 18:38 [PATCH] s390/kfence: fix page fault reporting Heiko Carstens
2023-02-14  1:15 ` Baoquan He [this message]
2023-02-14 13:33 ` Alexander Potapenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y+rglIOjA7kascpE@MiWiFi-R3L-srv \
    --to=bhe@redhat.com \
    --cc=agordeev@linux.ibm.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=christophe.leroy@csgroup.eu \
    --cc=elver@google.com \
    --cc=glider@google.com \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=svens@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox