LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] arch:powerpc simple_write_to_buffer return check
From: Mayank Suman @ 2021-02-04 18:16 UTC (permalink / raw)
  To: ruscur, oohall, mpe, benh, paulus, linuxppc-dev, linux-kernel
  Cc: Mayank Suman

Signed-off-by: Mayank Suman <mayanksuman@live.com>
---
 arch/powerpc/kernel/eeh.c                    | 8 ++++----
 arch/powerpc/platforms/powernv/eeh-powernv.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 813713c9120c..2dbe1558a71f 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1628,8 +1628,8 @@ static ssize_t eeh_force_recover_write(struct file *filp,
 	char buf[20];
 	int ret;
 
-	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
-	if (!ret)
+	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+	if (ret <= 0)
 		return -EFAULT;
 
 	/*
@@ -1696,7 +1696,7 @@ static ssize_t eeh_dev_check_write(struct file *filp,
 
 	memset(buf, 0, sizeof(buf));
 	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
-	if (!ret)
+	if (ret <= 0)
 		return -EFAULT;
 
 	ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
@@ -1836,7 +1836,7 @@ static ssize_t eeh_dev_break_write(struct file *filp,
 
 	memset(buf, 0, sizeof(buf));
 	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
-	if (!ret)
+	if (ret <= 0)
 		return -EFAULT;
 
 	ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 89e22c460ebf..36ed2b8f7375 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -76,8 +76,8 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
 		return -ENXIO;
 
 	/* Copy over argument buffer */
-	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
-	if (!ret)
+	ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+	if (ret <= 0)
 		return -EFAULT;
 
 	/* Retrieve parameters */
-- 
2.30.0


^ permalink raw reply related

* [PATCH] KVM: PPC: Book3S HV: Save and restore FSCR in the P9 path
From: Fabiano Rosas @ 2021-02-04 20:05 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev

The Facility Status and Control Register is a privileged SPR that
defines the availability of some features in problem state. Since it
can be written by the guest, we must restore it to the previous host
value after guest exit.

This restoration is currently done by taking the value from
current->thread.fscr, which in the P9 path is not enough anymore
because the guest could context switch the QEMU thread, causing the
guest-current value to be saved into the thread struct.

The above situation manifested when running a QEMU linked against a
libc with System Call Vectored support, which causes scv
instructions to be run by QEMU early during the guest boot (during
SLOF), at which point the FSCR is 0 due to guest entry. After a few
scv calls (1 to a couple hundred), the context switching happens and
the QEMU thread runs with the guest value, resulting in a Facility
Unavailable interrupt.

This patch saves and restores the host value of FSCR in the inner
guest entry loop in a way independent of current->thread.fscr. The old
way of doing it is still kept in place because it works for the old
entry path.

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
---
 arch/powerpc/kvm/book3s_hv.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6f612d240392..f2ddf7139a2a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3595,6 +3595,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	unsigned long host_tidr = mfspr(SPRN_TIDR);
 	unsigned long host_iamr = mfspr(SPRN_IAMR);
 	unsigned long host_amr = mfspr(SPRN_AMR);
+	unsigned long host_fscr = mfspr(SPRN_FSCR);
 	s64 dec;
 	u64 tb;
 	int trap, save_pmu;
@@ -3735,6 +3736,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 	if (host_amr != vcpu->arch.amr)
 		mtspr(SPRN_AMR, host_amr);

+	if (host_fscr != vcpu->arch.fscr)
+		mtspr(SPRN_FSCR, host_fscr);
+
 	msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
 	store_fp_state(&vcpu->arch.fp);
 #ifdef CONFIG_ALTIVEC
-- 
2.29.2

^ permalink raw reply related

* Re: [PATCH] arch:powerpc simple_write_to_buffer return check
From: Oliver O'Halloran @ 2021-02-04 22:35 UTC (permalink / raw)
  To: Mayank Suman; +Cc: Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
In-Reply-To: <PS1PR04MB29345AB59076B370A4F99F75D6B39@PS1PR04MB2934.apcprd04.prod.outlook.com>

On Fri, Feb 5, 2021 at 5:17 AM Mayank Suman <mayanksuman@live.com> wrote:
>
> Signed-off-by: Mayank Suman <mayanksuman@live.com>

commit messages aren't optional

> ---
>  arch/powerpc/kernel/eeh.c                    | 8 ++++----
>  arch/powerpc/platforms/powernv/eeh-powernv.c | 4 ++--
>  2 files changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 813713c9120c..2dbe1558a71f 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1628,8 +1628,8 @@ static ssize_t eeh_force_recover_write(struct file *filp,
>         char buf[20];
>         int ret;
>
> -       ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
> -       if (!ret)
> +       ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);

We should probably be zeroing the buffer. Reading to sizeof(buf) - 1
is done in a few places to guarantee that the string is nul
terminated, but without the preceeding memset() that isn't actually
guaranteed.

> +       if (ret <= 0)
>                 return -EFAULT;

EFAULT is supposed to be returned when the user supplies a buffer to
write(2) which is outside their address space. I figured letting the
sscanf() in the next step fail if the user passes writes a zero-length
buffer and returning EINVAL made more sense. That said, the exact
semantics around zero length writes are pretty handwavy so I guess
this isn't wrong, but I don't think it's better either.

>         /*
> @@ -1696,7 +1696,7 @@ static ssize_t eeh_dev_check_write(struct file *filp,
>
>         memset(buf, 0, sizeof(buf));
>         ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
> -       if (!ret)
> +       if (ret <= 0)
>                 return -EFAULT;
>
>         ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
> @@ -1836,7 +1836,7 @@ static ssize_t eeh_dev_break_write(struct file *filp,
>
>         memset(buf, 0, sizeof(buf));
>         ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
> -       if (!ret)
> +       if (ret <= 0)
>                 return -EFAULT;
>
>         ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
> index 89e22c460ebf..36ed2b8f7375 100644
> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
> @@ -76,8 +76,8 @@ static ssize_t pnv_eeh_ei_write(struct file *filp,
>                 return -ENXIO;
>
>         /* Copy over argument buffer */
> -       ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
> -       if (!ret)
> +       ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
> +       if (ret <= 0)
>                 return -EFAULT;
>
>         /* Retrieve parameters */
> --
> 2.30.0
>

^ permalink raw reply

* Re: [PATCH v16 11/12] powerpc: Use OF alloc and free for FDT
From: Lakshmi Ramasubramanian @ 2021-02-04 23:23 UTC (permalink / raw)
  To: Rob Herring
  Cc: Mark Rutland, Bhupesh Sharma, tao.li, Mimi Zohar, Paul Mackerras,
	vincenzo.frascino, Frank Rowand, Sasha Levin, Masahiro Yamada,
	James Morris, AKASHI, Takahiro, linux-arm-kernel, Catalin Marinas,
	Serge E. Hallyn, devicetree, Pavel Tatashin, Will Deacon,
	Prakhar Srivastava, Hsin-Yi Wang, Allison Randal,
	Christophe Leroy, Matthias Brugger, balajib, dmitry.kasatkin,
	linux-kernel@vger.kernel.org, James Morse, Greg Kroah-Hartman,
	Joe Perches, linux-integrity, linuxppc-dev, Thiago Jung Bauermann
In-Reply-To: <CAL_JsqK1Pb9nAeL84EP2U3MQgpBsm+E_0QXmzbigWXnS245WPQ@mail.gmail.com>

On 2/4/21 11:26 AM, Rob Herring wrote:
> On Thu, Feb 4, 2021 at 10:42 AM Lakshmi Ramasubramanian
> <nramas@linux.microsoft.com> wrote:
>>
>> of_alloc_and_init_fdt() and of_free_fdt() have been defined in
>> drivers/of/kexec.c to allocate and free memory for FDT.
>>
>> Use of_alloc_and_init_fdt() and of_free_fdt() to allocate and
>> initialize the FDT, and to free the FDT respectively.
>>
>> powerpc sets the FDT address in image_loader_data field in
>> "struct kimage" and the memory is freed in
>> kimage_file_post_load_cleanup().  This cleanup function uses kfree()
>> to free the memory. But since of_alloc_and_init_fdt() uses kvmalloc()
>> for allocation, the buffer needs to be freed using kvfree().
> 
> You could just change the kexec core to call kvfree() instead.

> 
>> Define "fdt" field in "struct kimage_arch" for powerpc to store
>> the address of FDT, and free the memory in powerpc specific
>> arch_kimage_file_post_load_cleanup().
> 
> However, given all the other buffers have an explicit field in kimage
> or kimage_arch, changing powerpc is to match arm64 is better IMO.

Just to be clear:
I'll leave this as is - free FDT buffer in powerpc's 
arch_kimage_file_post_load_cleanup() to match arm64 behavior.

Will not change "kexec core" to call kvfree() - doing that change would 
require changing all architectures to use kvmalloc() for 
image_loader_data allocation.

> 
>> Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>> Suggested-by: Rob Herring <robh@kernel.org>
>> Suggested-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/kexec.h  |  2 ++
>>   arch/powerpc/kexec/elf_64.c       | 26 ++++++++++++++++----------
>>   arch/powerpc/kexec/file_load_64.c |  3 +++
>>   3 files changed, 21 insertions(+), 10 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
>> index 2c0be93d239a..d7d13cac4d31 100644
>> --- a/arch/powerpc/include/asm/kexec.h
>> +++ b/arch/powerpc/include/asm/kexec.h
>> @@ -111,6 +111,8 @@ struct kimage_arch {
>>          unsigned long elf_headers_mem;
>>          unsigned long elf_headers_sz;
>>          void *elf_headers;
>> +
>> +       void *fdt;
>>   };
>>
>>   char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
>> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
>> index d0e459bb2f05..51d2d8eb6c1b 100644
>> --- a/arch/powerpc/kexec/elf_64.c
>> +++ b/arch/powerpc/kexec/elf_64.c
>> @@ -19,6 +19,7 @@
>>   #include <linux/kexec.h>
>>   #include <linux/libfdt.h>
>>   #include <linux/module.h>
>> +#include <linux/of.h>
>>   #include <linux/of_fdt.h>
>>   #include <linux/slab.h>
>>   #include <linux/types.h>
>> @@ -32,7 +33,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>          unsigned int fdt_size;
>>          unsigned long kernel_load_addr;
>>          unsigned long initrd_load_addr = 0, fdt_load_addr;
>> -       void *fdt;
>> +       void *fdt = NULL;
>>          const void *slave_code;
>>          struct elfhdr ehdr;
>>          char *modified_cmdline = NULL;
>> @@ -103,18 +104,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>          }
>>
>>          fdt_size = fdt_totalsize(initial_boot_params) * 2;
>> -       fdt = kmalloc(fdt_size, GFP_KERNEL);
>> +       fdt = of_alloc_and_init_fdt(fdt_size);
>>          if (!fdt) {
>>                  pr_err("Not enough memory for the device tree.\n");
>>                  ret = -ENOMEM;
>>                  goto out;
>>          }
>> -       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
>> -       if (ret < 0) {
>> -               pr_err("Error setting up the new device tree.\n");
>> -               ret = -EINVAL;
>> -               goto out;
>> -       }
>>
>>          ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
> 
> The first thing this function does is call setup_new_fdt() which first
> calls of_kexec_setup_new_fdt(). (Note, I really don't understand the
> PPC code split. It looks like there's a 32-bit and 64-bit split, but
> 32-bit looks broken to me. Nothing ever calls setup_new_fdt() except
> setup_new_fdt_ppc64()). The arm64 version is calling
> of_alloc_and_init_fdt() and then of_kexec_setup_new_fdt() directly.
> 
> So we can just make of_alloc_and_init_fdt() also call
> of_kexec_setup_new_fdt() (really, just tweak of_kexec_setup_new_fdt do
> the alloc and copy). 
ok - will move fdt allocation into of_kexec_setup_new_fdt().

I don't think the architecture needs to pick the
> size either. It's doubtful that either one is that sensitive to the
> amount of extra space.
I am not clear about the above comment -
are you saying the architectures don't need to pass FDT size to the 
alloc function?

arm64 is adding command line string length and some extra space to the 
size computed from initial_boot_params for FDT Size:

	buf_size = fdt_totalsize(initial_boot_params)
			+ cmdline_len + DTB_EXTRA_SPACE;

powerpc is just using twice the size computed from initial_boot_params

	fdt_size = fdt_totalsize(initial_boot_params) * 2;

I think it would be safe to let arm64 and powerpc pass the required FDT 
size, along with the other params to of_kexec_setup_new_fdt() - and in 
this function we allocate FDT and set it up.

And, for powerpc leave the remaining code in setup_new_fdt_ppc64().

Would that be ok?

> 
>>                                    initrd_len, cmdline);
>> @@ -131,6 +126,10 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>          ret = kexec_add_buffer(&kbuf);
>>          if (ret)
>>                  goto out;
>> +
>> +       /* FDT will be freed in arch_kimage_file_post_load_cleanup */
>> +       image->arch.fdt = fdt;
>> +
>>          fdt_load_addr = kbuf.mem;
>>
>>          pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
>> @@ -145,8 +144,15 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>          kfree(modified_cmdline);
>>          kexec_free_elf_info(&elf_info);
>>
>> -       /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
>> -       return ret ? ERR_PTR(ret) : fdt;
>> +       /*
>> +        * Once FDT buffer has been successfully passed to kexec_add_buffer(),
>> +        * the FDT buffer address is saved in image->arch.fdt. In that case,
>> +        * the memory cannot be freed here in case of any other error.
>> +        */
>> +       if (ret && !image->arch.fdt)
>> +               of_free_fdt(fdt);
> 
> Just call kvfree() directly.
Sure - will do.

  -lakshmi

> 
>> +
>> +       return ret ? ERR_PTR(ret) : NULL;
>>   }
>>
>>   const struct kexec_file_ops kexec_elf64_ops = {
>> diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
>> index 3cab318aa3b9..d9d5b5569a6d 100644
>> --- a/arch/powerpc/kexec/file_load_64.c
>> +++ b/arch/powerpc/kexec/file_load_64.c
>> @@ -1113,5 +1113,8 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
>>          image->arch.elf_headers = NULL;
>>          image->arch.elf_headers_sz = 0;
>>
>> +       of_free_fdt(image->arch.fdt);
>> +       image->arch.fdt = NULL;
>> +
>>          return kexec_image_post_load_cleanup_default(image);
>>   }
>> --
>> 2.30.0
>>


^ permalink raw reply

* Re: [PATCH 1/2] PCI/AER: Disable AER interrupt during suspend
From: Bjorn Helgaas @ 2021-02-04 23:27 UTC (permalink / raw)
  To: Kai-Heng Feng
  Cc: Joerg Roedel,
	open list:PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC,
	open list:PCI SUBSYSTEM, open list, Lalithambika Krishnakumar,
	Alex Williamson, Oliver O'Halloran, Bjorn Helgaas,
	Mika Westerberg, Lu Baolu
In-Reply-To: <CAAd53p7FfRCgfC5dGL3HyP+rbVtR2VCfMPYBBvJ=-DFCWFeVPA@mail.gmail.com>

[+cc Alex]

On Thu, Jan 28, 2021 at 12:09:37PM +0800, Kai-Heng Feng wrote:
> On Thu, Jan 28, 2021 at 4:51 AM Bjorn Helgaas <helgaas@kernel.org> wrote:
> > On Thu, Jan 28, 2021 at 01:31:00AM +0800, Kai-Heng Feng wrote:
> > > Commit 50310600ebda ("iommu/vt-d: Enable PCI ACS for platform opt in
> > > hint") enables ACS, and some platforms lose its NVMe after resume from
> > > firmware:
> > > [   50.947816] pcieport 0000:00:1b.0: DPC: containment event, status:0x1f01 source:0x0000
> > > [   50.947817] pcieport 0000:00:1b.0: DPC: unmasked uncorrectable error detected
> > > [   50.947829] pcieport 0000:00:1b.0: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Receiver ID)
> > > [   50.947830] pcieport 0000:00:1b.0:   device [8086:06ac] error status/mask=00200000/00010000
> > > [   50.947831] pcieport 0000:00:1b.0:    [21] ACSViol                (First)
> > > [   50.947841] pcieport 0000:00:1b.0: AER: broadcast error_detected message
> > > [   50.947843] nvme nvme0: frozen state error detected, reset controller
> > >
> > > It happens right after ACS gets enabled during resume.
> > >
> > > To prevent that from happening, disable AER interrupt and enable it on
> > > system suspend and resume, respectively.
> >
> > Lots of questions here.  Maybe this is what we'll end up doing, but I
> > am curious about why the error is reported in the first place.
> >
> > Is this a consequence of the link going down and back up?
> 
> Could be. From the observations, it only happens when firmware suspend
> (S3) is used.
> Maybe it happens when it's gets powered up, but I don't have equipment
> to debug at hardware level.
> 
> If we use non-firmware suspend method, enabling ACS after resume won't
> trip AER and DPC.
> 
> > Is it consequence of the device doing a DMA when it shouldn't?
> 
> If it's doing DMA while suspending, the same error should also happen
> after NVMe is suspended and before PCIe port suspending.
> Furthermore, if non-firmware suspend method is used, there's so such
> issue, so less likely to be any DMA operation.
> 
> > Are we doing something in the wrong order during suspend?  Or maybe
> > resume, since I assume the error is reported during resume?
> 
> Yes the error is reported during resume. The suspend/resume order
> seems fine as non-firmware suspend doesn't have this issue.

I really feel like we need a better understanding of what's going on
here.  Disabling the AER interrupt is like closing our eyes and
pretending that because we don't see it, it didn't happen.

An ACS error is triggered by a DMA, right?  I'm assuming an MMIO
access from the CPU wouldn't trigger this error.  And it sounds like
the error is triggered before we even start running the driver after
resume.

If we're powering up an NVMe device from D3cold and it DMAs before the
driver touches it, something would be seriously broken.  I doubt
that's what's happening.  Maybe a device could resume some previously
programmed DMA after powering up from D3hot.

Or maybe the error occurred on suspend, like if the device wasn't
quiesced or something, but we didn't notice it until resume?  The 
AER error status bits are RW1CS, which means they can be preserved
across hot/warm/cold resets.

Can you instrument the code to see whether the AER error status bit is
set before enabling ACS?  I'm not sure that merely enabling ACS (I
assume you mean pci_std_enable_acs(), where we write PCI_ACS_CTRL)
should cause an interrupt for a previously-logged error.  I suspect
that could happen when enabling *AER*, but I wouldn't think it would
happen when enabling *ACS*.

Does this error happen on multiple machines from different vendors?
Wondering if it could be a BIOS issue, e.g., BIOS not cleaning up
after it did something to cause an error.

> > If we *do* take the error, why doesn't DPC recovery work?
> 
> It works for the root port, but not for the NVMe drive:
> [   50.947816] pcieport 0000:00:1b.0: DPC: containment event,
> status:0x1f01 source:0x0000
> [   50.947817] pcieport 0000:00:1b.0: DPC: unmasked uncorrectable error detected
> [   50.947829] pcieport 0000:00:1b.0: PCIe Bus Error:
> severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Receiver
> ID)
> [   50.947830] pcieport 0000:00:1b.0:   device [8086:06ac] error
> status/mask=00200000/00010000
> [   50.947831] pcieport 0000:00:1b.0:    [21] ACSViol                (First)
> [   50.947841] pcieport 0000:00:1b.0: AER: broadcast error_detected message
> [   50.947843] nvme nvme0: frozen state error detected, reset controller
> [   50.948400] ACPI: EC: event unblocked
> [   50.948432] xhci_hcd 0000:00:14.0: PME# disabled
> [   50.948444] xhci_hcd 0000:00:14.0: enabling bus mastering
> [   50.949056] pcieport 0000:00:1b.0: PME# disabled
> [   50.949068] pcieport 0000:00:1c.0: PME# disabled
> [   50.949416] e1000e 0000:00:1f.6: PME# disabled
> [   50.949463] e1000e 0000:00:1f.6: enabling bus mastering
> [   50.951606] sd 0:0:0:0: [sda] Starting disk
> [   50.951610] nvme 0000:01:00.0: can't change power state from D3hot
> to D0 (config space inaccessible)
> [   50.951730] nvme nvme0: Removing after probe failure status: -19
> [   50.952360] nvme nvme0: failed to set APST feature (-19)
> [   50.971136] snd_hda_intel 0000:00:1f.3: PME# disabled
> [   51.089330] pcieport 0000:00:1b.0: AER: broadcast resume message
> [   51.089345] pcieport 0000:00:1b.0: AER: device recovery successful
> 
> But I think why recovery doesn't work for NVMe is for another discussion...
> 
> Kai-Heng
> 
> >
> > > Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=209149
> > > Fixes: 50310600ebda ("iommu/vt-d: Enable PCI ACS for platform opt in hint")
> > > Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
> > > ---
> > >  drivers/pci/pcie/aer.c | 18 ++++++++++++++++++
> > >  1 file changed, 18 insertions(+)
> > >
> > > diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> > > index 77b0f2c45bc0..0e9a85530ae6 100644
> > > --- a/drivers/pci/pcie/aer.c
> > > +++ b/drivers/pci/pcie/aer.c
> > > @@ -1365,6 +1365,22 @@ static int aer_probe(struct pcie_device *dev)
> > >       return 0;
> > >  }
> > >
> > > +static int aer_suspend(struct pcie_device *dev)
> > > +{
> > > +     struct aer_rpc *rpc = get_service_data(dev);
> > > +
> > > +     aer_disable_rootport(rpc);
> > > +     return 0;
> > > +}
> > > +
> > > +static int aer_resume(struct pcie_device *dev)
> > > +{
> > > +     struct aer_rpc *rpc = get_service_data(dev);
> > > +
> > > +     aer_enable_rootport(rpc);
> > > +     return 0;
> > > +}
> > > +
> > >  /**
> > >   * aer_root_reset - reset Root Port hierarchy, RCEC, or RCiEP
> > >   * @dev: pointer to Root Port, RCEC, or RCiEP
> > > @@ -1437,6 +1453,8 @@ static struct pcie_port_service_driver aerdriver = {
> > >       .service        = PCIE_PORT_SERVICE_AER,
> > >
> > >       .probe          = aer_probe,
> > > +     .suspend        = aer_suspend,
> > > +     .resume         = aer_resume,
> > >       .remove         = aer_remove,
> > >  };
> > >
> > > --
> > > 2.29.2
> > >

^ permalink raw reply

* Re: [PATCH v16 11/12] powerpc: Use OF alloc and free for FDT
From: Rob Herring @ 2021-02-04 23:36 UTC (permalink / raw)
  To: Lakshmi Ramasubramanian
  Cc: Mark Rutland, Bhupesh Sharma, tao.li, Mimi Zohar, Paul Mackerras,
	vincenzo.frascino, Frank Rowand, Sasha Levin, Masahiro Yamada,
	James Morris, AKASHI, Takahiro, linux-arm-kernel, Catalin Marinas,
	Serge E. Hallyn, devicetree, Pavel Tatashin, Will Deacon,
	Prakhar Srivastava, Hsin-Yi Wang, Allison Randal,
	Christophe Leroy, Matthias Brugger, balajib, dmitry.kasatkin,
	linux-kernel@vger.kernel.org, James Morse, Greg Kroah-Hartman,
	Joe Perches, linux-integrity, linuxppc-dev, Thiago Jung Bauermann
In-Reply-To: <503d42ba-89bf-4ad9-9d4c-acb625580f77@linux.microsoft.com>

On Thu, Feb 4, 2021 at 5:23 PM Lakshmi Ramasubramanian
<nramas@linux.microsoft.com> wrote:
>
> On 2/4/21 11:26 AM, Rob Herring wrote:
> > On Thu, Feb 4, 2021 at 10:42 AM Lakshmi Ramasubramanian
> > <nramas@linux.microsoft.com> wrote:
> >>
> >> of_alloc_and_init_fdt() and of_free_fdt() have been defined in
> >> drivers/of/kexec.c to allocate and free memory for FDT.
> >>
> >> Use of_alloc_and_init_fdt() and of_free_fdt() to allocate and
> >> initialize the FDT, and to free the FDT respectively.
> >>
> >> powerpc sets the FDT address in image_loader_data field in
> >> "struct kimage" and the memory is freed in
> >> kimage_file_post_load_cleanup().  This cleanup function uses kfree()
> >> to free the memory. But since of_alloc_and_init_fdt() uses kvmalloc()
> >> for allocation, the buffer needs to be freed using kvfree().
> >
> > You could just change the kexec core to call kvfree() instead.
>
> >
> >> Define "fdt" field in "struct kimage_arch" for powerpc to store
> >> the address of FDT, and free the memory in powerpc specific
> >> arch_kimage_file_post_load_cleanup().
> >
> > However, given all the other buffers have an explicit field in kimage
> > or kimage_arch, changing powerpc is to match arm64 is better IMO.
>
> Just to be clear:
> I'll leave this as is - free FDT buffer in powerpc's
> arch_kimage_file_post_load_cleanup() to match arm64 behavior.

Yes.

> Will not change "kexec core" to call kvfree() - doing that change would
> require changing all architectures to use kvmalloc() for
> image_loader_data allocation.

Actually, no. AIUI, kvfree() can be used whether you used kvmalloc,
vmalloc, or kmalloc for the alloc.

> >> Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
> >> Suggested-by: Rob Herring <robh@kernel.org>
> >> Suggested-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> >> ---
> >>   arch/powerpc/include/asm/kexec.h  |  2 ++
> >>   arch/powerpc/kexec/elf_64.c       | 26 ++++++++++++++++----------
> >>   arch/powerpc/kexec/file_load_64.c |  3 +++
> >>   3 files changed, 21 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> >> index 2c0be93d239a..d7d13cac4d31 100644
> >> --- a/arch/powerpc/include/asm/kexec.h
> >> +++ b/arch/powerpc/include/asm/kexec.h
> >> @@ -111,6 +111,8 @@ struct kimage_arch {
> >>          unsigned long elf_headers_mem;
> >>          unsigned long elf_headers_sz;
> >>          void *elf_headers;
> >> +
> >> +       void *fdt;
> >>   };
> >>
> >>   char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
> >> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
> >> index d0e459bb2f05..51d2d8eb6c1b 100644
> >> --- a/arch/powerpc/kexec/elf_64.c
> >> +++ b/arch/powerpc/kexec/elf_64.c
> >> @@ -19,6 +19,7 @@
> >>   #include <linux/kexec.h>
> >>   #include <linux/libfdt.h>
> >>   #include <linux/module.h>
> >> +#include <linux/of.h>
> >>   #include <linux/of_fdt.h>
> >>   #include <linux/slab.h>
> >>   #include <linux/types.h>
> >> @@ -32,7 +33,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
> >>          unsigned int fdt_size;
> >>          unsigned long kernel_load_addr;
> >>          unsigned long initrd_load_addr = 0, fdt_load_addr;
> >> -       void *fdt;
> >> +       void *fdt = NULL;
> >>          const void *slave_code;
> >>          struct elfhdr ehdr;
> >>          char *modified_cmdline = NULL;
> >> @@ -103,18 +104,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
> >>          }
> >>
> >>          fdt_size = fdt_totalsize(initial_boot_params) * 2;
> >> -       fdt = kmalloc(fdt_size, GFP_KERNEL);
> >> +       fdt = of_alloc_and_init_fdt(fdt_size);
> >>          if (!fdt) {
> >>                  pr_err("Not enough memory for the device tree.\n");
> >>                  ret = -ENOMEM;
> >>                  goto out;
> >>          }
> >> -       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
> >> -       if (ret < 0) {
> >> -               pr_err("Error setting up the new device tree.\n");
> >> -               ret = -EINVAL;
> >> -               goto out;
> >> -       }
> >>
> >>          ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
> >
> > The first thing this function does is call setup_new_fdt() which first
> > calls of_kexec_setup_new_fdt(). (Note, I really don't understand the
> > PPC code split. It looks like there's a 32-bit and 64-bit split, but
> > 32-bit looks broken to me. Nothing ever calls setup_new_fdt() except
> > setup_new_fdt_ppc64()). The arm64 version is calling
> > of_alloc_and_init_fdt() and then of_kexec_setup_new_fdt() directly.
> >
> > So we can just make of_alloc_and_init_fdt() also call
> > of_kexec_setup_new_fdt() (really, just tweak of_kexec_setup_new_fdt do
> > the alloc and copy).
> ok - will move fdt allocation into of_kexec_setup_new_fdt().
>
> I don't think the architecture needs to pick the
> > size either. It's doubtful that either one is that sensitive to the
> > amount of extra space.
> I am not clear about the above comment -
> are you saying the architectures don't need to pass FDT size to the
> alloc function?
>
> arm64 is adding command line string length and some extra space to the
> size computed from initial_boot_params for FDT Size:
>
>         buf_size = fdt_totalsize(initial_boot_params)
>                         + cmdline_len + DTB_EXTRA_SPACE;
>
> powerpc is just using twice the size computed from initial_boot_params
>
>         fdt_size = fdt_totalsize(initial_boot_params) * 2;
>
> I think it would be safe to let arm64 and powerpc pass the required FDT
> size, along with the other params to of_kexec_setup_new_fdt() - and in
> this function we allocate FDT and set it up.

It's pretty clear that someone just picked something that 'should be
enough'. The only thing I can guess for the difference is that arm
DT's tend to be a bit larger. So doubling the size would be even more
excessive. Either way, we're talking 10s kB to few 100kB. I'd go with
DTB_EXTRA_SPACE and we can bump it up if someone has problems.

Also, I would like for 'initial_boot_params' to be private ultimately,
so removing any references is helpful.

> And, for powerpc leave the remaining code in setup_new_fdt_ppc64().

Right.

Rob

^ permalink raw reply

* Re: [PATCH v16 11/12] powerpc: Use OF alloc and free for FDT
From: Lakshmi Ramasubramanian @ 2021-02-04 23:42 UTC (permalink / raw)
  To: Rob Herring
  Cc: Mark Rutland, tao.li, Mimi Zohar, Paul Mackerras,
	vincenzo.frascino, Frank Rowand, Sasha Levin, Masahiro Yamada,
	James Morris, AKASHI, Takahiro, linux-arm-kernel, Catalin Marinas,
	Serge E. Hallyn, devicetree, Pavel Tatashin, Will Deacon,
	Prakhar Srivastava, Hsin-Yi Wang, Allison Randal,
	Christophe Leroy, Matthias Brugger, balajib, dmitry.kasatkin,
	linux-kernel@vger.kernel.org, James Morse, Greg Kroah-Hartman,
	Joe Perches, linux-integrity, linuxppc-dev, Thiago Jung Bauermann
In-Reply-To: <CAL_JsqKY9fxOowW=sBVm9s8j=3RWA7Jn9Ft9Edyx5qy5Yvykmw@mail.gmail.com>

On 2/4/21 3:36 PM, Rob Herring wrote:
> On Thu, Feb 4, 2021 at 5:23 PM Lakshmi Ramasubramanian
> <nramas@linux.microsoft.com> wrote:
>>
>> On 2/4/21 11:26 AM, Rob Herring wrote:
>>> On Thu, Feb 4, 2021 at 10:42 AM Lakshmi Ramasubramanian
>>> <nramas@linux.microsoft.com> wrote:
>>>>
>>>> of_alloc_and_init_fdt() and of_free_fdt() have been defined in
>>>> drivers/of/kexec.c to allocate and free memory for FDT.
>>>>
>>>> Use of_alloc_and_init_fdt() and of_free_fdt() to allocate and
>>>> initialize the FDT, and to free the FDT respectively.
>>>>
>>>> powerpc sets the FDT address in image_loader_data field in
>>>> "struct kimage" and the memory is freed in
>>>> kimage_file_post_load_cleanup().  This cleanup function uses kfree()
>>>> to free the memory. But since of_alloc_and_init_fdt() uses kvmalloc()
>>>> for allocation, the buffer needs to be freed using kvfree().
>>>
>>> You could just change the kexec core to call kvfree() instead.
>>
>>>
>>>> Define "fdt" field in "struct kimage_arch" for powerpc to store
>>>> the address of FDT, and free the memory in powerpc specific
>>>> arch_kimage_file_post_load_cleanup().
>>>
>>> However, given all the other buffers have an explicit field in kimage
>>> or kimage_arch, changing powerpc is to match arm64 is better IMO.
>>
>> Just to be clear:
>> I'll leave this as is - free FDT buffer in powerpc's
>> arch_kimage_file_post_load_cleanup() to match arm64 behavior.
> 
> Yes.

ok

> 
>> Will not change "kexec core" to call kvfree() - doing that change would
>> require changing all architectures to use kvmalloc() for
>> image_loader_data allocation.
> 
> Actually, no. AIUI, kvfree() can be used whether you used kvmalloc,
> vmalloc, or kmalloc for the alloc.
That is good information. Thanks.

> 
>>>> Signed-off-by: Lakshmi Ramasubramanian <nramas@linux.microsoft.com>
>>>> Suggested-by: Rob Herring <robh@kernel.org>
>>>> Suggested-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
>>>> ---
>>>>    arch/powerpc/include/asm/kexec.h  |  2 ++
>>>>    arch/powerpc/kexec/elf_64.c       | 26 ++++++++++++++++----------
>>>>    arch/powerpc/kexec/file_load_64.c |  3 +++
>>>>    3 files changed, 21 insertions(+), 10 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
>>>> index 2c0be93d239a..d7d13cac4d31 100644
>>>> --- a/arch/powerpc/include/asm/kexec.h
>>>> +++ b/arch/powerpc/include/asm/kexec.h
>>>> @@ -111,6 +111,8 @@ struct kimage_arch {
>>>>           unsigned long elf_headers_mem;
>>>>           unsigned long elf_headers_sz;
>>>>           void *elf_headers;
>>>> +
>>>> +       void *fdt;
>>>>    };
>>>>
>>>>    char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
>>>> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
>>>> index d0e459bb2f05..51d2d8eb6c1b 100644
>>>> --- a/arch/powerpc/kexec/elf_64.c
>>>> +++ b/arch/powerpc/kexec/elf_64.c
>>>> @@ -19,6 +19,7 @@
>>>>    #include <linux/kexec.h>
>>>>    #include <linux/libfdt.h>
>>>>    #include <linux/module.h>
>>>> +#include <linux/of.h>
>>>>    #include <linux/of_fdt.h>
>>>>    #include <linux/slab.h>
>>>>    #include <linux/types.h>
>>>> @@ -32,7 +33,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>>>           unsigned int fdt_size;
>>>>           unsigned long kernel_load_addr;
>>>>           unsigned long initrd_load_addr = 0, fdt_load_addr;
>>>> -       void *fdt;
>>>> +       void *fdt = NULL;
>>>>           const void *slave_code;
>>>>           struct elfhdr ehdr;
>>>>           char *modified_cmdline = NULL;
>>>> @@ -103,18 +104,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>>>>           }
>>>>
>>>>           fdt_size = fdt_totalsize(initial_boot_params) * 2;
>>>> -       fdt = kmalloc(fdt_size, GFP_KERNEL);
>>>> +       fdt = of_alloc_and_init_fdt(fdt_size);
>>>>           if (!fdt) {
>>>>                   pr_err("Not enough memory for the device tree.\n");
>>>>                   ret = -ENOMEM;
>>>>                   goto out;
>>>>           }
>>>> -       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
>>>> -       if (ret < 0) {
>>>> -               pr_err("Error setting up the new device tree.\n");
>>>> -               ret = -EINVAL;
>>>> -               goto out;
>>>> -       }
>>>>
>>>>           ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
>>>
>>> The first thing this function does is call setup_new_fdt() which first
>>> calls of_kexec_setup_new_fdt(). (Note, I really don't understand the
>>> PPC code split. It looks like there's a 32-bit and 64-bit split, but
>>> 32-bit looks broken to me. Nothing ever calls setup_new_fdt() except
>>> setup_new_fdt_ppc64()). The arm64 version is calling
>>> of_alloc_and_init_fdt() and then of_kexec_setup_new_fdt() directly.
>>>
>>> So we can just make of_alloc_and_init_fdt() also call
>>> of_kexec_setup_new_fdt() (really, just tweak of_kexec_setup_new_fdt do
>>> the alloc and copy).
>> ok - will move fdt allocation into of_kexec_setup_new_fdt().
>>
>> I don't think the architecture needs to pick the
>>> size either. It's doubtful that either one is that sensitive to the
>>> amount of extra space.
>> I am not clear about the above comment -
>> are you saying the architectures don't need to pass FDT size to the
>> alloc function?
>>
>> arm64 is adding command line string length and some extra space to the
>> size computed from initial_boot_params for FDT Size:
>>
>>          buf_size = fdt_totalsize(initial_boot_params)
>>                          + cmdline_len + DTB_EXTRA_SPACE;
>>
>> powerpc is just using twice the size computed from initial_boot_params
>>
>>          fdt_size = fdt_totalsize(initial_boot_params) * 2;
>>
>> I think it would be safe to let arm64 and powerpc pass the required FDT
>> size, along with the other params to of_kexec_setup_new_fdt() - and in
>> this function we allocate FDT and set it up.
> 
> It's pretty clear that someone just picked something that 'should be
> enough'. The only thing I can guess for the difference is that arm
> DT's tend to be a bit larger. So doubling the size would be even more
> excessive. Either way, we're talking 10s kB to few 100kB. I'd go with
> DTB_EXTRA_SPACE and we can bump it up if someone has problems.
ok - I'll use DTB_EXTRA_SPACE for the fdt allocation.

> 
> Also, I would like for 'initial_boot_params' to be private ultimately,
> so removing any references is helpful.
ok

> 
>> And, for powerpc leave the remaining code in setup_new_fdt_ppc64().
> 
> Right.
ok

thanks,
  -lakshmi

^ permalink raw reply

* Re: [PATCH v3 28/32] powerpc/64s: interrupt implement exit logic in C
From: Michael Ellerman @ 2021-02-05  0:22 UTC (permalink / raw)
  To: Nicholas Piggin, Christophe Leroy, linuxppc-dev; +Cc: Michal Suchanek
In-Reply-To: <1612428699.u023r42mj3.astroid@bobo.none>

Nicholas Piggin <npiggin@gmail.com> writes:
> Excerpts from Christophe Leroy's message of February 4, 2021 6:03 pm:
>> Le 04/02/2021 à 04:27, Nicholas Piggin a écrit :
>>> Excerpts from Christophe Leroy's message of February 4, 2021 2:25 am:
>>>> Le 25/02/2020 à 18:35, Nicholas Piggin a écrit :
...
>>>>> +
>>>>> +	/*
>>>>> +	 * We don't need to restore AMR on the way back to userspace for KUAP.
>>>>> +	 * The value of AMR only matters while we're in the kernel.
>>>>> +	 */
>>>>> +	kuap_restore_amr(regs);
>>>>
>>>> Is that correct to restore KUAP state here ? Shouldn't we have it at lower level in assembly ?
>>>>
>>>> Isn't there a risk that someone manages to call interrupt_exit_kernel_prepare() or the end of it in
>>>> a way or another, and get the previous KUAP state restored by this way ?
>>> 
>>> I'm not sure if there much more risk if it's here rather than the
>>> instruction being in another place in the code.
>>> 
>>> There's a lot of user access around the kernel too if you want to find a
>>> gadget to unlock KUAP then I suppose there is a pretty large attack
>>> surface.
>> 
>> My understanding is that user access scope is strictly limited, for instance we enforce the 
>> begin/end of user access to be in the same function, and we refrain from calling any other function 
>> inside the user access window. x86 even have 'objtool' to enforce it at build time. So in theory 
>> there is no way to get out of the function while user access is open.
>> 
>> Here with the interrupt exit function it is free beer. You have a place where you re-open user 
>> access and return with a simple blr. So that's open bar. If someone manages to just call the 
>> interrupt exit function, then user access remains open
>
> Hmm okay maybe that's a good point.

I don't think it's a very attractive gadget, it's not just a plain blr,
it does a full stack frame tear down before the return. And there's no
LR reloads anywhere very close.

Obviously it depends on what the compiler decides to do, it's possible
it could be a usable gadget. But there are other places that are more
attractive I think, eg:

c00000000061d768:	a6 03 3d 7d 	mtspr   29,r9
c00000000061d76c:	2c 01 00 4c 	isync
c00000000061d770:	00 00 00 60 	nop
c00000000061d774:	30 00 21 38 	addi    r1,r1,48
c00000000061d778:	20 00 80 4e 	blr


So I don't think we should redesign the code *purely* because we're
worried about interrupt_exit_kernel_prepare() being a useful gadget. If
we can come up with a way to restructure it that reads well and is
maintainable, and also reduces the chance of it being a good gadget then
sure.

cheers

^ permalink raw reply

* Re: [PATCH v3 28/32] powerpc/64s: interrupt implement exit logic in C
From: Nicholas Piggin @ 2021-02-05  2:16 UTC (permalink / raw)
  To: Christophe Leroy, linuxppc-dev, Michael Ellerman; +Cc: Michal Suchanek
In-Reply-To: <87blczpdm3.fsf@mpe.ellerman.id.au>

Excerpts from Michael Ellerman's message of February 5, 2021 10:22 am:
> Nicholas Piggin <npiggin@gmail.com> writes:
>> Excerpts from Christophe Leroy's message of February 4, 2021 6:03 pm:
>>> Le 04/02/2021 à 04:27, Nicholas Piggin a écrit :
>>>> Excerpts from Christophe Leroy's message of February 4, 2021 2:25 am:
>>>>> Le 25/02/2020 à 18:35, Nicholas Piggin a écrit :
> ...
>>>>>> +
>>>>>> +	/*
>>>>>> +	 * We don't need to restore AMR on the way back to userspace for KUAP.
>>>>>> +	 * The value of AMR only matters while we're in the kernel.
>>>>>> +	 */
>>>>>> +	kuap_restore_amr(regs);
>>>>>
>>>>> Is that correct to restore KUAP state here ? Shouldn't we have it at lower level in assembly ?
>>>>>
>>>>> Isn't there a risk that someone manages to call interrupt_exit_kernel_prepare() or the end of it in
>>>>> a way or another, and get the previous KUAP state restored by this way ?
>>>> 
>>>> I'm not sure if there much more risk if it's here rather than the
>>>> instruction being in another place in the code.
>>>> 
>>>> There's a lot of user access around the kernel too if you want to find a
>>>> gadget to unlock KUAP then I suppose there is a pretty large attack
>>>> surface.
>>> 
>>> My understanding is that user access scope is strictly limited, for instance we enforce the 
>>> begin/end of user access to be in the same function, and we refrain from calling any other function 
>>> inside the user access window. x86 even have 'objtool' to enforce it at build time. So in theory 
>>> there is no way to get out of the function while user access is open.
>>> 
>>> Here with the interrupt exit function it is free beer. You have a place where you re-open user 
>>> access and return with a simple blr. So that's open bar. If someone manages to just call the 
>>> interrupt exit function, then user access remains open
>>
>> Hmm okay maybe that's a good point.
> 
> I don't think it's a very attractive gadget, it's not just a plain blr,
> it does a full stack frame tear down before the return. And there's no
> LR reloads anywhere very close.
> 
> Obviously it depends on what the compiler decides to do, it's possible
> it could be a usable gadget. But there are other places that are more
> attractive I think, eg:
> 
> c00000000061d768:	a6 03 3d 7d 	mtspr   29,r9
> c00000000061d76c:	2c 01 00 4c 	isync
> c00000000061d770:	00 00 00 60 	nop
> c00000000061d774:	30 00 21 38 	addi    r1,r1,48
> c00000000061d778:	20 00 80 4e 	blr
> 
> 
> So I don't think we should redesign the code *purely* because we're
> worried about interrupt_exit_kernel_prepare() being a useful gadget. If
> we can come up with a way to restructure it that reads well and is
> maintainable, and also reduces the chance of it being a good gadget then
> sure.

Okay. That would be good if we can keep it in C, the pkeys + kuap combo
is fairly complicated and we might want to something cleverer with it, 
so that would make it even more difficult in asm.

Thanks,
Nick

^ permalink raw reply

* Re: [PATCH] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset
From: Athira Rajeev @ 2021-02-05  2:36 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: Madhavan Srinivasan, linuxppc-dev
In-Reply-To: <87pn1gpmnl.fsf@mpe.ellerman.id.au>



> On 04-Feb-2021, at 8:25 AM, Michael Ellerman <mpe@ellerman.id.au> wrote:
> 
> Athira Rajeev <atrajeev@linux.vnet.ibm.com> writes:
>> While sampling for marked events, currently we record the sample only
>> if the SIAR valid bit of Sampled Instruction Event Register (SIER) is
>> set. SIAR_VALID bit is used for fetching the instruction address from
>> Sampled Instruction Address Register(SIAR). But there are some usecases,
>> where the user is interested only in the PMU stats at each counter
>> overflow and the exact IP of the overflow event is not required.
>> Dropping SIAR invalid samples will fail to record some of the counter
>> overflows in such cases.
>> 
>> Example of such usecase is dumping the PMU stats (event counts)
>> after some regular amount of instructions/events from the userspace
>> (ex: via ptrace). Here counter overflow is indicated to userspace via
>> signal handler, and captured by monitoring and enabling I/O
>> signaling on the event file descriptor. In these cases, we expect to
>> get sample/overflow indication after each specified sample_period.
>> 
>> Perf event attribute will not have PERF_SAMPLE_IP set in the
>> sample_type if exact IP of the overflow event is not requested. So
>> while profiling if SAMPLE_IP is not set, just record the counter overflow
>> irrespective of SIAR_VALID check.
>> 
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> ---
>> arch/powerpc/perf/core-book3s.c | 10 ++++++++--
>> 1 file changed, 8 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index 28206b1fe172..bb4828a05e4d 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -2166,10 +2166,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
>> 	 * address even when freeze on supervisor state (kernel) is set in
>> 	 * MMCR2. Check attr.exclude_kernel and address to drop the sample in
>> 	 * these cases.
>> +	 *
>> +	 * If address is not requested in the sample
>> +	 * via PERF_SAMPLE_IP, just record that sample
>> +	 * irrespective of SIAR valid check.
>> 	 */
>> -	if (event->attr.exclude_kernel && record)
>> -		if (is_kernel_addr(mfspr(SPRN_SIAR)))
>> +	if (event->attr.exclude_kernel && record) {
>> +		if (is_kernel_addr(mfspr(SPRN_SIAR)) && (event->attr.sample_type & PERF_SAMPLE_IP))
>> 			record = 0;
>> +	} else if (!record && !(event->attr.sample_type & PERF_SAMPLE_IP))
>> +		record = 1;
> 
> This seems wrong, you're assuming that record was set previously to
> = siar_valid(), but it may be that record is still 0 from the
> initialisation and we weren't going to record.
> 
> Don't we need something more like this?

Hi Michael,

Thanks for checking the patch and sharing the suggestion.

Yes, the below change looks good and tested with my scenario. 
I will send a V2 with new change.

Thanks
Athira
> 
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 9fd06010e8b6..e4e8a017d339 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2136,7 +2136,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
> 			left += period;
> 			if (left <= 0)
> 				left = period;
> -			record = siar_valid(regs);
> +
> +			if (event->attr.sample_type & PERF_SAMPLE_IP)
> +				record = siar_valid(regs);
> +			else
> +				record = 1;
> +
> 			event->hw.last_period = event->hw.sample_period;
> 		}
> 		if (left < 0x80000000LL)
> @@ -2154,9 +2159,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
> 	 * MMCR2. Check attr.exclude_kernel and address to drop the sample in
> 	 * these cases.
> 	 */
> -	if (event->attr.exclude_kernel && record)
> -		if (is_kernel_addr(mfspr(SPRN_SIAR)))
> -			record = 0;
> +	if (event->attr.exclude_kernel &&
> +	    (event->attr.sample_type & PERF_SAMPLE_IP) &&
> +	    is_kernel_addr(mfspr(SPRN_SIAR)))
> +		record = 0;
> 
> 	/*
> 	 * Finally record data if requested.
> 
> 
> 
> cheers


^ permalink raw reply

* [PATCH] mm/pmem: Avoid inserting hugepage PTE entry with fsdax if hugepage support is disabled
From: Aneesh Kumar K.V @ 2021-02-05  2:39 UTC (permalink / raw)
  To: linux-nvdimm, dan.j.williams, Kirill A . Shutemov, Jan Kara
  Cc: linux-mm, linuxppc-dev, Aneesh Kumar K.V

Differentiate between hardware not supporting hugepages and user disabling THP
via 'echo never > /sys/kernel/mm/transparent_hugepage/enabled'

For the devdax namespace, the kernel handles the above via the
supported_alignment attribute and failing to initialize the namespace
if the namespace align value is not supported on the platform.

For the fsdax namespace, the kernel will continue to initialize
the namespace. This can result in the kernel creating a huge pte
entry even though the hardware don't support the same.

We do want hugepage support with pmem even if the end-user disabled THP
via sysfs file (/sys/kernel/mm/transparent_hugepage/enabled). Hence
differentiate between hardware/firmware lacking support vs user-controlled
disable of THP and prevent a huge fault if the hardware lacks hugepage
support.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 include/linux/huge_mm.h | 15 +++++++++------
 mm/huge_memory.c        |  6 +++++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 6a19f35f836b..ba973efcd369 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -78,6 +78,7 @@ static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn,
 }

 enum transparent_hugepage_flag {
+	TRANSPARENT_HUGEPAGE_NEVER_DAX,
 	TRANSPARENT_HUGEPAGE_FLAG,
 	TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
 	TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
@@ -123,6 +124,13 @@ extern unsigned long transparent_hugepage_flags;
  */
 static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 {
+
+	/*
+	 * If the hardware/firmware marked hugepage support disabled.
+	 */
+	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
+		return false;
+
 	if (vma->vm_flags & VM_NOHUGEPAGE)
 		return false;

@@ -134,12 +142,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)

 	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
 		return true;
-	/*
-	 * For dax vmas, try to always use hugepage mappings. If the kernel does
-	 * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
-	 * mappings, and device-dax namespaces, that try to guarantee a given
-	 * mapping size, will fail to enable
-	 */
+
 	if (vma_is_dax(vma))
 		return true;

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9237976abe72..d698b7e27447 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -386,7 +386,11 @@ static int __init hugepage_init(void)
 	struct kobject *hugepage_kobj;

 	if (!has_transparent_hugepage()) {
-		transparent_hugepage_flags = 0;
+		/*
+		 * Hardware doesn't support hugepages, hence disable
+		 * DAX PMD support.
+		 */
+		transparent_hugepage_flags = 1 << TRANSPARENT_HUGEPAGE_NEVER_DAX;
 		return -EINVAL;
 	}

-- 
2.29.2

^ permalink raw reply related

* [PATCH] powerpc/kuap: Allow kernel thread to access userspace after kthread_use_mm
From: Aneesh Kumar K.V @ 2021-02-05  3:04 UTC (permalink / raw)
  To: linuxppc-dev, mpe
  Cc: Jens Axboe, Aneesh Kumar K.V, Zorro Lang, Nicholas Piggin

This fix the bad fault reported by KUAP when io_wqe_worker access userspace.

 Bug: Read fault blocked by KUAP!
 WARNING: CPU: 1 PID: 101841 at arch/powerpc/mm/fault.c:229 __do_page_fault+0x6b4/0xcd0
 NIP [c00000000009e7e4] __do_page_fault+0x6b4/0xcd0
 LR [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0
..........
 Call Trace:
 [c000000016367330] [c00000000009e7e0] __do_page_fault+0x6b0/0xcd0 (unreliable)
 [c0000000163673e0] [c00000000009ee3c] do_page_fault+0x3c/0x120
 [c000000016367430] [c00000000000c848] handle_page_fault+0x10/0x2c
 --- interrupt: 300 at iov_iter_fault_in_readable+0x148/0x6f0
..........
 NIP [c0000000008e8228] iov_iter_fault_in_readable+0x148/0x6f0
 LR [c0000000008e834c] iov_iter_fault_in_readable+0x26c/0x6f0
 interrupt: 300
 [c0000000163677e0] [c0000000007154a0] iomap_write_actor+0xc0/0x280
 [c000000016367880] [c00000000070fc94] iomap_apply+0x1c4/0x780
 [c000000016367990] [c000000000710330] iomap_file_buffered_write+0xa0/0x120
 [c0000000163679e0] [c00800000040791c] xfs_file_buffered_aio_write+0x314/0x5e0 [xfs]
 [c000000016367a90] [c0000000006d74bc] io_write+0x10c/0x460
 [c000000016367bb0] [c0000000006d80e4] io_issue_sqe+0x8d4/0x1200
 [c000000016367c70] [c0000000006d8ad0] io_wq_submit_work+0xc0/0x250
 [c000000016367cb0] [c0000000006e2578] io_worker_handle_work+0x498/0x800
 [c000000016367d40] [c0000000006e2cdc] io_wqe_worker+0x3fc/0x4f0
 [c000000016367da0] [c0000000001cb0a4] kthread+0x1c4/0x1d0
 [c000000016367e10] [c00000000000dbf0] ret_from_kernel_thread+0x5c/0x6c

The kernel consider thread AMR value for kernel thread to be
AMR_KUAP_BLOCKED. Hence access to userspace is denied. This
of course not correct and we should allow userspace access after
kthread_use_mm(). To be precise, kthread_use_mm() should inherit the
AMR value of the operating address space. But, the AMR value is
thread-specific and we inherit the address space and not thread
access restrictions. Because of this ignore AMR value when accessing
userspace via kernel thread.

Cc: Zorro Lang <zlang@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/kup.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
index f50f72e535aa..2064621ae7b6 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -202,22 +202,16 @@ DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
 #include <asm/mmu.h>
 #include <asm/ptrace.h>
 
-/*
- * For kernel thread that doesn't have thread.regs return
- * default AMR/IAMR values.
- */
 static inline u64 current_thread_amr(void)
 {
-	if (current->thread.regs)
-		return current->thread.regs->amr;
-	return AMR_KUAP_BLOCKED;
+	VM_BUG_ON(!current->thread.regs);
+	return current->thread.regs->amr;
 }
 
 static inline u64 current_thread_iamr(void)
 {
-	if (current->thread.regs)
-		return current->thread.regs->iamr;
-	return AMR_KUEP_BLOCKED;
+	VM_BUG_ON(!current->thread.regs);
+	return current->thread.regs->iamr;
 }
 #endif /* CONFIG_PPC_PKEY */
 
@@ -384,7 +378,14 @@ static __always_inline void allow_user_access(void __user *to, const void __user
 	// This is written so we can resolve to a single case at build time
 	BUILD_BUG_ON(!__builtin_constant_p(dir));
 
-	if (mmu_has_feature(MMU_FTR_PKEY))
+	/*
+	 * Kernel threads may access user mm with kthread_use_mm() but
+	 * can't use current_thread_amr because they have thread.regs==NULL,
+	 * but they have no pkeys.
+	 */
+	if (current->flags & PF_KTHREAD)
+		thread_amr = 0;
+	else if (mmu_has_feature(MMU_FTR_PKEY))
 		thread_amr = current_thread_amr();
 
 	if (dir == KUAP_READ)
-- 
2.29.2


^ permalink raw reply related

* [PATCH 1/1] powerpc/kvm: Save Timebase Offset to fix sched_clock() while running guest code.
From: Leonardo Bras @ 2021-02-05  3:16 UTC (permalink / raw)
  To: Paul Mackerras, Michael Ellerman, Benjamin Herrenschmidt,
	Christophe Leroy, Athira Rajeev, Aneesh Kumar K.V, Gustavo Romero,
	Jordan Niethe, Nicholas Piggin, Stephen Boyd, Thomas Gleixner,
	Peter Zijlstra, Geert Uytterhoeven, Frederic Weisbecker
  Cc: Leonardo Bras, linuxppc-dev, linux-kernel, kvm-ppc

Before guest entry, TBU40 register is changed to reflect guest timebase.
After exitting guest, the register is reverted to it's original value.

If one tries to get the timestamp from host between those changes, it
will present an incorrect value.

An example would be trying to add a tracepoint in
kvmppc_guest_entry_inject_int(), which depending on last tracepoint
acquired could actually cause the host to crash.

Save the Timebase Offset to PACA and use it on sched_clock() to always
get the correct timestamp.

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
---
 arch/powerpc/include/asm/kvm_book3s_asm.h | 1 +
 arch/powerpc/kernel/asm-offsets.c         | 1 +
 arch/powerpc/kernel/time.c                | 3 ++-
 arch/powerpc/kvm/book3s_hv.c              | 2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 2 ++
 5 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 078f4648ea27..e2c12a10eed2 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -131,6 +131,7 @@ struct kvmppc_host_state {
 	u64 cfar;
 	u64 ppr;
 	u64 host_fscr;
+	u64 tb_offset;		/* Timebase offset: keeps correct timebase while on guest */
 #endif
 };
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b12d7c049bfe..0beb8fdc6352 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -706,6 +706,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_CFAR, cfar);
 	HSTATE_FIELD(HSTATE_PPR, ppr);
 	HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
+	HSTATE_FIELD(HSTATE_TB_OFFSET, tb_offset);
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 67feb3524460..adf6648e3572 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -699,7 +699,8 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
  */
 notrace unsigned long long sched_clock(void)
 {
-	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
+	return mulhdu(get_tb() - boot_tb - local_paca->kvm_hstate.tb_offset, tb_to_ns_scale)
+			<< tb_to_ns_shift;
 }
 
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b3731572295e..c08593c63353 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3491,6 +3491,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 		if ((tb & 0xffffff) < (new_tb & 0xffffff))
 			mtspr(SPRN_TBU40, new_tb + 0x1000000);
 		vc->tb_offset_applied = vc->tb_offset;
+		local_paca->kvm_hstate.tb_offset = vc->tb_offset;
 	}
 
 	if (vc->pcr)
@@ -3594,6 +3595,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 		if ((tb & 0xffffff) < (new_tb & 0xffffff))
 			mtspr(SPRN_TBU40, new_tb + 0x1000000);
 		vc->tb_offset_applied = 0;
+		local_paca->kvm_hstate.tb_offset = 0;
 	}
 
 	mtspr(SPRN_HDEC, 0x7fffffff);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b73140607875..8f7a9f7f4ee6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -632,6 +632,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	cmpdi	r8,0
 	beq	37f
 	std	r8, VCORE_TB_OFFSET_APPL(r5)
+	std	r8, HSTATE_TB_OFFSET(r13)
 	mftb	r6		/* current host timebase */
 	add	r8,r8,r6
 	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
@@ -1907,6 +1908,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	beq	17f
 	li	r0, 0
 	std	r0, VCORE_TB_OFFSET_APPL(r5)
+	std	r0, HSTATE_TB_OFFSET(r13)
 	mftb	r6			/* current guest timebase */
 	subf	r8,r8,r6
 	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
-- 
2.29.2


^ permalink raw reply related

* [PATCH] mm/memtest: Add ARCH_USE_MEMTEST
From: Anshuman Khandual @ 2021-02-05  4:10 UTC (permalink / raw)
  To: linux-mm
  Cc: Chris Zankel, Thomas Bogendoerfer, Anshuman Khandual,
	linux-xtensa, linuxppc-dev, linux-kernel, Russell King,
	linux-mips, Max Filippov, Ingo Molnar, Paul Mackerras,
	Catalin Marinas, Thomas Gleixner, Will Deacon, linux-arm-kernel

early_memtest() does not get called from all architectures. Hence enabling
CONFIG_MEMTEST and providing a valid memtest=[1..N] kernel command line
option might not trigger the memory pattern tests as would be expected in
normal circumstances. This situation is misleading.

The change here prevents the above mentioned problem after introducing a
new config option ARCH_USE_MEMTEST that should be subscribed on platforms
that call early_memtest(), in order to enable the config CONFIG_MEMTEST.
Conversely CONFIG_MEMTEST cannot be enabled on platforms where it would
not be tested anyway.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-mips@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-xtensa@linux-xtensa.org
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
This patch applies on v5.11-rc6 and has been tested on arm64 platform. But
it has been just build tested on all other platforms.

 arch/arm/Kconfig     | 1 +
 arch/arm64/Kconfig   | 1 +
 arch/mips/Kconfig    | 1 +
 arch/powerpc/Kconfig | 1 +
 arch/x86/Kconfig     | 1 +
 arch/xtensa/Kconfig  | 1 +
 lib/Kconfig.debug    | 9 ++++++++-
 7 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 138248999df7..a63b53c568df 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -32,6 +32,7 @@ config ARM
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF
+	select ARCH_USE_MEMTEST
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WANT_LD_ORPHAN_WARN
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c4acf8230f20..dfee5831d876 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -70,6 +70,7 @@ config ARM64
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_USE_SYM_ANNOTATIONS
+	select ARCH_USE_MEMTEST
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	select ARCH_SUPPORTS_MEMORY_FAILURE
 	select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 0a17bedf4f0d..1b21d8e53e6b 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -16,6 +16,7 @@ config MIPS
 	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_USE_MEMTEST
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_TABLE_SORT
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 107bb4319e0e..9935343a8750 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -151,6 +151,7 @@ config PPC
 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
 	select ARCH_USE_QUEUED_RWLOCKS		if PPC_QUEUED_SPINLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS	if PPC_QUEUED_SPINLOCKS
+	select ARCH_USE_MEMTEST
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
 	select ARCH_WANT_LD_ORPHAN_WARN
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 21f851179ff0..90545348db1b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_USE_SYM_ANNOTATIONS
+	select ARCH_USE_MEMTEST
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 	select ARCH_WANT_DEFAULT_BPF_JIT	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 37ce1489364e..8eb61fcdfc7f 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -9,6 +9,7 @@ config XTENSA
 	select ARCH_HAS_DMA_SET_UNCACHED if MMU
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_USE_MEMTEST
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_TABLE_SORT
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7937265ef879..6dd25b755a82 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2469,11 +2469,18 @@ config TEST_FPU
 
 endif # RUNTIME_TESTING_MENU
 
+config ARCH_USE_MEMTEST
+	bool
+	help
+	  An architecture should select this when it uses early_memtest()
+	  during boot process.
+
 config MEMTEST
 	bool "Memtest"
+	depends on ARCH_USE_MEMTEST
 	help
 	  This option adds a kernel parameter 'memtest', which allows memtest
-	  to be set.
+	  to be set and executed.
 	        memtest=0, mean disabled; -- default
 	        memtest=1, mean do 1 test pattern;
 	        ...
-- 
2.20.1


^ permalink raw reply related

* Re: [PATCH v5 10/10] powerpc/signal64: Use __get_user() to copy sigset_t
From: Christopher M. Riedl @ 2021-02-05  4:40 UTC (permalink / raw)
  To: Christopher M. Riedl, linuxppc-dev
In-Reply-To: <20210203184323.20792-11-cmr@codefail.de>

On Wed Feb 3, 2021 at 12:43 PM CST, Christopher M. Riedl wrote:
> Usually sigset_t is exactly 8B which is a "trivial" size and does not
> warrant using __copy_from_user(). Use __get_user() directly in
> anticipation of future work to remove the trivial size optimizations
> from __copy_from_user(). Calling __get_user() also results in a small
> boost to signal handling throughput here.
>
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>

This patch triggered sparse warnings about 'different address spaces'.
This minor fixup cleans that up:

diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 42fdc4a7ff72..1dfda6403e14 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -97,7 +97,7 @@ static void prepare_setup_sigcontext(struct task_struct *tsk, int ctx_has_vsx_re
 #endif /* CONFIG_VSX */
 }

-static inline int get_user_sigset(sigset_t *dst, const sigset_t *src)
+static inline int get_user_sigset(sigset_t *dst, const sigset_t __user *src)
 {
	if (sizeof(sigset_t) <= 8)
		return __get_user(dst->sig[0], &src->sig[0]);

^ permalink raw reply related

* Re: [PATCH v5 01/10] powerpc/uaccess: Add unsafe_copy_from_user
From: Daniel Axtens @ 2021-02-05  4:47 UTC (permalink / raw)
  To: Christopher M. Riedl, linuxppc-dev
In-Reply-To: <20210203184323.20792-2-cmr@codefail.de>

Hi Chris,

Pending anything that sparse reported (which I haven't checked), this
looks ok to me.

Reviewed-by: Daniel Axtens <dja@axtens.net>

Kind regards,
Daniel

> Just wrap __copy_tofrom_user() for the usual 'unsafe' pattern which
> takes in a label to goto on error.
>
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> ---
>  arch/powerpc/include/asm/uaccess.h | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 501c9a79038c..036e82eefac9 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -542,6 +542,9 @@ user_write_access_begin(const void __user *ptr, size_t len)
>  #define unsafe_get_user(x, p, e) unsafe_op_wrap(__get_user_allowed(x, p), e)
>  #define unsafe_put_user(x, p, e) __put_user_goto(x, p, e)
>  
> +#define unsafe_copy_from_user(d, s, l, e) \
> +	unsafe_op_wrap(__copy_tofrom_user((__force void __user *)d, s, l), e)
> +
>  #define unsafe_copy_to_user(d, s, l, e) \
>  do {									\
>  	u8 __user *_dst = (u8 __user *)(d);				\
> -- 
> 2.26.1

^ permalink raw reply

* Re: [PATCH v5 02/10] powerpc/signal: Add unsafe_copy_{vsx, fpr}_from_user()
From: Daniel Axtens @ 2021-02-05  5:17 UTC (permalink / raw)
  To: Christopher M. Riedl, linuxppc-dev
In-Reply-To: <20210203184323.20792-3-cmr@codefail.de>

Hi Christopher,

I have checked that each implementation matches the corresponding
*_to_user implementation. We've had some debate about whether the
overarching implementation in the to/from pairs (especially where things
go via a bounce buffer) can be simplified - but that's probably not
really something that this patch set should do.

On that basis:
Reviewed-by: Daniel Axtens <dja@axtens.net>

Kind regards,
Daniel

> Reuse the "safe" implementation from signal.c except for calling
> unsafe_copy_from_user() to copy into a local buffer.
>
> Signed-off-by: Christopher M. Riedl <cmr@codefail.de>
> ---
>  arch/powerpc/kernel/signal.h | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
>
> diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
> index 2559a681536e..7dfc536c78ef 100644
> --- a/arch/powerpc/kernel/signal.h
> +++ b/arch/powerpc/kernel/signal.h
> @@ -53,6 +53,30 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
>  				&buf[i], label);\
>  } while (0)
>  
> +#define unsafe_copy_fpr_from_user(task, from, label)	do {		\
> +	struct task_struct *__t = task;					\
> +	u64 __user *__f = (u64 __user *)from;				\
> +	u64 buf[ELF_NFPREG];						\
> +	int i;								\
> +									\
> +	unsafe_copy_from_user(buf, __f, sizeof(buf), label);		\
> +	for (i = 0; i < ELF_NFPREG - 1; i++)				\
> +		__t->thread.TS_FPR(i) = buf[i];				\
> +	__t->thread.fp_state.fpscr = buf[i];				\
> +} while (0)
> +
> +#define unsafe_copy_vsx_from_user(task, from, label)	do {		\
> +	struct task_struct *__t = task;					\
> +	u64 __user *__f = (u64 __user *)from;				\
> +	u64 buf[ELF_NVSRHALFREG];					\
> +	int i;								\
> +									\
> +	unsafe_copy_from_user(buf, __f, sizeof(buf), label);		\
> +	for (i = 0; i < ELF_NVSRHALFREG ; i++)				\
> +		__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];	\
> +} while (0)
> +
> +
>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
>  #define unsafe_copy_ckfpr_to_user(to, task, label)	do {		\
>  	struct task_struct *__t = task;					\
> @@ -80,6 +104,10 @@ unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
>  	unsafe_copy_to_user(to, (task)->thread.fp_state.fpr,	\
>  			    ELF_NFPREG * sizeof(double), label)
>  
> +#define unsafe_copy_fpr_from_user(task, from, label)			\
> +	unsafe_copy_from_user((task)->thread.fp_state.fpr, from,	\
> +			    ELF_NFPREG * sizeof(double), label)
> +
>  static inline unsigned long
>  copy_fpr_to_user(void __user *to, struct task_struct *task)
>  {
> @@ -115,6 +143,8 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
>  #else
>  #define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
>  
> +#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
> +
>  static inline unsigned long
>  copy_fpr_to_user(void __user *to, struct task_struct *task)
>  {
> -- 
> 2.26.1

^ permalink raw reply

* Re: [PATCH v3 28/32] powerpc/64s: interrupt implement exit logic in C
From: Christophe Leroy @ 2021-02-05  6:04 UTC (permalink / raw)
  To: Nicholas Piggin, linuxppc-dev, Michael Ellerman; +Cc: Michal Suchanek
In-Reply-To: <1612491261.by5b8gr97g.astroid@bobo.none>



Le 05/02/2021 à 03:16, Nicholas Piggin a écrit :
> Excerpts from Michael Ellerman's message of February 5, 2021 10:22 am:
>> Nicholas Piggin <npiggin@gmail.com> writes:
>>> Excerpts from Christophe Leroy's message of February 4, 2021 6:03 pm:
>>>> Le 04/02/2021 à 04:27, Nicholas Piggin a écrit :
>>>>> Excerpts from Christophe Leroy's message of February 4, 2021 2:25 am:
>>>>>> Le 25/02/2020 à 18:35, Nicholas Piggin a écrit :
>> ...
>>>>>>> +
>>>>>>> +	/*
>>>>>>> +	 * We don't need to restore AMR on the way back to userspace for KUAP.
>>>>>>> +	 * The value of AMR only matters while we're in the kernel.
>>>>>>> +	 */
>>>>>>> +	kuap_restore_amr(regs);
>>>>>>
>>>>>> Is that correct to restore KUAP state here ? Shouldn't we have it at lower level in assembly ?
>>>>>>
>>>>>> Isn't there a risk that someone manages to call interrupt_exit_kernel_prepare() or the end of it in
>>>>>> a way or another, and get the previous KUAP state restored by this way ?
>>>>>
>>>>> I'm not sure if there much more risk if it's here rather than the
>>>>> instruction being in another place in the code.
>>>>>
>>>>> There's a lot of user access around the kernel too if you want to find a
>>>>> gadget to unlock KUAP then I suppose there is a pretty large attack
>>>>> surface.
>>>>
>>>> My understanding is that user access scope is strictly limited, for instance we enforce the
>>>> begin/end of user access to be in the same function, and we refrain from calling any other function
>>>> inside the user access window. x86 even have 'objtool' to enforce it at build time. So in theory
>>>> there is no way to get out of the function while user access is open.
>>>>
>>>> Here with the interrupt exit function it is free beer. You have a place where you re-open user
>>>> access and return with a simple blr. So that's open bar. If someone manages to just call the
>>>> interrupt exit function, then user access remains open
>>>
>>> Hmm okay maybe that's a good point.
>>
>> I don't think it's a very attractive gadget, it's not just a plain blr,
>> it does a full stack frame tear down before the return. And there's no
>> LR reloads anywhere very close.
>>
>> Obviously it depends on what the compiler decides to do, it's possible
>> it could be a usable gadget. But there are other places that are more
>> attractive I think, eg:
>>
>> c00000000061d768:	a6 03 3d 7d 	mtspr   29,r9
>> c00000000061d76c:	2c 01 00 4c 	isync
>> c00000000061d770:	00 00 00 60 	nop
>> c00000000061d774:	30 00 21 38 	addi    r1,r1,48
>> c00000000061d778:	20 00 80 4e 	blr
>>
>>
>> So I don't think we should redesign the code *purely* because we're
>> worried about interrupt_exit_kernel_prepare() being a useful gadget. If
>> we can come up with a way to restructure it that reads well and is
>> maintainable, and also reduces the chance of it being a good gadget then
>> sure.
> 
> Okay. That would be good if we can keep it in C, the pkeys + kuap combo
> is fairly complicated and we might want to something cleverer with it,
> so that would make it even more difficult in asm.
> 

Ok.

For ppc32, I prefer to keep it in assembly for the time being and move everything from ASM to C at 
once after porting syscall and interrupts to C and wrappers.

Hope this is OK for you.

Christophe

^ permalink raw reply

* [PATCH v2 1/1] powerpc/kvm: Save Timebase Offset to fix sched_clock() while running guest code.
From: Leonardo Bras @ 2021-02-05  6:06 UTC (permalink / raw)
  To: Paul Mackerras, Michael Ellerman, Benjamin Herrenschmidt,
	Christophe Leroy, Athira Rajeev, Aneesh Kumar K.V, Leonardo Bras,
	Jordan Niethe, Nicholas Piggin, Frederic Weisbecker,
	Thomas Gleixner, Geert Uytterhoeven
  Cc: linuxppc-dev, linux-kernel, kvm-ppc

Before guest entry, TBU40 register is changed to reflect guest timebase.
After exitting guest, the register is reverted to it's original value.

If one tries to get the timestamp from host between those changes, it
will present an incorrect value.

An example would be trying to add a tracepoint in
kvmppc_guest_entry_inject_int(), which depending on last tracepoint
acquired could actually cause the host to crash.

Save the Timebase Offset to PACA and use it on sched_clock() to always
get the correct timestamp.

Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
Suggested-by: Paul Mackerras <paulus@ozlabs.org>
---
Changes since v1:
- Subtracts offset only when CONFIG_KVM_BOOK3S_HANDLER and
  CONFIG_PPC_BOOK3S_64 are defined.
---
 arch/powerpc/include/asm/kvm_book3s_asm.h | 1 +
 arch/powerpc/kernel/asm-offsets.c         | 1 +
 arch/powerpc/kernel/time.c                | 8 +++++++-
 arch/powerpc/kvm/book3s_hv.c              | 2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 2 ++
 5 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 078f4648ea27..e2c12a10eed2 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -131,6 +131,7 @@ struct kvmppc_host_state {
 	u64 cfar;
 	u64 ppr;
 	u64 host_fscr;
+	u64 tb_offset;		/* Timebase offset: keeps correct timebase while on guest */
 #endif
 };
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b12d7c049bfe..0beb8fdc6352 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -706,6 +706,7 @@ int main(void)
 	HSTATE_FIELD(HSTATE_CFAR, cfar);
 	HSTATE_FIELD(HSTATE_PPR, ppr);
 	HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
+	HSTATE_FIELD(HSTATE_TB_OFFSET, tb_offset);
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
 #else /* CONFIG_PPC_BOOK3S */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 67feb3524460..f27f0163792b 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -699,7 +699,13 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
  */
 notrace unsigned long long sched_clock(void)
 {
-	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
+	u64 tb = get_tb() - boot_tb;
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_HANDLER)
+	tb -= local_paca->kvm_hstate.tb_offset;
+#endif
+
+	return mulhdu(tb, tb_to_ns_scale) << tb_to_ns_shift;
 }
 
 
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b3731572295e..c08593c63353 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3491,6 +3491,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 		if ((tb & 0xffffff) < (new_tb & 0xffffff))
 			mtspr(SPRN_TBU40, new_tb + 0x1000000);
 		vc->tb_offset_applied = vc->tb_offset;
+		local_paca->kvm_hstate.tb_offset = vc->tb_offset;
 	}
 
 	if (vc->pcr)
@@ -3594,6 +3595,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
 		if ((tb & 0xffffff) < (new_tb & 0xffffff))
 			mtspr(SPRN_TBU40, new_tb + 0x1000000);
 		vc->tb_offset_applied = 0;
+		local_paca->kvm_hstate.tb_offset = 0;
 	}
 
 	mtspr(SPRN_HDEC, 0x7fffffff);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b73140607875..8f7a9f7f4ee6 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -632,6 +632,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 	cmpdi	r8,0
 	beq	37f
 	std	r8, VCORE_TB_OFFSET_APPL(r5)
+	std	r8, HSTATE_TB_OFFSET(r13)
 	mftb	r6		/* current host timebase */
 	add	r8,r8,r6
 	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
@@ -1907,6 +1908,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	beq	17f
 	li	r0, 0
 	std	r0, VCORE_TB_OFFSET_APPL(r5)
+	std	r0, HSTATE_TB_OFFSET(r13)
 	mftb	r6			/* current guest timebase */
 	subf	r8,r8,r6
 	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
-- 
2.29.2


^ permalink raw reply related

* Re: [PATCH] arch:powerpc simple_write_to_buffer return check
From: Mayank Suman @ 2021-02-05  6:13 UTC (permalink / raw)
  To: Oliver O'Halloran
  Cc: Linux Kernel Mailing List, Paul Mackerras, linuxppc-dev
In-Reply-To: <CAOSf1CGJ6ZeowMP8Zjo3TazYyaEGuEab4-QRKRJ2jjixUGGtCA@mail.gmail.com>

On 05/02/21 4:05 am, Oliver O'Halloran wrote:
> On Fri, Feb 5, 2021 at 5:17 AM Mayank Suman <mayanksuman@live.com> wrote:
>>
>> Signed-off-by: Mayank Suman <mayanksuman@live.com>
> 
> commit messages aren't optional

Sorry. I will include the commit message in PATCH v2.

> 
>> ---
>>  arch/powerpc/kernel/eeh.c                    | 8 ++++----
>>  arch/powerpc/platforms/powernv/eeh-powernv.c | 4 ++--
>>  2 files changed, 6 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>> index 813713c9120c..2dbe1558a71f 100644
>> --- a/arch/powerpc/kernel/eeh.c
>> +++ b/arch/powerpc/kernel/eeh.c
>> @@ -1628,8 +1628,8 @@ static ssize_t eeh_force_recover_write(struct file *filp,
>>         char buf[20];
>>         int ret;
>>
>> -       ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
>> -       if (!ret)
>> +       ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
> 
> We should probably be zeroing the buffer. Reading to sizeof(buf) - 1
> is done in a few places to guarantee that the string is nul
> terminated, but without the preceeding memset() that isn't actually
> guaranteed.

Yes, the buffer should be zeroed out first. I have included memset() in Patch v2.

> 
>> +       if (ret <= 0)
>>                 return -EFAULT;
> 
> EFAULT is supposed to be returned when the user supplies a buffer to
> write(2) which is outside their address space. I figured letting the
> sscanf() in the next step fail if the user passes writes a zero-length
> buffer and returning EINVAL made more sense. That said, the exact
> semantics around zero length writes are pretty handwavy so I guess
> this isn't wrong, but I don't think it's better either.
> 

simple_write_to_buffer may return negative value on fail.
So, -EFAULT should be return in case of negative return value. 
The conditional (!ret) was not sufficient to catch negative return value.

^ permalink raw reply

* Re: [PATCH v2 1/1] powerpc/kvm: Save Timebase Offset to fix sched_clock() while running guest code.
From: Nicholas Piggin @ 2021-02-05  6:28 UTC (permalink / raw)
  To: Aneesh Kumar K.V, Athira Rajeev, Benjamin Herrenschmidt,
	Christophe Leroy, Frederic Weisbecker, Geert Uytterhoeven,
	Jordan Niethe, Leonardo Bras, Michael Ellerman, Paul Mackerras,
	Thomas Gleixner
  Cc: linuxppc-dev, linux-kernel, kvm-ppc
In-Reply-To: <20210205060643.233481-1-leobras.c@gmail.com>

Excerpts from Leonardo Bras's message of February 5, 2021 4:06 pm:
> Before guest entry, TBU40 register is changed to reflect guest timebase.
> After exitting guest, the register is reverted to it's original value.
> 
> If one tries to get the timestamp from host between those changes, it
> will present an incorrect value.
> 
> An example would be trying to add a tracepoint in
> kvmppc_guest_entry_inject_int(), which depending on last tracepoint
> acquired could actually cause the host to crash.
> 
> Save the Timebase Offset to PACA and use it on sched_clock() to always
> get the correct timestamp.

Ouch. Not sure how reasonable it is to half switch into guest registers 
and expect to call into the wider kernel, fixing things up as we go. 
What if mftb is used in other places?

Especially as it doesn't seem like there is a reason that function _has_
to be called after the timebase is switched to guest, that's just how 
the code is structured.

As a local hack to work out a bug okay. If you really need it upstream 
could you put it under a debug config option?

Thanks,
Nick

> Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
> Suggested-by: Paul Mackerras <paulus@ozlabs.org>
> ---
> Changes since v1:
> - Subtracts offset only when CONFIG_KVM_BOOK3S_HANDLER and
>   CONFIG_PPC_BOOK3S_64 are defined.
> ---
>  arch/powerpc/include/asm/kvm_book3s_asm.h | 1 +
>  arch/powerpc/kernel/asm-offsets.c         | 1 +
>  arch/powerpc/kernel/time.c                | 8 +++++++-
>  arch/powerpc/kvm/book3s_hv.c              | 2 ++
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 2 ++
>  5 files changed, 13 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
> index 078f4648ea27..e2c12a10eed2 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> @@ -131,6 +131,7 @@ struct kvmppc_host_state {
>  	u64 cfar;
>  	u64 ppr;
>  	u64 host_fscr;
> +	u64 tb_offset;		/* Timebase offset: keeps correct timebase while on guest */
>  #endif
>  };
>  
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index b12d7c049bfe..0beb8fdc6352 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -706,6 +706,7 @@ int main(void)
>  	HSTATE_FIELD(HSTATE_CFAR, cfar);
>  	HSTATE_FIELD(HSTATE_PPR, ppr);
>  	HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
> +	HSTATE_FIELD(HSTATE_TB_OFFSET, tb_offset);
>  #endif /* CONFIG_PPC_BOOK3S_64 */
>  
>  #else /* CONFIG_PPC_BOOK3S */
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index 67feb3524460..f27f0163792b 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -699,7 +699,13 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
>   */
>  notrace unsigned long long sched_clock(void)
>  {
> -	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
> +	u64 tb = get_tb() - boot_tb;
> +
> +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_HANDLER)
> +	tb -= local_paca->kvm_hstate.tb_offset;
> +#endif
> +
> +	return mulhdu(tb, tb_to_ns_scale) << tb_to_ns_shift;
>  }
>  
>  
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index b3731572295e..c08593c63353 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3491,6 +3491,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
>  		if ((tb & 0xffffff) < (new_tb & 0xffffff))
>  			mtspr(SPRN_TBU40, new_tb + 0x1000000);
>  		vc->tb_offset_applied = vc->tb_offset;
> +		local_paca->kvm_hstate.tb_offset = vc->tb_offset;
>  	}
>  
>  	if (vc->pcr)
> @@ -3594,6 +3595,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
>  		if ((tb & 0xffffff) < (new_tb & 0xffffff))
>  			mtspr(SPRN_TBU40, new_tb + 0x1000000);
>  		vc->tb_offset_applied = 0;
> +		local_paca->kvm_hstate.tb_offset = 0;
>  	}
>  
>  	mtspr(SPRN_HDEC, 0x7fffffff);
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index b73140607875..8f7a9f7f4ee6 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -632,6 +632,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
>  	cmpdi	r8,0
>  	beq	37f
>  	std	r8, VCORE_TB_OFFSET_APPL(r5)
> +	std	r8, HSTATE_TB_OFFSET(r13)
>  	mftb	r6		/* current host timebase */
>  	add	r8,r8,r6
>  	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
> @@ -1907,6 +1908,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
>  	beq	17f
>  	li	r0, 0
>  	std	r0, VCORE_TB_OFFSET_APPL(r5)
> +	std	r0, HSTATE_TB_OFFSET(r13)
>  	mftb	r6			/* current guest timebase */
>  	subf	r8,r8,r6
>  	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
> -- 
> 2.29.2
> 
> 

^ permalink raw reply

* Re: [PATCH v2 1/1] powerpc/kvm: Save Timebase Offset to fix sched_clock() while running guest code.
From: Leonardo Bras @ 2021-02-05  7:01 UTC (permalink / raw)
  To: Nicholas Piggin, Aneesh Kumar K.V, Athira Rajeev,
	Benjamin Herrenschmidt, Christophe Leroy, Frederic Weisbecker,
	Geert Uytterhoeven, Jordan Niethe, Michael Ellerman,
	Paul Mackerras, Thomas Gleixner
  Cc: linuxppc-dev, linux-kernel, kvm-ppc
In-Reply-To: <1612506268.6rrvx34gzu.astroid@bobo.none>

Hey Nick, thanks for reviewing :)

On Fri, 2021-02-05 at 16:28 +1000, Nicholas Piggin wrote:
> Excerpts from Leonardo Bras's message of February 5, 2021 4:06 pm:
> > Before guest entry, TBU40 register is changed to reflect guest timebase.
> > After exitting guest, the register is reverted to it's original value.
> > 
> > If one tries to get the timestamp from host between those changes, it
> > will present an incorrect value.
> > 
> > An example would be trying to add a tracepoint in
> > kvmppc_guest_entry_inject_int(), which depending on last tracepoint
> > acquired could actually cause the host to crash.
> > 
> > Save the Timebase Offset to PACA and use it on sched_clock() to always
> > get the correct timestamp.
> 
> Ouch. Not sure how reasonable it is to half switch into guest registers 
> and expect to call into the wider kernel, fixing things up as we go. 
> What if mftb is used in other places?

IIUC, the CPU is not supposed to call anything as host between guest
entry and guest exit, except guest-related cases, like
kvmppc_guest_entry_inject_int(), but anyway, if something calls mftb it
will still get the same value as before.

This is only supposed to change stuff that depends on sched_clock, like
Tracepoints, that can happen in those exceptions.


> Especially as it doesn't seem like there is a reason that function _has_
> to be called after the timebase is switched to guest, that's just how 
> the code is structured.

Correct, but if called, like in rb routines, used by tracepoints, the
difference between last tb and current (lower) tb may cause the CPU to
trap PROGRAM exception, crashing host. 

> As a local hack to work out a bug okay. If you really need it upstream 
> could you put it under a debug config option?

You mean something that is automatically selected whenever those
configs are enabled? 

CONFIG_TRACEPOINT && CONFIG_KVM_BOOK3S_HANDLER && CONFIG_PPC_BOOK3S_64

Or something the user need to select himself in menuconfig?

> 
> Thanks,
> Nick
> 

Thank you!
Leonardo Bras

> > Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
> > Suggested-by: Paul Mackerras <paulus@ozlabs.org>
> > ---
> > Changes since v1:
> > - Subtracts offset only when CONFIG_KVM_BOOK3S_HANDLER and
> >   CONFIG_PPC_BOOK3S_64 are defined.
> > ---
> >  arch/powerpc/include/asm/kvm_book3s_asm.h | 1 +
> >  arch/powerpc/kernel/asm-offsets.c         | 1 +
> >  arch/powerpc/kernel/time.c                | 8 +++++++-
> >  arch/powerpc/kvm/book3s_hv.c              | 2 ++
> >  arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 2 ++
> >  5 files changed, 13 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
> > index 078f4648ea27..e2c12a10eed2 100644
> > --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> > +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> > @@ -131,6 +131,7 @@ struct kvmppc_host_state {
> >  	u64 cfar;
> >  	u64 ppr;
> >  	u64 host_fscr;
> > +	u64 tb_offset;		/* Timebase offset: keeps correct timebase while on guest */
> >  #endif
> >  };
> >  
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> > index b12d7c049bfe..0beb8fdc6352 100644
> > --- a/arch/powerpc/kernel/asm-offsets.c
> > +++ b/arch/powerpc/kernel/asm-offsets.c
> > @@ -706,6 +706,7 @@ int main(void)
> >  	HSTATE_FIELD(HSTATE_CFAR, cfar);
> >  	HSTATE_FIELD(HSTATE_PPR, ppr);
> >  	HSTATE_FIELD(HSTATE_HOST_FSCR, host_fscr);
> > +	HSTATE_FIELD(HSTATE_TB_OFFSET, tb_offset);
> >  #endif /* CONFIG_PPC_BOOK3S_64 */
> >  
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> >  #else /* CONFIG_PPC_BOOK3S */
> > diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> > index 67feb3524460..f27f0163792b 100644
> > --- a/arch/powerpc/kernel/time.c
> > +++ b/arch/powerpc/kernel/time.c
> > @@ -699,7 +699,13 @@ EXPORT_SYMBOL_GPL(tb_to_ns);
> >   */
> >  notrace unsigned long long sched_clock(void)
> >  {
> > -	return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
> > +	u64 tb = get_tb() - boot_tb;
> > +
> > +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_HANDLER)
> > +	tb -= local_paca->kvm_hstate.tb_offset;
> > +#endif
> > +
> > +	return mulhdu(tb, tb_to_ns_scale) << tb_to_ns_shift;
> >  }
> >  
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> >  
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> > index b3731572295e..c08593c63353 100644
> > --- a/arch/powerpc/kvm/book3s_hv.c
> > +++ b/arch/powerpc/kvm/book3s_hv.c
> > @@ -3491,6 +3491,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
> >  		if ((tb & 0xffffff) < (new_tb & 0xffffff))
> >  			mtspr(SPRN_TBU40, new_tb + 0x1000000);
> >  		vc->tb_offset_applied = vc->tb_offset;
> > +		local_paca->kvm_hstate.tb_offset = vc->tb_offset;
> >  	}
> >  
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> >  	if (vc->pcr)
> > @@ -3594,6 +3595,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
> >  		if ((tb & 0xffffff) < (new_tb & 0xffffff))
> >  			mtspr(SPRN_TBU40, new_tb + 0x1000000);
> >  		vc->tb_offset_applied = 0;
> > +		local_paca->kvm_hstate.tb_offset = 0;
> >  	}
> >  
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> > 
> >  	mtspr(SPRN_HDEC, 0x7fffffff);
> > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > index b73140607875..8f7a9f7f4ee6 100644
> > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> > @@ -632,6 +632,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> >  	cmpdi	r8,0
> >  	beq	37f
> >  	std	r8, VCORE_TB_OFFSET_APPL(r5)
> > +	std	r8, HSTATE_TB_OFFSET(r13)
> >  	mftb	r6		/* current host timebase */
> >  	add	r8,r8,r6
> >  	mtspr	SPRN_TBU40,r8	/* update upper 40 bits */
> > @@ -1907,6 +1908,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
> >  	beq	17f
> >  	li	r0, 0
> >  	std	r0, VCORE_TB_OFFSET_APPL(r5)
> > +	std	r0, HSTATE_TB_OFFSET(r13)
> >  	mftb	r6			/* current guest timebase */
> >  	subf	r8,r8,r6
> >  	mtspr	SPRN_TBU40,r8		/* update upper 40 bits */
> > -- 
> > 2.29.2
> > 
> > 



^ permalink raw reply

* [PATCH 7/7] ASoC: dt-bindings: imx-rpmsg: Add binding doc for rpmsg machine driver
From: Shengjiu Wang @ 2021-02-05  6:57 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, alsa-devel, linux-kernel, timur,
	nicoleotsuka, Xiubo.Lee, festevam, linuxppc-dev, robh+dt,
	devicetree
In-Reply-To: <1612508250-10586-1-git-send-email-shengjiu.wang@nxp.com>

Imx-rpmsg is a new added machine driver for supporting audio on Cortex-M
core. The Cortex-M core will control the audio interface, DMA and audio
codec, setup the pipeline, the audio driver on Cortex-A core side is just
to communitcate with M core, it is a virtual sound card and don't touch
the hardware.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 .../bindings/sound/imx-audio-rpmsg.yaml       | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml

diff --git a/Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml b/Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml
new file mode 100644
index 000000000000..b941aeb80678
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/sound/imx-audio-rpmsg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP i.MX audio complex with rpmsg
+
+maintainers:
+  - Shengjiu Wang <shengjiu.wang@nxp.com>
+
+properties:
+  compatible:
+    enum:
+      - fsl,imx-audio-rpmsg
+
+  model:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: User specified audio sound card name
+
+  audio-cpu:
+    description: The phandle of an CPU DAI controller
+
+  rpmsg-out:
+    description: |
+      This is a boolean property. If present, the transmitting function
+      will be enabled,
+
+  rpmsg-in:
+    description: |
+      This is a boolean property. If present, the receiving function
+      will be enabled.
+
+required:
+  - compatible
+  - model
+  - audio-cpu
+
+additionalProperties: false
+
+examples:
+  - |
+    sound-rpmsg {
+        compatible = "fsl,imx-audio-rpmsg";
+        model = "ak4497-audio";
+        audio-cpu = <&rpmsg_audio>;
+        rpmsg-out;
+    };
-- 
2.27.0


^ permalink raw reply related

* [PATCH 0/7] Add audio driver base on rpmsg on i.MX platform
From: Shengjiu Wang @ 2021-02-05  6:57 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, alsa-devel, linux-kernel, timur,
	nicoleotsuka, Xiubo.Lee, festevam, linuxppc-dev, robh+dt,
	devicetree

On Asymmetric multiprocessor, there is Cortex-A core and Cortex-M core,
Linux is running on A core, RTOS is running on M core.
The audio hardware device can be controlled by Cortex-M device,
So audio playback/capture can be handled by M core.

Rpmsg is the interface for sending and receiving msg to and from M
core, that we can create a virtual sound on Cortex-A core side.

A core will tell the Cortex-M core sound format/rate/channel,
where is the data buffer, what is the period size, when to start,
when to stop and when suspend or resume happen, each of this behavior
there is defined rpmsg command.

Especially we designed the low power audio case, that is to
allocate a large buffer and fill the data, then Cortex-A core can go
to sleep mode, Cortex-M core continue to play the sound, when the
buffer is consumed, Cortex-M core will trigger the Cortex-A core to
wakeup to fill data.

Shengjiu Wang (7):
  ASoC: soc-component: Add snd_soc_pcm_component_ack
  ASoC: fsl_rpmsg: Add CPU DAI driver for audio base on rpmsg
  ASoC: dt-bindings: fsl_rpmsg: Add binding doc for rpmsg cpu dai driver
  ASoC: imx-audio-rpmsg: Add rpmsg_driver for audio channel
  ASoC: imx-pcm-rpmsg: Add platform driver for audio base on rpmsg
  ASoC: imx-rpmsg: Add machine driver for audio base on rpmsg
  ASoC: dt-bindings: imx-rpmsg: Add binding doc for rpmsg machine driver

 .../devicetree/bindings/sound/fsl,rpmsg.yaml  |  80 ++
 .../bindings/sound/imx-audio-rpmsg.yaml       |  48 +
 include/sound/soc-component.h                 |   3 +
 sound/soc/fsl/Kconfig                         |  28 +
 sound/soc/fsl/Makefile                        |   6 +
 sound/soc/fsl/fsl_rpmsg.c                     | 258 +++++
 sound/soc/fsl/fsl_rpmsg.h                     |  38 +
 sound/soc/fsl/imx-audio-rpmsg.c               | 142 +++
 sound/soc/fsl/imx-pcm-rpmsg.c                 | 898 ++++++++++++++++++
 sound/soc/fsl/imx-pcm-rpmsg.h                 | 512 ++++++++++
 sound/soc/fsl/imx-rpmsg.c                     | 148 +++
 sound/soc/soc-component.c                     |  14 +
 sound/soc/soc-pcm.c                           |   2 +
 13 files changed, 2177 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
 create mode 100644 Documentation/devicetree/bindings/sound/imx-audio-rpmsg.yaml
 create mode 100644 sound/soc/fsl/fsl_rpmsg.c
 create mode 100644 sound/soc/fsl/fsl_rpmsg.h
 create mode 100644 sound/soc/fsl/imx-audio-rpmsg.c
 create mode 100644 sound/soc/fsl/imx-pcm-rpmsg.c
 create mode 100644 sound/soc/fsl/imx-pcm-rpmsg.h
 create mode 100644 sound/soc/fsl/imx-rpmsg.c

-- 
2.27.0

^ permalink raw reply

* [PATCH 1/7] ASoC: soc-component: Add snd_soc_pcm_component_ack
From: Shengjiu Wang @ 2021-02-05  6:57 UTC (permalink / raw)
  To: lgirdwood, broonie, perex, tiwai, alsa-devel, linux-kernel, timur,
	nicoleotsuka, Xiubo.Lee, festevam, linuxppc-dev, robh+dt,
	devicetree
In-Reply-To: <1612508250-10586-1-git-send-email-shengjiu.wang@nxp.com>

Add snd_soc_pcm_component_ack back, which can be used to get updated
buffer pointer in platform driver.
On Asymmetric multiprocessor, this pointer can be sent to Cortex-M
core for audio processing.

Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 include/sound/soc-component.h |  3 +++
 sound/soc/soc-component.c     | 14 ++++++++++++++
 sound/soc/soc-pcm.c           |  2 ++
 3 files changed, 19 insertions(+)

diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h
index 5b47768222b7..2dc8c7e3d1a6 100644
--- a/include/sound/soc-component.h
+++ b/include/sound/soc-component.h
@@ -146,6 +146,8 @@ struct snd_soc_component_driver {
 	int (*mmap)(struct snd_soc_component *component,
 		    struct snd_pcm_substream *substream,
 		    struct vm_area_struct *vma);
+	int (*ack)(struct snd_soc_component *component,
+		   struct snd_pcm_substream *substream);
 
 	const struct snd_compress_ops *compress_ops;
 
@@ -498,5 +500,6 @@ int snd_soc_pcm_component_pm_runtime_get(struct snd_soc_pcm_runtime *rtd,
 					 void *stream);
 void snd_soc_pcm_component_pm_runtime_put(struct snd_soc_pcm_runtime *rtd,
 					  void *stream, int rollback);
+int snd_soc_pcm_component_ack(struct snd_pcm_substream *substream);
 
 #endif /* __SOC_COMPONENT_H */
diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c
index 159bf88b9f8c..a9fbb2d26412 100644
--- a/sound/soc/soc-component.c
+++ b/sound/soc/soc-component.c
@@ -1212,3 +1212,17 @@ void snd_soc_pcm_component_pm_runtime_put(struct snd_soc_pcm_runtime *rtd,
 		soc_component_mark_pop(component, stream, pm);
 	}
 }
+
+int snd_soc_pcm_component_ack(struct snd_pcm_substream *substream)
+{
+	struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
+	struct snd_soc_component *component;
+	int i;
+
+	/* FIXME: use 1st pointer */
+	for_each_rtd_components(rtd, i, component)
+		if (component->driver->ack)
+			return component->driver->ack(component, substream);
+
+	return 0;
+}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index b79f064887d4..605acec48971 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -2830,6 +2830,8 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
 			rtd->ops.page		= snd_soc_pcm_component_page;
 		if (drv->mmap)
 			rtd->ops.mmap		= snd_soc_pcm_component_mmap;
+		if (drv->ack)
+			rtd->ops.ack            = snd_soc_pcm_component_ack;
 	}
 
 	if (playback)
-- 
2.27.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox