LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] powerpc/64s: accumulate_stolen_time remove irq mask workaround
From: Nicholas Piggin @ 2021-06-23  2:29 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

The caller has been moved to C after irq soft-mask state has been
reconciled, and Linux irqs have been marked as disabled, so this
does not have to play games with irq internals.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kernel/time.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b67d93a609a2..d0308e804063 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -231,24 +231,13 @@ static u64 scan_dispatch_log(u64 stop_tb)
 void notrace accumulate_stolen_time(void)
 {
 	u64 sst, ust;
-	unsigned long save_irq_soft_mask = irq_soft_mask_return();
 	struct cpu_accounting_data *acct = &local_paca->accounting;
 
-	/* We are called early in the exception entry, before
-	 * soft/hard_enabled are sync'ed to the expected state
-	 * for the exception. We are hard disabled but the PACA
-	 * needs to reflect that so various debug stuff doesn't
-	 * complain
-	 */
-	irq_soft_mask_set(IRQS_DISABLED);
-
 	sst = scan_dispatch_log(acct->starttime_user);
 	ust = scan_dispatch_log(acct->starttime);
 	acct->stime -= sst;
 	acct->utime -= ust;
 	acct->steal_time += ust + sst;
-
-	irq_soft_mask_set(save_irq_soft_mask);
 }
 
 static inline u64 calculate_stolen_time(u64 stop_tb)
-- 
2.23.0


^ permalink raw reply related

* [PATCH v2] powerpc/pseries: Enable hardlockup watchdog for PowerVM partitions
From: Nicholas Piggin @ 2021-06-23  2:15 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nicholas Piggin

PowerVM will not arbitrarily oversubscribe or stop guests, page out the
guest kernel text to a NFS volume connected by carrier pigeon to abacus
based storage, etc., as a KVM host might. So PowerVM guests are not
likely to be killed by the hard lockup watchdog in normal operation,
even with shared processor LPARs which still get a minimum allotment of
CPU time.

Enable the hard lockup detector by default on !KVM guests, which we will
assume is PowerVM. It has been useful in finding problems on bare metal
kernels.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
v2: Fix 64e build by including kvm_guest.h

 arch/powerpc/kernel/setup_64.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index e42b85e4f1aa..428058dc5114 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -33,6 +33,7 @@
 #include <linux/pgtable.h>
 
 #include <asm/debugfs.h>
+#include <asm/kvm_guest.h>
 #include <asm/io.h>
 #include <asm/kdump.h>
 #include <asm/prom.h>
@@ -939,16 +940,20 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
  * disable it by default. Book3S has a soft-nmi hardlockup detector based
  * on the decrementer interrupt, so it does not suffer from this problem.
  *
- * It is likely to get false positives in VM guests, so disable it there
- * by default too.
+ * It is likely to get false positives in KVM guests, so disable it there
+ * by default too. PowerVM will not stop or arbitrarily oversubscribe
+ * CPUs, but give a minimum regular allotment even with SPLPAR, so enable
+ * the detector for non-KVM guests, assume PowerVM.
  */
 static int __init disable_hardlockup_detector(void)
 {
 #ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
 	hardlockup_detector_disable();
 #else
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		hardlockup_detector_disable();
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+		if (is_kvm_guest())
+			hardlockup_detector_disable();
+	}
 #endif
 
 	return 0;
-- 
2.23.0


^ permalink raw reply related

* Re: [PATCH 2/2] powerpc/prom_init: Pass linux_banner to firmware via option vector 7
From: Michael Ellerman @ 2021-06-23  0:38 UTC (permalink / raw)
  To: Tyrel Datwyler, linuxppc-dev
In-Reply-To: <bead9552-1e5c-2485-0463-4d161cce2a1f@linux.ibm.com>

Tyrel Datwyler <tyreld@linux.ibm.com> writes:
> On 6/20/21 11:49 PM, Michael Ellerman wrote:
>> Pass the value of linux_banner to firmware via option vector 7.
>> 
>> Option vector 7 is described in "LoPAR" Linux on Power Architecture
>> Reference v2.9, in table B.7 on page 824:
>> 
>>   An ASCII character formatted null terminated string that describes
>>   the client operating system. The string shall be human readable and
>>   may be displayed on the console.
>> 
>> The string can be up to 256 bytes total, including the nul terminator.
>> 
>> linux_banner contains lots of information, and should make it possible
>> to identify the exact kernel version that is running:
>> 
>>   const char linux_banner[] =
>>   "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
>>   LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
>> 
>> For example:
>>   Linux version 4.15.0-144-generic (buildd@bos02-ppc64el-018) (gcc
>>   version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)) #148-Ubuntu SMP Sat May 8
>>   02:32:13 UTC 2021 (Ubuntu 4.15.0-144.148-generic 4.15.18)
>> 
>> It's also printed at boot to the console/dmesg, which should make it
>> possible to correlate what firmware receives with the console/dmesg on
>> the machine.
>> 
>> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
>> ---
>> 
>> NB. linux_banner is already allowed by prom_init_check.sh
>> 
>> LoPAR: https://openpowerfoundation.org/?resource_lib=linux-on-power-architecture-reference-a-papr-linux-subset-review-draft
>> ---
>>  arch/powerpc/kernel/prom_init.c | 15 +++++++++++++++
>>  1 file changed, 15 insertions(+)
>> 
>> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
>> index c18d55f8b951..7343076b261c 100644
>> --- a/arch/powerpc/kernel/prom_init.c
>> +++ b/arch/powerpc/kernel/prom_init.c
...
>> @@ -1340,6 +1351,10 @@ static void __init prom_check_platform_support(void)
>>  	memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
>>  	       sizeof(ibm_architecture_vec));
>> 
>> +	prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
>> +	// Ensure nul termination
>> +	ibm_architecture_vec.vec7.os_id[255] = '\0';
>> +
>
> Doesn't the implementation of prom_strscpy_pad() in patch 1 ensure nul termination?

Yes! I was originally using strncpy(), but forgot to drop this when I
switched to strscpy_pad(). I dropped it when applying.

Thanks for reviewing.

cheers

^ permalink raw reply

* Re: [PATCH v14 01/12] swiotlb: Refactor swiotlb init functions
From: Stefano Stabellini @ 2021-06-22 21:02 UTC (permalink / raw)
  To: Claire Chang
  Cc: heikki.krogerus, thomas.hellstrom, peterz, joonas.lahtinen,
	dri-devel, chris, grant.likely, paulus, Frank Rowand, mingo,
	Marek Szyprowski, sstabellini, Saravana Kannan, Joerg Roedel,
	Rafael J . Wysocki, Christoph Hellwig, Bartosz Golaszewski,
	bskeggs, linux-pci, xen-devel, Thierry Reding, intel-gfx,
	matthew.auld, linux-devicetree, jxgao, daniel, Will Deacon,
	Konrad Rzeszutek Wilk, maarten.lankhorst, airlied, Dan Williams,
	linuxppc-dev, jani.nikula, Rob Herring, rodrigo.vivi, bhelgaas,
	boris.ostrovsky, Andy Shevchenko, jgross, Nicolas Boichat,
	Greg KH, Randy Dunlap, lkml, tfiga, list@263.net:IOMMU DRIVERS,
	Jim Quinlan, xypron.glpk, thomas.lendacky, Robin Murphy, bauerman
In-Reply-To: <20210619034043.199220-2-tientzu@chromium.org>

On Sat, 19 Jun 2021, Claire Chang wrote:
> Add a new function, swiotlb_init_io_tlb_mem, for the io_tlb_mem struct
> initialization to make the code reusable.
> 
> Signed-off-by: Claire Chang <tientzu@chromium.org>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Stefano Stabellini <sstabellini@kernel.org>
> Tested-by: Will Deacon <will@kernel.org>

Acked-by: Stefano Stabellini <sstabellini@kernel.org>


> ---
>  kernel/dma/swiotlb.c | 50 ++++++++++++++++++++++----------------------
>  1 file changed, 25 insertions(+), 25 deletions(-)
> 
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 52e2ac526757..1f9b2b9e7490 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -168,9 +168,28 @@ void __init swiotlb_update_mem_attributes(void)
>  	memset(vaddr, 0, bytes);
>  }
>  
> -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
> +static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
> +				    unsigned long nslabs, bool late_alloc)
>  {
> +	void *vaddr = phys_to_virt(start);
>  	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
> +
> +	mem->nslabs = nslabs;
> +	mem->start = start;
> +	mem->end = mem->start + bytes;
> +	mem->index = 0;
> +	mem->late_alloc = late_alloc;
> +	spin_lock_init(&mem->lock);
> +	for (i = 0; i < mem->nslabs; i++) {
> +		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
> +		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> +		mem->slots[i].alloc_size = 0;
> +	}
> +	memset(vaddr, 0, bytes);
> +}
> +
> +int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
> +{
>  	struct io_tlb_mem *mem;
>  	size_t alloc_size;
>  
> @@ -186,16 +205,8 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
>  	if (!mem)
>  		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
>  		      __func__, alloc_size, PAGE_SIZE);
> -	mem->nslabs = nslabs;
> -	mem->start = __pa(tlb);
> -	mem->end = mem->start + bytes;
> -	mem->index = 0;
> -	spin_lock_init(&mem->lock);
> -	for (i = 0; i < mem->nslabs; i++) {
> -		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
> -		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> -		mem->slots[i].alloc_size = 0;
> -	}
> +
> +	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
>  
>  	io_tlb_default_mem = mem;
>  	if (verbose)
> @@ -282,8 +293,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
>  int
>  swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
>  {
> -	unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
>  	struct io_tlb_mem *mem;
> +	unsigned long bytes = nslabs << IO_TLB_SHIFT;
>  
>  	if (swiotlb_force == SWIOTLB_NO_FORCE)
>  		return 0;
> @@ -297,20 +308,9 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
>  	if (!mem)
>  		return -ENOMEM;
>  
> -	mem->nslabs = nslabs;
> -	mem->start = virt_to_phys(tlb);
> -	mem->end = mem->start + bytes;
> -	mem->index = 0;
> -	mem->late_alloc = 1;
> -	spin_lock_init(&mem->lock);
> -	for (i = 0; i < mem->nslabs; i++) {
> -		mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
> -		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
> -		mem->slots[i].alloc_size = 0;
> -	}
> -
> +	memset(mem, 0, sizeof(*mem));
>  	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
> -	memset(tlb, 0, bytes);
> +	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
>  
>  	io_tlb_default_mem = mem;
>  	swiotlb_print_info();
> -- 
> 2.32.0.288.g62a8d224e6-goog
> 

^ permalink raw reply

* Re: [PATCH 1/2] powerpc/prom_init: Convert prom_strcpy() into prom_strscpy_pad()
From: Tyrel Datwyler @ 2021-06-22 18:12 UTC (permalink / raw)
  To: Michael Ellerman, Daniel Axtens, linuxppc-dev
In-Reply-To: <87bl7y35dw.fsf@mpe.ellerman.id.au>

On 6/21/21 9:11 PM, Michael Ellerman wrote:
> Daniel Axtens <dja@axtens.net> writes:
>> Hi
>>
>>> -static char __init *prom_strcpy(char *dest, const char *src)
>>> +static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n)
>>>  {
>>> -	char *tmp = dest;
>>> +	ssize_t rc;
>>> +	size_t i;
>>>  
>>> -	while ((*dest++ = *src++) != '\0')
>>> -		/* nothing */;
>>> -	return tmp;
>>> +	if (n == 0 || n > INT_MAX)
>>> +		return -E2BIG;
>>> +
>>> +	// Copy up to n bytes
>>> +	for (i = 0; i < n && src[i] != '\0'; i++)
>>> +		dest[i] = src[i];
>>> +
>>> +	rc = i;
>>> +
>>> +	// If we copied all n then we have run out of space for the nul
>>> +	if (rc == n) {
>>> +		// Rewind by one character to ensure nul termination
>>> +		i--;
>>> +		rc = -E2BIG;
>>> +	}
>>> +
>>> +	for (; i < n; i++)
>>> +		dest[i] = '\0';
>>> +
>>> +	return rc;
>>>  }
>>>  
>>
>> This implementation seems good to me.
>>
>> I copied it into a new C file and added the following:
>>
>> int main() {
>> 	char longstr[255]="abcdefghijklmnopqrstuvwxyz";
>> 	char shortstr[5];
>> 	assert(prom_strscpy_pad(longstr, "", 0) == -E2BIG);
>> 	assert(prom_strscpy_pad(longstr, "hello", 255) == 5);
>> 	assert(prom_strscpy_pad(shortstr, "hello", 5) == -E2BIG);
>> 	assert(memcmp(shortstr, "hell", 5) == 0);
>> 	assert(memcmp(longstr, "hello\0\0\0\0\0\0\0\0\0", 6) == 0);
>> 	return 0;
>> }
>>
>> All the assertions pass. I believe this covers all the conditions from
>> the strscpy_pad docstring.
>>
>> Reviewed-by: Daniel Axtens <dja@axtens.net>
> 
> Thanks.
> 
> I'll also drop the explicit nul termination in patch 2, which is a
> leftover from when I was using strncpy().

I guess you can ignore my other email questioning this.

-Tyrel

> 
> cheers
> 


^ permalink raw reply

* Re: [PATCH 2/2] powerpc/prom_init: Pass linux_banner to firmware via option vector 7
From: Tyrel Datwyler @ 2021-06-22 18:11 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev
In-Reply-To: <20210621064938.2021419-2-mpe@ellerman.id.au>

On 6/20/21 11:49 PM, Michael Ellerman wrote:
> Pass the value of linux_banner to firmware via option vector 7.
> 
> Option vector 7 is described in "LoPAR" Linux on Power Architecture
> Reference v2.9, in table B.7 on page 824:
> 
>   An ASCII character formatted null terminated string that describes
>   the client operating system. The string shall be human readable and
>   may be displayed on the console.
> 
> The string can be up to 256 bytes total, including the nul terminator.
> 
> linux_banner contains lots of information, and should make it possible
> to identify the exact kernel version that is running:
> 
>   const char linux_banner[] =
>   "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
>   LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION "\n";
> 
> For example:
>   Linux version 4.15.0-144-generic (buildd@bos02-ppc64el-018) (gcc
>   version 7.5.0 (Ubuntu 7.5.0-3ubuntu1~18.04)) #148-Ubuntu SMP Sat May 8
>   02:32:13 UTC 2021 (Ubuntu 4.15.0-144.148-generic 4.15.18)
> 
> It's also printed at boot to the console/dmesg, which should make it
> possible to correlate what firmware receives with the console/dmesg on
> the machine.
> 
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> 
> NB. linux_banner is already allowed by prom_init_check.sh
> 
> LoPAR: https://openpowerfoundation.org/?resource_lib=linux-on-power-architecture-reference-a-papr-linux-subset-review-draft
> ---
>  arch/powerpc/kernel/prom_init.c | 15 +++++++++++++++
>  1 file changed, 15 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> index c18d55f8b951..7343076b261c 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -27,6 +27,7 @@
>  #include <linux/initrd.h>
>  #include <linux/bitops.h>
>  #include <linux/pgtable.h>
> +#include <linux/printk.h>
>  #include <asm/prom.h>
>  #include <asm/rtas.h>
>  #include <asm/page.h>
> @@ -944,6 +945,10 @@ struct option_vector6 {
>  	u8 os_name;
>  } __packed;
> 
> +struct option_vector7 {
> +	u8 os_id[256];
> +} __packed;
> +
>  struct ibm_arch_vec {
>  	struct { u32 mask, val; } pvrs[14];
> 
> @@ -966,6 +971,9 @@ struct ibm_arch_vec {
> 
>  	u8 vec6_len;
>  	struct option_vector6 vec6;
> +
> +	u8 vec7_len;
> +	struct option_vector7 vec7;
>  } __packed;
> 
>  static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
> @@ -1112,6 +1120,9 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
>  		.secondary_pteg = 0,
>  		.os_name = OV6_LINUX,
>  	},
> +
> +	/* option vector 7: OS Identification */
> +	.vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)),
>  };
> 
>  static struct ibm_arch_vec __prombss ibm_architecture_vec  ____cacheline_aligned;
> @@ -1340,6 +1351,10 @@ static void __init prom_check_platform_support(void)
>  	memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
>  	       sizeof(ibm_architecture_vec));
> 
> +	prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
> +	// Ensure nul termination
> +	ibm_architecture_vec.vec7.os_id[255] = '\0';
> +

Doesn't the implementation of prom_strscpy_pad() in patch 1 ensure nul termination?

-Tyrel

>  	if (prop_len > 1) {
>  		int i;
>  		u8 vec[8];
> 


^ permalink raw reply

* Re: linux-next: manual merge of the kvm tree with the powerpc tree
From: Paolo Bonzini @ 2021-06-22 16:52 UTC (permalink / raw)
  To: Michael Ellerman, Stephen Rothwell, KVM, PowerPC
  Cc: Ashish Kalra, Brijesh Singh, Sean Christopherson,
	Linux Kernel Mailing List, Maxim Levitsky,
	Linux Next Mailing List, Vitaly Kuznetsov, Bharata B Rao
In-Reply-To: <871r8u2bqp.fsf@mpe.ellerman.id.au>

On 22/06/21 16:51, Michael Ellerman wrote:
>> Please drop the patches at
>> https://www.spinics.net/lists/kvm-ppc/msg18666.html  from the powerpc
>> tree, and merge them through either the kvm-powerpc or kvm trees.
> The kvm-ppc tree is not taking patches at the moment.

If so, let's remove the "T" entry from MAINTAINERS and add an entry for 
the kvm@vger.kernel.org mailing list.

>   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/log/?h=topic/ppc-kvm
> 
> The commit Stephen mentioned has been rebased since to squash in a fix.
> But what is in the topic branch is now final, I won't rebase what's
> there.

Thanks, I pulled it.  Anyway, if the workflow is not the one indicated 
by MAINTAINERS it's never a bad idea to Cc more people when applying 
patches.

Paolo


^ permalink raw reply

* Re: [PATCH v4 7/7] powerpc/pseries: Add support for FORM2 associativity
From: Daniel Henrique Barboza @ 2021-06-22 16:04 UTC (permalink / raw)
  To: Aneesh Kumar K.V, linuxppc-dev, mpe
  Cc: Nathan Lynch, nvdimm, dan.j.williams, David Gibson
In-Reply-To: <87mtrihzl0.fsf@linux.ibm.com>



On 6/22/21 9:07 AM, Aneesh Kumar K.V wrote:
> Daniel Henrique Barboza <danielhb413@gmail.com> writes:
> 
>> On 6/17/21 1:51 PM, Aneesh Kumar K.V wrote:
>>> PAPR interface currently supports two different ways of communicating resource
>>> grouping details to the OS. These are referred to as Form 0 and Form 1
>>> associativity grouping. Form 0 is the older format and is now considered
>>> deprecated. This patch adds another resource grouping named FORM2.
>>>
>>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>>> ---
>>>    Documentation/powerpc/associativity.rst   | 135 ++++++++++++++++++++
>>>    arch/powerpc/include/asm/firmware.h       |   3 +-
>>>    arch/powerpc/include/asm/prom.h           |   1 +
>>>    arch/powerpc/kernel/prom_init.c           |   3 +-
>>>    arch/powerpc/mm/numa.c                    | 149 +++++++++++++++++++++-
>>>    arch/powerpc/platforms/pseries/firmware.c |   1 +
>>>    6 files changed, 286 insertions(+), 6 deletions(-)
>>>    create mode 100644 Documentation/powerpc/associativity.rst
>>>
>>> diff --git a/Documentation/powerpc/associativity.rst b/Documentation/powerpc/associativity.rst
>>> new file mode 100644
>>> index 000000000000..93be604ac54d
>>> --- /dev/null
>>> +++ b/Documentation/powerpc/associativity.rst
>>> @@ -0,0 +1,135 @@
>>> +============================
>>> +NUMA resource associativity
>>> +============================
>>> +
>>> +Associativity represents the groupings of the various platform resources into
>>> +domains of substantially similar mean performance relative to resources outside
>>> +of that domain. Resources subsets of a given domain that exhibit better
>>> +performance relative to each other than relative to other resources subsets
>>> +are represented as being members of a sub-grouping domain. This performance
>>> +characteristic is presented in terms of NUMA node distance within the Linux kernel.
>>> +From the platform view, these groups are also referred to as domains.
>>> +
>>> +PAPR interface currently supports different ways of communicating these resource
>>> +grouping details to the OS. These are referred to as Form 0, Form 1 and Form 2
>>> +associativity grouping. Form 0 is the older format and is now considered deprecated.
>>> +
>>> +Hypervisor indicates the type/form of associativity used via the "ibm,architecture-vec-5" property.
>>> +Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage of Form 0 or Form 1.
>>> +A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativity
>>> +bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used.
>>> +
>>> +Form 0
>>> +------
>>> +Form 0 associativity supports only two NUMA distances (LOCAL and REMOTE).
>>> +
>>> +Form 1
>>> +------
>>> +With Form 1 a combination of ibm,associativity-reference-points and ibm,associativity
>>> +device tree properties are used to determine the NUMA distance between resource groups/domains.
>>> +
>>> +The “ibm,associativity” property contains one or more lists of numbers (domainID)
>>> +representing the resource’s platform grouping domains.
>>> +
>>> +The “ibm,associativity-reference-points” property contains one or more list of numbers
>>> +(domainID index) that represents the 1 based ordinal in the associativity lists.
>>> +The list of domainID index represents increasing hierarchy of resource grouping.
>>> +
>>> +ex:
>>> +{ primary domainID index, secondary domainID index, tertiary domainID index.. }
>>> +
>>> +Linux kernel uses the domainID at the primary domainID index as the NUMA node id.
>>> +Linux kernel computes NUMA distance between two domains by recursively comparing
>>> +if they belong to the same higher-level domains. For mismatch at every higher
>>> +level of the resource group, the kernel doubles the NUMA distance between the
>>> +comparing domains.
>>> +
>>> +Form 2
>>> +-------
>>> +Form 2 associativity format adds separate device tree properties representing NUMA node distance
>>> +thereby making the node distance computation flexible. Form 2 also allows flexible primary
>>> +domain numbering. With numa distance computation now detached from the index value of
>>> +"ibm,associativity" property, Form 2 allows a large number of primary domain ids at the
>>> +same domainID index representing resource groups of different performance/latency characteristics.
>>> +
>>> +Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 5 in the
>>> +"ibm,architecture-vec-5" property.
>>> +
>>> +"ibm,numa-lookup-index-table" property contains one or more list numbers representing
>>> +the domainIDs present in the system. The offset of the domainID in this property is considered
>>> +the domainID index.
>>> +
>>> +prop-encoded-array: The number N of the domainIDs encoded as with encode-int, followed by
>>> +N domainID encoded as with encode-int
>>> +
>>> +For ex:
>>> +ibm,numa-lookup-index-table =  {4, 0, 8, 250, 252}, domainID index for domainID 8 is 1.
>>> +
>>> +"ibm,numa-distance-table" property contains one or more list of numbers representing the NUMA
>>> +distance between resource groups/domains present in the system.
>>> +
>>> +prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by
>>> +N distance values encoded as with encode-bytes. The max distance value we could encode is 255.
>>> +
>>> +For ex:
>>> +ibm,numa-lookup-index-table =  {3, 0, 8, 40}
>>> +ibm,numa-distance-table     =  {9, 10, 20, 80, 20, 10, 160, 80, 160, 10}
>>> +
>>> +  | 0    8   40
>>> +--|------------
>>> +  |
>>> +0 | 10   20  80
>>> +  |
>>> +8 | 20   10  160
>>> +  |
>>> +40| 80   160  10
>>> +
>>> +
>>> +"ibm,associativity" property for resources in node 0, 8 and 40
>>> +
>>> +{ 3, 6, 7, 0 }
>>> +{ 3, 6, 9, 8 }
>>> +{ 3, 6, 7, 40}
>>> +
>>> +With "ibm,associativity-reference-points"  { 0x3 }
>>
>> With this configuration, would the following ibm,associativity arrays
>> also be valid?
>>
>>
>> { 3, 0, 0, 0 }
>> { 3, 0, 0, 8 }
>> { 3, 0, 0, 40}
>>
> 
> Yes
> 
>> If yes, then we need a way to tell that the associativity domains assignment
>> are optional, and FORM2 relies solely on finding out the domainID of the
>> resource (0, 8 and 40) to retrieve the domainID index, and with this
>> index all performance metrics can be retrieved from the numa-* properties
>> (numa-distance-table, numa-bandwidth-table ...).
>>
> 
> Where do you suggest we clarify that? I agree that it is not explicitly
> mentioned. But we describe the details of how we find the numa distance
> with example in the document.


Perhaps something like this, right in the middle of the example:


----------------

(...)

+  | 0    8   40
+--|------------
+  |
+0 | 10   20  80
+  |
+8 | 20   10  160
+  |
+40| 80   160  10
+
+

With "ibm,associativity-reference-points" equal to { 0x3 }, the domainID of
each resource is located at index 3 of each ibm,associativity property:

+{ 3, 6, 7, 0 }
+{ 3, 6, 9, 8 }
+{ 3, 6, 7, 40 }


FORM2 requires the ibm,associativity array to contain the domainID of the
resource, which is defined by the ibm,associativity-reference-points.
Calculating the associativity domains of the remaining ibm,associativity
elements is not obligatory. In this example, the following ibm,associativity
arrays are also valid:

{ 3, 0, 0, 0 }
{ 3, 0, 0, 8 }
{ 3, 0, 0, 40 }

(...)

-------------


> 
>> Retrieving the resource domainID is done by using ibm,associativity-reference-points.
>>
>> This will allow the platform to implement FORM2 such as:
>>
>> { 1, 0 }
>> { 1, 8 }
>> { 1, 40 }
>>    
>> - ref-points: { 0x1 }
>>
>> If the platform chooses to do so.
>>
> 
> That is correct.
> 
>>
>>> +
>>> +Each resource (drcIndex) now also supports additional optional device tree properties.
>>> +These properties are marked optional because the platform can choose not to export
>>> +them and provide the system topology details using the earlier defined device tree
>>> +properties alone. The optional device tree properties are used when adding new resources
>>> +(DLPAR) and when the platform didn't provide the topology details of the domain which
>>> +contains the newly added resource during boot.
>>> +
>>> +"ibm,numa-lookup-index" property contains a number representing the domainID index to be used
>>> +when building the NUMA distance of the numa node to which this resource belongs. This can
>>> +be looked at as the index at which this new domainID would have appeared in
>>> +"ibm,numa-lookup-index-table" if the domain was present during boot. The domainID
>>> +of the new resource can be obtained from the existing "ibm,associativity" property. This
>>> +can be used to build distance information of a newly onlined NUMA node via DLPAR operation.
>>> +The value is 1 based array index value.
>>> +
>>> +prop-encoded-array: An integer encoded as with encode-int specifying the domainID index
>>> +
>>> +"ibm,numa-distance" property contains one or more list of numbers presenting the NUMA distance
>>> +from this resource domain to other resources.
>>> +
>>> +prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by
>>> +N distance values encoded as with encode-bytes. The max distance value we could encode is 255.
>>> +
>>> +For ex:
>>> +ibm,associativity     = { 4, 5, 10, 50}
>>> +ibm,numa-lookup-index = { 4 }
>>> +ibm,numa-distance   =  {8, 160, 255, 80, 10, 160, 255, 80, 10}
>>> +
>>> +resulting in a new topology as below.
>>> +  | 0    8   40   50
>>> +--|------------------
>>> +  |
>>> +0 | 10   20  80   160
>>> +  |
>>> +8 | 20   10  160  255
>>> +  |
>>> +40| 80   160  10  80
>>> +  |
>>> +50| 160  255  80  10
>>> +
>>
>> I see there is no mention of the special PAPR SCM handling. I saw in
>> one of the your replies of v1:
>>
>> "Another option is to make sure that numa-distance-value is populated
>> such that PMEMB distance indicates it is closer to node0 when compared
>> to node1. ie, node_distance[40][0] < node_distance[40][1]. One could
>> possibly infer the grouping based on the distance value and not depend
>> on ibm,associativity for that purpose."
>>
>>
>> Is that what we're supposed to do with PAPR SCM? I'm not sure how that
>> affects NVDIMM support in QEMU with FORM2.
>>
>>
> 
> Yes, that is what we are doing with this version (v4) of the patchset.
> We can drop the nvdimm specific changes from Qemu.


I see. I'll drop the NVDIMM changes in the QEMU POC of FORM2 then.



Thanks,


Daniel

> 
> -aneesh
> 

^ permalink raw reply

* Re: [powerpc][next-20210621] WARNING at kernel/sched/fair.c:3277 during boot
From: Sachin Sant @ 2021-06-22 15:59 UTC (permalink / raw)
  To: Vincent Guittot
  Cc: Odin Ugedal, Linux Next Mailing List, linuxppc-dev, open list
In-Reply-To: <20210622143154.GA804@vingu-book>

>> On Tue, 22 Jun 2021 at 09:39, Sachin Sant <sachinp@linux.vnet.ibm.com> wrote:
>>> 
>>> While booting 5.13.0-rc7-next-20210621 on a PowerVM LPAR following warning
>>> is seen
>>> 
>>> [   30.922154] ------------[ cut here ]------------
>>> [   30.922201] cfs_rq->avg.load_avg || cfs_rq->avg.util_avg || cfs_rq->avg.runnable_avg
>>> [   30.922219] WARNING: CPU: 6 PID: 762 at kernel/sched/fair.c:3277 update_blocked_averages+0x758/0x780
>> 
>> Yes. That was exactly the purpose of the patch. There is one last
>> remaining part which could generate this. I'm going to prepare a patch
> 
> Could you try the patch below? I have been able to reproduce the problem locally and this
> fixes it on my system:
> 
I can recreate the issue with this patch.

         Starting Terminate Plymouth Boot Screen...
         Starting Hold until boot process finishes up...
[FAILED] Failed to start Crash recovery kernel arming.
See 'systemctl status kdump.service' for details.
[   10.737913] ------------[ cut here ]------------
[   10.737960] cfs_rq->avg.load_avg || cfs_rq->avg.util_avg || cfs_rq->avg.runnable_avg
[   10.737976] WARNING: CPU: 27 PID: 146 at kernel/sched/fair.c:3279 update_blocked_averages+0x758/0x780
[   10.738010] Modules linked in: stp llc rfkill sunrpc pseries_rng xts vmx_crypto uio_pdrv_genirq uio sch_fq_codel ip_tables xfs libcrc32c sr_mod sd_mod cdrom t10_pi sg ibmvscsi ibmveth scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod fuse
[   10.738089] CPU: 27 PID: 146 Comm: ksoftirqd/27 Not tainted 5.13.0-rc7-next-20210621-dirty #2
[   10.738103] NIP:  c0000000001b2768 LR: c0000000001b2764 CTR: c000000000729120
[   10.738116] REGS: c000000015973840 TRAP: 0700   Not tainted  (5.13.0-rc7-next-20210621-dirty)
[   10.738130] MSR:  800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE>  CR: 48000224  XER: 00000005
[   10.738161] CFAR: c00000000014d120 IRQMASK: 1 
[   10.738161] GPR00: c0000000001b2764 c000000015973ae0 c0000000029bb900 0000000000000048 
[   10.738161] GPR04: 00000000fffeffff c0000000159737a0 0000000000000027 c00000154f9f7e18 
[   10.738161] GPR08: 0000000000000023 0000000000000001 0000000000000027 c00000167f1d7fe8 
[   10.738161] GPR12: 0000000000000000 c00000154ffd7e80 c00000154fa82580 000000000000b78a 
[   10.738161] GPR16: 000000028007883c 00000000000002ed c000000038d31000 0000000000000000 
[   10.738161] GPR20: 0000000000000000 c0000000029fdfe0 0000000000000000 000000000000037b 
[   10.738161] GPR24: 0000000000000000 c00000154fa82f90 0000000000000001 c00000003d4ca400 
[   10.738161] GPR28: 00000000000002ed c000000038d311c0 c000000038d31100 0000000000000000 
[   10.738281] NIP [c0000000001b2768] update_blocked_averages+0x758/0x780
[   10.738290] LR [c0000000001b2764] update_blocked_averages+0x754/0x780
[   10.738299] Call Trace:
[   10.738303] [c000000015973ae0] [c0000000001b2764] update_blocked_averages+0x754/0x780 (unreliable)
[   10.738315] [c000000015973c00] [c0000000001be720] run_rebalance_domains+0xa0/0xd0
[   10.738326] [c000000015973c30] [c000000000cf9acc] __do_softirq+0x15c/0x3d4
[   10.738337] [c000000015973d20] [c000000000158464] run_ksoftirqd+0x64/0x90
[   10.738346] [c000000015973d40] [c00000000018fd24] smpboot_thread_fn+0x204/0x270
[   10.738357] [c000000015973da0] [c000000000189770] kthread+0x190/0x1a0
[   10.738367] [c000000015973e10] [c00000000000ceec] ret_from_kernel_thread+0x5c/0x70
[   10.738381] Instruction dump:
[   10.738388] 3863c808 9be9eefe 4bf9a979 60000000 0fe00000 4bfff980 e9210070 e8610088 
[   10.738410] 39400001 99490003 4bf9a959 60000000 <0fe00000> 4bfffc24 3d22fff6 8929eefb 
[   10.738431] ---[ end trace 9ca80b55840c53f0 ]---

Thanks
-Sachin

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 8cc27b847ad8..da91db1c137f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3037,8 +3037,9 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
> static inline void
> dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
> {
> +       u32 divider = get_pelt_divider(&se->avg);
>        sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
> -       sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
> +       cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
> }
> #else
> static inline void
> 

^ permalink raw reply

* Re: [PATCH] ASoC: fsl_spdif: Fix unexpected interrupt after suspend
From: Fabio Estevam @ 2021-06-22 15:20 UTC (permalink / raw)
  To: Shengjiu Wang
  Cc: Linux-ALSA, Timur Tabi, Xiubo Li, linux-kernel, Takashi Iwai,
	Jaroslav Kysela, Nicolin Chen, Mark Brown, linuxppc-dev
In-Reply-To: <1624365084-7934-1-git-send-email-shengjiu.wang@nxp.com>

On Tue, Jun 22, 2021 at 9:50 AM Shengjiu Wang <shengjiu.wang@nxp.com> wrote:
>
> When system enter suspend, the machine driver suspend callback
> function will be called, then the cpu driver trigger callback
> (SNDRV_PCM_TRIGGER_SUSPEND) be called, it would disable the
> interrupt.
>
> But the machine driver suspend and cpu dai driver suspend order
> may be changed: if the cpu dai driver's suspend callback is called before
> the machine driver's suspend callback, then the interrupt is not cleared
> successfully in the trigger callback.
>
> So need to clear interrupts in cpu dai driver's suspend callback
> to avoid such issue.
>
> Fixes: 9cb2b3796e08 ("ASoC: fsl_spdif: Add pm runtime function")
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>

Reviewed-by: Fabio Estevam <festevam@gmail.com>

^ permalink raw reply

* Re: [PATCH] ASoC: fsl-asoc-card: change dev_err to dev_dbg for defer probe
From: Mark Brown @ 2021-06-22 15:14 UTC (permalink / raw)
  To: timur, tiwai, Shengjiu Wang, alsa-devel, linux-kernel, perex,
	linuxppc-dev, Xiubo.Lee, lgirdwood, nicoleotsuka, festevam
  Cc: Mark Brown
In-Reply-To: <1622616132-10391-1-git-send-email-shengjiu.wang@nxp.com>

On Wed, 2 Jun 2021 14:42:12 +0800, Shengjiu Wang wrote:
> Don't need to print error message for defer probe

Applied to

   https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound.git for-next

Thanks!

[1/1] ASoC: fsl-asoc-card: change dev_err to dev_dbg for defer probe
      commit: 4b1d51715d1cf78a1527fe426fc0278dcfea1959

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

^ permalink raw reply

* Re: linux-next: manual merge of the kvm tree with the powerpc tree
From: Michael Ellerman @ 2021-06-22 14:51 UTC (permalink / raw)
  To: Paolo Bonzini, Stephen Rothwell, KVM, PowerPC
  Cc: Ashish Kalra, Brijesh Singh, Sean Christopherson,
	Linux Kernel Mailing List, Maxim Levitsky,
	Linux Next Mailing List, Vitaly Kuznetsov, Bharata B Rao
In-Reply-To: <9c2dbe56-4c64-0032-0acb-2e2925c7a2ab@redhat.com>

Paolo Bonzini <pbonzini@redhat.com> writes:
> On 22/06/21 07:25, Stephen Rothwell wrote:
>> Hi all,
>> 
>> Today's linux-next merge of the kvm tree got a conflict in:
>> 
>>    include/uapi/linux/kvm.h
>> 
>> between commit:
>> 
>>    9bb4a6f38fd4 ("KVM: PPC: Book3S HV: Add KVM_CAP_PPC_RPT_INVALIDATE capability")
>> 
>> from the powerpc tree and commits:
>> 
>>    644f706719f0 ("KVM: x86: hyper-v: Introduce KVM_CAP_HYPERV_ENFORCE_CPUID")
>>    6dba94035203 ("KVM: x86: Introduce KVM_GET_SREGS2 / KVM_SET_SREGS2")
>>    0dbb11230437 ("KVM: X86: Introduce KVM_HC_MAP_GPA_RANGE hypercall")
>> 
>> from the kvm tree.
>> 
>> I fixed it up (see below) and can carry the fix as necessary. This
>> is now fixed as far as linux-next is concerned, but any non trivial
>> conflicts should be mentioned to your upstream maintainer when your tree
>> is submitted for merging.  You may also want to consider cooperating
>> with the maintainer of the conflicting tree to minimise any particularly
>> complex conflicts.
>> 
>
> What are the dependencies of these KVM patches on patches from the bare 
> metal trees,

I don't think there's actually a semantic dependency on my tree, but
there's multiple textual conflicts with my tree. That series has to go
via both trees, or there will be conflicts.

> ... and can you guys *please* start using topic branches?
>
> I've been asking you for literally years, but this is the first time I 
> remember that Linus will have to resolve conflicts in uAPI changes and 
> it is *not* acceptable.

The patches are in a topic branch, which I will ask you to pull before
the merge window, in order to resolve any conflicts.

> Please drop the patches at 
> https://www.spinics.net/lists/kvm-ppc/msg18666.html from the powerpc 
> tree, and merge them through either the kvm-powerpc or kvm trees.

The kvm-ppc tree is not taking patches at the moment.

But it doesn't matter anyway, this series needs to be merged into my
tree and the KVM tree regardless.

The topic branch is here:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/log/?h=topic/ppc-kvm


The commit Stephen mentioned has been rebased since to squash in a fix.
But what is in the topic branch is now final, I won't rebase what's
there.

cheers

^ permalink raw reply

* Re: [powerpc][next-20210621] WARNING at kernel/sched/fair.c:3277 during boot
From: Vincent Guittot @ 2021-06-22 14:31 UTC (permalink / raw)
  To: Sachin Sant; +Cc: Odin Ugedal, Linux Next Mailing List, linuxppc-dev, open list
In-Reply-To: <CAKfTPtDrHv4OOfPvwOE2DMNoucXQJ=yvvEpTVKrXghSdKEnZcA@mail.gmail.com>

Le mardi 22 juin 2021 à 09:49:31 (+0200), Vincent Guittot a écrit :
> Hi Sachin,
> 
> On Tue, 22 Jun 2021 at 09:39, Sachin Sant <sachinp@linux.vnet.ibm.com> wrote:
> >
> > While booting 5.13.0-rc7-next-20210621 on a PowerVM LPAR following warning
> > is seen
> >
> > [   30.922154] ------------[ cut here ]------------
> > [   30.922201] cfs_rq->avg.load_avg || cfs_rq->avg.util_avg || cfs_rq->avg.runnable_avg
> > [   30.922219] WARNING: CPU: 6 PID: 762 at kernel/sched/fair.c:3277 update_blocked_averages+0x758/0x780
> > [   30.922259] Modules linked in: pseries_rng xts vmx_crypto uio_pdrv_genirq uio sch_fq_codel ip_tables sd_mod t10_pi sg fuse
> > [   30.922309] CPU: 6 PID: 762 Comm: augenrules Not tainted 5.13.0-rc7-next-20210621 #1
> > [   30.922329] NIP:  c0000000001b27e8 LR: c0000000001b27e4 CTR: c0000000007cfda0
> > [   30.922344] REGS: c000000023fcb660 TRAP: 0700   Not tainted  (5.13.0-rc7-next-20210621)
> > [   30.922359] MSR:  8000000000029033 <SF,EE,ME,IR,DR,RI,LE>  CR: 48488224  XER: 00000005
> > [   30.922394] CFAR: c00000000014d120 IRQMASK: 1
> >                GPR00: c0000000001b27e4 c000000023fcb900 c000000002a08400 0000000000000048
> >                GPR04: 00000000ffff7fff c000000023fcb5c0 0000000000000027 c000000f6fdd7e18
> >                GPR08: 0000000000000023 0000000000000001 0000000000000027 c0000000028a6650
> >                GPR12: 0000000000008000 c000000f6fff7680 c000000f6fe62600 0000000000000032
> >                GPR16: 00000007331a989a c000000f6fe62600 c0000000238a6800 0000000000000001
> >                GPR20: 0000000000000000 c000000002a4dfe0 0000000000000000 0000000000000006
> >                GPR24: 0000000000000000 c000000f6fe63010 0000000000000001 c000000f6fe62680
> >                GPR28: 0000000000000006 c0000000238a69c0 0000000000000000 c000000f6fe62600
> > [   30.922569] NIP [c0000000001b27e8] update_blocked_averages+0x758/0x780
> > [   30.922599] LR [c0000000001b27e4] update_blocked_averages+0x754/0x780
> > [   30.922624] Call Trace:
> > [   30.922631] [c000000023fcb900] [c0000000001b27e4] update_blocked_averages+0x754/0x780 (unreliable)
> > [   30.922653] [c000000023fcba20] [c0000000001bd668] newidle_balance+0x258/0x5c0
> > [   30.922674] [c000000023fcbab0] [c0000000001bdaac] pick_next_task_fair+0x7c/0x4d0
> > [   30.922692] [c000000023fcbb10] [c000000000dcd31c] __schedule+0x15c/0x1780
> > [   30.922708] [c000000023fcbc50] [c0000000001a5a04] do_task_dead+0x64/0x70
> > [   30.922726] [c000000023fcbc80] [c000000000156338] do_exit+0x848/0xcc0
> > [   30.922743] [c000000023fcbd50] [c000000000156884] do_group_exit+0x64/0xe0
> > [   30.922758] [c000000023fcbd90] [c000000000156924] sys_exit_group+0x24/0x30
> > [   30.922774] [c000000023fcbdb0] [c0000000000310c0] system_call_exception+0x150/0x2d0
> > [   30.922792] [c000000023fcbe10] [c00000000000cc5c] system_call_common+0xec/0x278
> > [   30.922808] --- interrupt: c00 at 0x7fffb3acddcc
> > [   30.922821] NIP:  00007fffb3acddcc LR: 00007fffb3a27f04 CTR: 0000000000000000
> > [   30.922833] REGS: c000000023fcbe80 TRAP: 0c00   Not tainted  (5.13.0-rc7-next-20210621)
> > [   30.922847] MSR:  800000000280f033 <SF,VEC,VSX,EE,PR,FP,ME,IR,DR,RI,LE>  CR: 28444202  XER: 00000000
> > [   30.922882] IRQMASK: 0
> >                GPR00: 00000000000000ea 00007fffc8f21780 00007fffb3bf7100 0000000000000000
> >                GPR04: 0000000000000000 0000000155f142f0 0000000000000000 00007fffb3d23740
> >                GPR08: fffffffffbad2a87 0000000000000000 0000000000000000 0000000000000000
> >                GPR12: 0000000000000000 00007fffb3d2aeb0 0000000116be95e0 0000000000000032
> >                GPR16: 0000000000000000 00007fffc8f21cd8 000000000000002d 0000000000000024
> >                GPR20: 00007fffc8f21cd4 00007fffb3bf4f98 0000000000000001 0000000000000001
> >                GPR24: 00007fffb3bf0950 0000000000000000 0000000000000000 0000000000000001
> >                GPR28: 0000000000000000 0000000000000000 00007fffb3d23ec0 0000000000000000
> > [   30.923023] NIP [00007fffb3acddcc] 0x7fffb3acddcc
> > [   30.923035] LR [00007fffb3a27f04] 0x7fffb3a27f04
> > [   30.923045] --- interrupt: c00
> > [   30.923052] Instruction dump:
> > [   30.923061] 3863be48 9be97ae6 4bf9a8f9 60000000 0fe00000 4bfff980 e9210070 e8610088
> > [   30.923088] 39400001 99490003 4bf9a8d9 60000000 <0fe00000> 4bfffc24 3d22fff5 89297ae3
> > [   30.923113] ---[ end trace ed07974d2149c499 ]---
> >
> > This warning was introduced with commit 9e077b52d86a
> > sched/pelt: Check that *_avg are null when *_sum are
> 
> Yes. That was exactly the purpose of the patch. There is one last
> remaining part which could generate this. I'm going to prepare a patch

Could you try the patch below? I have been able to reproduce the problem locally and this
fixes it on my system:

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8cc27b847ad8..da91db1c137f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3037,8 +3037,9 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+       u32 divider = get_pelt_divider(&se->avg);
        sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
-       sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
+       cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
 }
 #else
 static inline void


> 
> Thanks
> 
> >
> > next-20210618 was good.
> >
> > Thanks
> > -Sachin

^ permalink raw reply related

* [PATCH 3/3] powerpc/pseries: fail quicker in dlpar_memory_add_by_ic()
From: Daniel Henrique Barboza @ 2021-06-22 13:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Henrique Barboza
In-Reply-To: <20210622133923.295373-1-danielhb413@gmail.com>

The validation done at the start of dlpar_memory_add_by_ic() is an all
or nothing scenario - if any LMB in the range is marked as RESERVED we
can fail right away.

We then can remove the 'lmbs_available' var and its check with
'lmbs_to_add' since the whole LMB range was already validated in the
previous step.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index c0a03e1537cb..377d852f5a9a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -796,7 +796,6 @@ static int dlpar_memory_add_by_index(u32 drc_index)
 static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
 {
 	struct drmem_lmb *lmb, *start_lmb, *end_lmb;
-	int lmbs_available = 0;
 	int rc;
 
 	pr_info("Attempting to hot-add %u LMB(s) at index %x\n",
@@ -811,15 +810,14 @@ static int dlpar_memory_add_by_ic(u32 lmbs_to_add, u32 drc_index)
 
 	/* Validate that the LMBs in this range are not reserved */
 	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
-		if (lmb->flags & DRCONF_MEM_RESERVED)
-			break;
-
-		lmbs_available++;
+		/* Fail immediately if the whole range can't be hot-added */
+		if (lmb->flags & DRCONF_MEM_RESERVED) {
+			pr_err("Memory at %llx (drc index %x) is reserved\n",
+					lmb->base_addr, lmb->drc_index);
+			return -EINVAL;
+		}
 	}
 
-	if (lmbs_available < lmbs_to_add)
-		return -EINVAL;
-
 	for_each_drmem_lmb_in_range(lmb, start_lmb, end_lmb) {
 		if (lmb->flags & DRCONF_MEM_ASSIGNED)
 			continue;
-- 
2.31.1


^ permalink raw reply related

* [PATCH 2/3] powerpc/pseries: break early in dlpar_memory_add_by_count() loops
From: Daniel Henrique Barboza @ 2021-06-22 13:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Henrique Barboza
In-Reply-To: <20210622133923.295373-1-danielhb413@gmail.com>

After a successful dlpar_add_lmb() call the LMB is marked as reserved.
Later on, depending on whether we added enough LMBs or not, we rely on
the marked LMBs to see which ones might need to be removed, and we
remove the reservation of all of them.

These are done in for_each_drmem_lmb() loops without any break
condition. This means that we're going to check all LMBs of the partition
even after going through all the reserved ones.

This patch adds break conditions in both loops to avoid this. The
'lmbs_added' variable was renamed to 'lmbs_reserved', and it's now
being decremented each time a lmb reservation is removed, indicating
if there are still marked LMBs to be processed.

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 28a7fd90232f..c0a03e1537cb 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -673,7 +673,7 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 {
 	struct drmem_lmb *lmb;
 	int lmbs_available = 0;
-	int lmbs_added = 0;
+	int lmbs_reserved = 0;
 	int rc;
 
 	pr_info("Attempting to hot-add %d LMB(s)\n", lmbs_to_add);
@@ -714,13 +714,12 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 		 * requested LMBs cannot be added.
 		 */
 		drmem_mark_lmb_reserved(lmb);
-
-		lmbs_added++;
-		if (lmbs_added == lmbs_to_add)
+		lmbs_reserved++;
+		if (lmbs_reserved == lmbs_to_add)
 			break;
 	}
 
-	if (lmbs_added != lmbs_to_add) {
+	if (lmbs_reserved != lmbs_to_add) {
 		pr_err("Memory hot-add failed, removing any added LMBs\n");
 
 		for_each_drmem_lmb(lmb) {
@@ -735,6 +734,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 				dlpar_release_drc(lmb->drc_index);
 
 			drmem_remove_lmb_reservation(lmb);
+			lmbs_reserved--;
+
+			if (lmbs_reserved == 0)
+				break;
 		}
 		rc = -EINVAL;
 	} else {
@@ -745,6 +748,10 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 			pr_debug("Memory at %llx (drc index %x) was hot-added\n",
 				 lmb->base_addr, lmb->drc_index);
 			drmem_remove_lmb_reservation(lmb);
+			lmbs_reserved--;
+
+			if (lmbs_reserved == 0)
+				break;
 		}
 		rc = 0;
 	}
-- 
2.31.1


^ permalink raw reply related

* [PATCH 0/3] powerpc/pseries: cleanups for dlpar_memory_add* functions
From: Daniel Henrique Barboza @ 2021-06-22 13:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Henrique Barboza

Hi,

These are a couple of cleanups for the dlpar_memory_add* functions
that are similar to those I did a month or so ago in
dlpar_memory_remove_by_count and dlpar_memory_remove_by_ic. 



Daniel Henrique Barboza (3):
  powerpc/pseries: skip reserved LMBs in dlpar_memory_add_by_count()
  powerpc/pseries: break early in dlpar_memory_add_by_count() loops
  powerpc/pseries: fail quicker in dlpar_memory_add_by_ic()

 .../platforms/pseries/hotplug-memory.c        | 34 ++++++++++++-------
 1 file changed, 21 insertions(+), 13 deletions(-)

-- 
2.31.1


^ permalink raw reply

* [PATCH 1/3] powerpc/pseries: skip reserved LMBs in dlpar_memory_add_by_count()
From: Daniel Henrique Barboza @ 2021-06-22 13:39 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Daniel Henrique Barboza
In-Reply-To: <20210622133923.295373-1-danielhb413@gmail.com>

The function is counting reserved LMBs as available to be added, but
they aren't. This will cause the function to miscalculate the available
LMBs and can trigger errors later on when executing dlpar_add_lmb().

Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 36f66556a7c6..28a7fd90232f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -683,6 +683,9 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
 
 	/* Validate that there are enough LMBs to satisfy the request */
 	for_each_drmem_lmb(lmb) {
+		if (lmb->flags & DRCONF_MEM_RESERVED)
+			continue;
+
 		if (!(lmb->flags & DRCONF_MEM_ASSIGNED))
 			lmbs_available++;
 
-- 
2.31.1


^ permalink raw reply related

* Re: [PATCH v3 0/4] Add perf interface to expose nvdimm
From: Peter Zijlstra @ 2021-06-22 13:14 UTC (permalink / raw)
  To: Kajol Jain
  Cc: nvdimm, santosh, maddy, ira.weiny, rnsastry, linux-kernel,
	atrajeev, aneesh.kumar, vaibhav, dan.j.williams, linuxppc-dev,
	tglx
In-Reply-To: <20210617132617.99529-1-kjain@linux.ibm.com>

On Thu, Jun 17, 2021 at 06:56:13PM +0530, Kajol Jain wrote:
> ---
> Kajol Jain (4):
>   drivers/nvdimm: Add nvdimm pmu structure
>   drivers/nvdimm: Add perf interface to expose nvdimm performance stats
>   powerpc/papr_scm: Add perf interface support
>   powerpc/papr_scm: Document papr_scm sysfs event format entries

Don't see anything obviously wrong with this one.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

^ permalink raw reply

* [PATCH] ASoC: fsl_spdif: Fix unexpected interrupt after suspend
From: Shengjiu Wang @ 2021-06-22 12:31 UTC (permalink / raw)
  To: timur, nicoleotsuka, Xiubo.Lee, festevam, broonie, perex, tiwai,
	alsa-devel
  Cc: linuxppc-dev, linux-kernel

When the system enters suspend, the machine driver's suspend callback
function is called, then the cpu driver's trigger callback
(SNDRV_PCM_TRIGGER_SUSPEND) is called, which disables the
interrupt.

But the machine driver suspend and cpu dai driver suspend order
may be changed: if the cpu dai driver's suspend callback is called before
the machine driver's suspend callback, then the interrupt is not cleared
successfully in the trigger callback.

So we need to clear interrupts in the cpu dai driver's suspend callback
to avoid this issue.

Fixes: 9cb2b3796e08 ("ASoC: fsl_spdif: Add pm runtime function")
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
---
 sound/soc/fsl/fsl_spdif.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
index a9c6b930b04a..8ffb1a6048d6 100644
--- a/sound/soc/fsl/fsl_spdif.c
+++ b/sound/soc/fsl/fsl_spdif.c
@@ -1467,6 +1467,9 @@ static int fsl_spdif_runtime_suspend(struct device *dev)
 	struct fsl_spdif_priv *spdif_priv = dev_get_drvdata(dev);
 	int i;
 
+	/* Disable all the interrupts */
+	regmap_update_bits(spdif_priv->regmap, REG_SPDIF_SIE, 0xffffff, 0);
+
 	regmap_read(spdif_priv->regmap, REG_SPDIF_SRPC,
 			&spdif_priv->regcache_srpc);
 	regcache_cache_only(spdif_priv->regmap, true);
-- 
2.27.0


^ permalink raw reply related

* Re: [PATCH v2 1/1] powerpc/papr_scm: Properly handle UUID types and API
From: Andy Shevchenko @ 2021-06-22 12:47 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev, linux-kernel
  Cc: Oliver O'Halloran, Paul Mackerras, Aneesh Kumar K . V
In-Reply-To: <YNHbSGzdgQh+6F+O@smile.fi.intel.com>

On Tue, Jun 22, 2021 at 03:44:56PM +0300, Andy Shevchenko wrote:
> On Wed, Jun 16, 2021 at 04:43:03PM +0300, Andy Shevchenko wrote:
> > Parse to and export from UUID own type, before dereferencing.
> > This also fixes wrong comment (Little Endian UUID is something else)
> > and should eliminate the direct strict types assignments.
> 
> Any comments on this version? Can it be applied?

"Any _other_ comments..."

> > Fixes: 43001c52b603 ("powerpc/papr_scm: Use ibm,unit-guid as the iset cookie")
> > Fixes: 259a948c4ba1 ("powerpc/pseries/scm: Use a specific endian format for storing uuid from the device tree")

AFAIU it's fine to have Fixes tags, but if anybody insists I will remove them
and send v3.

> > ---
> > v2: added missed header (Vaibhav), updated comment (Aneesh),
> >     rewrite part of the commit message to avoid mentioning the Sparse

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply

* Re: [PATCH v2 1/1] powerpc/papr_scm: Properly handle UUID types and API
From: Andy Shevchenko @ 2021-06-22 12:44 UTC (permalink / raw)
  To: Michael Ellerman, linuxppc-dev, linux-kernel
  Cc: Oliver O'Halloran, Paul Mackerras, Aneesh Kumar K . V
In-Reply-To: <20210616134303.58185-1-andriy.shevchenko@linux.intel.com>

On Wed, Jun 16, 2021 at 04:43:03PM +0300, Andy Shevchenko wrote:
> Parse to and export from UUID own type, before dereferencing.
> This also fixes wrong comment (Little Endian UUID is something else)
> and should eliminate the direct strict types assignments.

Any comments on this version? Can it be applied?

> Fixes: 43001c52b603 ("powerpc/papr_scm: Use ibm,unit-guid as the iset cookie")
> Fixes: 259a948c4ba1 ("powerpc/pseries/scm: Use a specific endian format for storing uuid from the device tree")
> Cc: Oliver O'Halloran <oohall@gmail.com>
> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
> v2: added missed header (Vaibhav), updated comment (Aneesh),
>     rewrite part of the commit message to avoid mentioning the Sparse

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply

* Re: [PATCH v4 7/7] powerpc/pseries: Add support for FORM2 associativity
From: Aneesh Kumar K.V @ 2021-06-22 12:07 UTC (permalink / raw)
  To: Daniel Henrique Barboza, linuxppc-dev, mpe
  Cc: Nathan Lynch, nvdimm, dan.j.williams, David Gibson
In-Reply-To: <e500697d-1866-538c-eaff-613e04a92c93@gmail.com>

Daniel Henrique Barboza <danielhb413@gmail.com> writes:

> On 6/17/21 1:51 PM, Aneesh Kumar K.V wrote:
>> PAPR interface currently supports two different ways of communicating resource
>> grouping details to the OS. These are referred to as Form 0 and Form 1
>> associativity grouping. Form 0 is the older format and is now considered
>> deprecated. This patch adds another resource grouping named FORM2.
>> 
>> Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>>   Documentation/powerpc/associativity.rst   | 135 ++++++++++++++++++++
>>   arch/powerpc/include/asm/firmware.h       |   3 +-
>>   arch/powerpc/include/asm/prom.h           |   1 +
>>   arch/powerpc/kernel/prom_init.c           |   3 +-
>>   arch/powerpc/mm/numa.c                    | 149 +++++++++++++++++++++-
>>   arch/powerpc/platforms/pseries/firmware.c |   1 +
>>   6 files changed, 286 insertions(+), 6 deletions(-)
>>   create mode 100644 Documentation/powerpc/associativity.rst
>> 
>> diff --git a/Documentation/powerpc/associativity.rst b/Documentation/powerpc/associativity.rst
>> new file mode 100644
>> index 000000000000..93be604ac54d
>> --- /dev/null
>> +++ b/Documentation/powerpc/associativity.rst
>> @@ -0,0 +1,135 @@
>> +===========================
>> +NUMA resource associativity
>> +===========================
>> +
>> +Associativity represents the groupings of the various platform resources into
>> +domains of substantially similar mean performance relative to resources outside
>> +of that domain. Resources subsets of a given domain that exhibit better
>> +performance relative to each other than relative to other resources subsets
>> +are represented as being members of a sub-grouping domain. This performance
>> +characteristic is presented in terms of NUMA node distance within the Linux kernel.
>> +From the platform view, these groups are also referred to as domains.
>> +
>> +PAPR interface currently supports different ways of communicating these resource
>> +grouping details to the OS. These are referred to as Form 0, Form 1 and Form2
>> +associativity grouping. Form 0 is the older format and is now considered deprecated.
>> +
>> +Hypervisor indicates the type/form of associativity used via the "ibm,architecture-vec-5" property.
>> +Bit 0 of byte 5 in the "ibm,architecture-vec-5" property indicates usage of Form 0 or Form 1.
>> +A value of 1 indicates the usage of Form 1 associativity. For Form 2 associativity
>> +bit 2 of byte 5 in the "ibm,architecture-vec-5" property is used.
>> +
>> +Form 0
>> +------
>> +Form 0 associativity supports only two NUMA distance (LOCAL and REMOTE).
>> +
>> +Form 1
>> +------
>> +With Form 1 a combination of ibm,associativity-reference-points and ibm,associativity
>> +device tree properties are used to determine the NUMA distance between resource groups/domains.
>> +
>> +The “ibm,associativity” property contains one or more lists of numbers (domainID)
>> +representing the resource’s platform grouping domains.
>> +
>> +The “ibm,associativity-reference-points” property contains one or more list of numbers
>> +(domainID index) that represents the 1 based ordinal in the associativity lists.
>> +The list of domainID index represents increasing hierarchy of resource grouping.
>> +
>> +ex:
>> +{ primary domainID index, secondary domainID index, tertiary domainID index.. }
>> +
>> +Linux kernel uses the domainID at the primary domainID index as the NUMA node id.
>> +Linux kernel computes NUMA distance between two domains by recursively comparing
>> +if they belong to the same higher-level domains. For mismatch at every higher
>> +level of the resource group, the kernel doubles the NUMA distance between the
>> +comparing domains.
>> +
>> +Form 2
>> +-------
>> +Form 2 associativity format adds separate device tree properties representing NUMA node distance
>> +thereby making the node distance computation flexible. Form 2 also allows flexible primary
>> +domain numbering. With numa distance computation now detached from the index value of
>> +"ibm,associativity" property, Form 2 allows a large number of primary domain ids at the
>> +same domainID index representing resource groups of different performance/latency characteristics.
>> +
>> +Hypervisor indicates the usage of FORM2 associativity using bit 2 of byte 5 in the
>> +"ibm,architecture-vec-5" property.
>> +
>> +"ibm,numa-lookup-index-table" property contains one or more list numbers representing
>> +the domainIDs present in the system. The offset of the domainID in this property is considered
>> +the domainID index.
>> +
>> +prop-encoded-array: The number N of the domainIDs encoded as with encode-int, followed by
>> +N domainID encoded as with encode-int
>> +
>> +For ex:
>> +ibm,numa-lookup-index-table =  {4, 0, 8, 250, 252}, domainID index for domainID 8 is 1.
>> +
>> +"ibm,numa-distance-table" property contains one or more list of numbers representing the NUMA
>> +distance between resource groups/domains present in the system.
>> +
>> +prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by
>> +N distance values encoded as with encode-bytes. The max distance value we could encode is 255.
>> +
>> +For ex:
>> +ibm,numa-lookup-index-table =  {3, 0, 8, 40}
>> +ibm,numa-distance-table     =  {9, 10, 20, 80, 20, 10, 160, 80, 160, 10}
>> +
>> +  | 0    8   40
>> +--|------------
>> +  |
>> +0 | 10   20  80
>> +  |
>> +8 | 20   10  160
>> +  |
>> +40| 80   160  10
>> +
>> +
>> +"ibm,associativity" property for resources in node 0, 8 and 40
>> +
>> +{ 3, 6, 7, 0 }
>> +{ 3, 6, 9, 8 }
>> +{ 3, 6, 7, 40}
>> +
>> +With "ibm,associativity-reference-points"  { 0x3 }
>
> With this configuration, would the following ibm,associativity arrays
> also be valid?
>
>
> { 3, 0, 0, 0 }
> { 3, 0, 0, 8 }
> { 3, 0, 0, 40}
>

Yes

> If yes, then we need a way to tell that the associativity domains assignment
> are optional, and FORM2 relies solely on finding out the domainID of the
> resource (0, 8 and 40) to retrieve the domainID index, and with this
> index all performance metrics can be retrieved from the numa-* properties
> (numa-distance-table, numa-bandwidth-table ...).
>

Where do you suggest we clarify that? I agree that it is not explicitly
mentioned. But we describe the details of how we find the numa distance
with example in the document.

> Retrieving the resource domainID is done by using ibm,associativity-reference-points.
>
> This will allow the platform to implement FORM2 such as:
>
> { 1, 0 }
> { 1, 8 }
> { 1, 40 }
>   
> - ref-points: { 0x1 }
>
> If the platform chooses to do so.
>

That is correct.

>
>> +
>> +Each resource (drcIndex) now also supports additional optional device tree properties.
>> +These properties are marked optional because the platform can choose not to export
>> +them and provide the system topology details using the earlier defined device tree
>> +properties alone. The optional device tree properties are used when adding new resources
>> +(DLPAR) and when the platform didn't provide the topology details of the domain which
>> +contains the newly added resource during boot.
>> +
>> +"ibm,numa-lookup-index" property contains a number representing the domainID index to be used
>> +when building the NUMA distance of the numa node to which this resource belongs. This can
>> +be looked at as the index at which this new domainID would have appeared in
>> +"ibm,numa-lookup-index-table" if the domain was present during boot. The domainID
>> +of the new resource can be obtained from the existing "ibm,associativity" property. This
>> +can be used to build distance information of a newly onlined NUMA node via DLPAR operation.
>> +The value is 1 based array index value.
>> +
>> +prop-encoded-array: An integer encoded as with encode-int specifying the domainID index
>> +
>> +"ibm,numa-distance" property contains one or more list of numbers presenting the NUMA distance
>> +from this resource domain to other resources.
>> +
>> +prop-encoded-array: The number N of the distance values encoded as with encode-int, followed by
>> +N distance values encoded as with encode-bytes. The max distance value we could encode is 255.
>> +
>> +For ex:
>> +ibm,associativity     = { 4, 5, 10, 50}
>> +ibm,numa-lookup-index = { 4 }
>> +ibm,numa-distance   =  {8, 160, 255, 80, 10, 160, 255, 80, 10}
>> +
>> +resulting in a new topology as below.
>> +  | 0    8   40   50
>> +--|------------------
>> +  |
>> +0 | 10   20  80   160
>> +  |
>> +8 | 20   10  160  255
>> +  |
>> +40| 80   160  10  80
>> +  |
>> +50| 160  255  80  10
>> +
>
> I see there is no mention of the special PAPR SCM handling. I saw in
> one of the your replies of v1:
>
> "Another option is to make sure that numa-distance-value is populated
> such that PMEMB distance indicates it is closer to node0 when compared
> to node1. ie, node_distance[40][0] < node_distance[40][1]. One could
> possibly infer the grouping based on the distance value and not depend
> on ibm,associativity for that purpose."
>
>
> Is that what we're supposed to do with PAPR SCM? I'm not sure how that
> affects NVDIMM support in QEMU with FORM2.
>
>

Yes, that is what we are doing with this version (v4) of the patchset.
We can drop the nvdimm specific changes from Qemu.

-aneesh

^ permalink raw reply

* [RFC PATCH 43/43] KVM: PPC: Book3S HV P9: Optimise hash guest SLB saving
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

slbmfee/slbmfev instructions are very expensive, more so than a regular
mfspr instruction, so minimising them significantly improves hash guest
exit performance. The slbmfev is only required if slbmfee found a valid
SLB entry.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/kvm/book3s_hv_p9_entry.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 3fffcec67ff8..5e9e9f809297 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -459,10 +459,22 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
 #define accumulate_time(vcpu, next) do {} while (0)
 #endif
 
-static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+static inline u64 mfslbv(unsigned int idx)
 {
-	asm volatile("slbmfev  %0,%1" : "=r" (*slbev) : "r" (idx));
-	asm volatile("slbmfee  %0,%1" : "=r" (*slbee) : "r" (idx));
+	u64 slbev;
+
+	asm volatile("slbmfev  %0,%1" : "=r" (slbev) : "r" (idx));
+
+	return slbev;
+}
+
+static inline u64 mfslbe(unsigned int idx)
+{
+	u64 slbee;
+
+	asm volatile("slbmfee  %0,%1" : "=r" (slbee) : "r" (idx));
+
+	return slbee;
 }
 
 static inline void mtslb(u64 slbee, u64 slbev)
@@ -592,8 +604,10 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
 		 */
 		for (i = 0; i < vcpu->arch.slb_nr; i++) {
 			u64 slbee, slbev;
-			mfslb(i, &slbee, &slbev);
+
+			slbee = mfslbe(i);
 			if (slbee & SLB_ESID_V) {
+				slbev = mfslbv(i);
 				vcpu->arch.slb[nr].orige = slbee | i;
 				vcpu->arch.slb[nr].origv = slbev;
 				nr++;
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 42/43] KVM: PPC: Book3S HV P9: Improve mfmsr performance on entry
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

Rearrange the MSR saving on entry so it does not follow the mtmsrd to
disable interrupts, avoiding a possible RAW scoreboard stall.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  2 +
 arch/powerpc/kvm/book3s_hv.c             | 18 ++-----
 arch/powerpc/kvm/book3s_hv_p9_entry.c    | 66 +++++++++++++++---------
 3 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index f8a0ed90b853..20ca9b1a2d41 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,6 +153,8 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
 	return radix;
 }
 
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr);
+
 int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb);
 
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7cb9e87b50b7..c8edab9a90cb 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3759,6 +3759,8 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
 	s64 dec;
 	int trap;
 
+	msr = mfmsr();
+
 	save_p9_host_os_sprs(&host_os_sprs);
 
 	/*
@@ -3769,24 +3771,10 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns
 	 */
 	host_psscr = mfspr(SPRN_PSSCR_PR);
 
-	hard_irq_disable();
+	kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
 	if (lazy_irq_pending())
 		return 0;
 
-	/* MSR bits may have been cleared by context switch */
-	msr = 0;
-	if (IS_ENABLED(CONFIG_PPC_FPU))
-		msr |= MSR_FP;
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		msr |= MSR_VEC;
-	if (cpu_has_feature(CPU_FTR_VSX))
-		msr |= MSR_VSX;
-	if ((cpu_has_feature(CPU_FTR_TM) ||
-	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
-			(vcpu->arch.hfscr & HFSCR_TM))
-		msr |= MSR_TM;
-	msr = msr_check_and_set(msr);
-
 	load_vcpu_state(vcpu, &host_os_sprs);
 
 	if (vcpu->arch.psscr != host_psscr)
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
index 48b0ce9e0c39..3fffcec67ff8 100644
--- a/arch/powerpc/kvm/book3s_hv_p9_entry.c
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -604,6 +604,44 @@ static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
 	}
 }
 
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr)
+{
+	unsigned long msr_needed = 0;
+
+	msr &= ~MSR_EE;
+
+	/* MSR bits may have been cleared by context switch so must recheck */
+	if (IS_ENABLED(CONFIG_PPC_FPU))
+		msr_needed |= MSR_FP;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		msr_needed |= MSR_VEC;
+	if (cpu_has_feature(CPU_FTR_VSX))
+		msr_needed |= MSR_VSX;
+	if ((cpu_has_feature(CPU_FTR_TM) ||
+	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+			(vcpu->arch.hfscr & HFSCR_TM))
+		msr_needed |= MSR_TM;
+
+	/*
+	 * This could be combined with MSR[RI] clearing, but that expands
+	 * the unrecoverable window. It would be better to cover unrecoverable
+	 * with KVM bad interrupt handling rather than use MSR[RI] at all.
+	 *
+	 * Much more difficult and less worthwhile to combine with IR/DR
+	 * disable.
+	 */
+	if ((msr & msr_needed) != msr_needed) {
+		msr |= msr_needed;
+		__mtmsrd(msr, 0);
+	} else {
+		__hard_irq_disable();
+	}
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+	return msr;
+}
+EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
+
 int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
 {
 	struct p9_host_os_sprs host_os_sprs;
@@ -637,6 +675,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	vcpu->arch.ceded = 0;
 
+	/* Save MSR for restore, with EE clear. */
+	msr = mfmsr() & ~MSR_EE;
+
 	host_hfscr = mfspr(SPRN_HFSCR);
 	host_ciabr = mfspr(SPRN_CIABR);
 	host_psscr = mfspr(SPRN_PSSCR_PR);
@@ -658,35 +699,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
 
 	save_p9_host_os_sprs(&host_os_sprs);
 
-	/*
-	 * This could be combined with MSR[RI] clearing, but that expands
-	 * the unrecoverable window. It would be better to cover unrecoverable
-	 * with KVM bad interrupt handling rather than use MSR[RI] at all.
-	 *
-	 * Much more difficult and less worthwhile to combine with IR/DR
-	 * disable.
-	 */
-	hard_irq_disable();
+	msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
 	if (lazy_irq_pending()) {
 		trap = 0;
 		goto out;
 	}
 
-	/* MSR bits may have been cleared by context switch */
-	msr = 0;
-	if (IS_ENABLED(CONFIG_PPC_FPU))
-		msr |= MSR_FP;
-	if (cpu_has_feature(CPU_FTR_ALTIVEC))
-		msr |= MSR_VEC;
-	if (cpu_has_feature(CPU_FTR_VSX))
-		msr |= MSR_VSX;
-	if ((cpu_has_feature(CPU_FTR_TM) ||
-	    cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
-			(vcpu->arch.hfscr & HFSCR_TM))
-		msr |= MSR_TM;
-	msr = msr_check_and_set(msr);
-	/* Save MSR for restore. This is after hard disable, so EE is clear. */
-
 	if (vc->tb_offset) {
 		u64 new_tb = *tb + vc->tb_offset;
 		mtspr(SPRN_TBU40, new_tb);
-- 
2.23.0


^ permalink raw reply related

* [RFC PATCH 41/43] KVM: PPC: Book3S HV Nested: Avoid extra mftb() in nested entry
From: Nicholas Piggin @ 2021-06-22 10:57 UTC (permalink / raw)
  To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210622105736.633352-1-npiggin@gmail.com>

mftb() is expensive and one can be avoided on nested guest dispatch.

If the time checking code distinguishes between the L0 timer and the
nested HV timer, then both can be tested in the same place with the
same mftb() value.

This also nicely illustrates the relationship between the L0 and nested
HV timers.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 arch/powerpc/include/asm/kvm_asm.h  |  1 +
 arch/powerpc/kvm/book3s_hv.c        | 12 ++++++++++++
 arch/powerpc/kvm/book3s_hv_nested.c |  5 -----
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index fbbf3cec92e9..d68d71987d5c 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -79,6 +79,7 @@
 #define BOOK3S_INTERRUPT_FP_UNAVAIL	0x800
 #define BOOK3S_INTERRUPT_DECREMENTER	0x900
 #define BOOK3S_INTERRUPT_HV_DECREMENTER	0x980
+#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER	0x1980
 #define BOOK3S_INTERRUPT_DOORBELL	0xa00
 #define BOOK3S_INTERRUPT_SYSCALL	0xc00
 #define BOOK3S_INTERRUPT_TRACE		0xd00
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9d8277a4c829..7cb9e87b50b7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1410,6 +1410,10 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
 	run->ready_for_interrupt_injection = 1;
 	switch (vcpu->arch.trap) {
 	/* We're good on these - the host merely wanted to get our attention */
+	case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+		WARN_ON_ONCE(1); /* Should never happen */
+		vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+		fallthrough;
 	case BOOK3S_INTERRUPT_HV_DECREMENTER:
 		vcpu->stat.dec_exits++;
 		r = RESUME_GUEST;
@@ -1737,6 +1741,12 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
 		vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
+	/* These need to go to the nested HV */
+	case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+		vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+		vcpu->stat.dec_exits++;
+		r = RESUME_HOST;
+		break;
 	/* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
 	case BOOK3S_INTERRUPT_HMI:
 	case BOOK3S_INTERRUPT_PERFMON:
@@ -3855,6 +3865,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 		return BOOK3S_INTERRUPT_HV_DECREMENTER;
 	if (next_timer < time_limit)
 		time_limit = next_timer;
+	else if (*tb >= time_limit) /* nested time limit */
+		return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER;
 
 	vcpu->arch.ceded = 0;
 
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 5a534f7924f2..a92808a927ff 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -361,11 +361,6 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
 	do {
-		if (mftb() >= hdec_exp) {
-			vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
-			r = RESUME_HOST;
-			break;
-		}
 		r = kvmhv_run_single_vcpu(vcpu, hdec_exp, l2_hv.lpcr);
 	} while (is_kvmppc_resume_guest(r));
 
-- 
2.23.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox