All of lore.kernel.org
 help / color / mirror / Atom feed
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
To: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mark Rutland <mark.rutland@arm.com>,
	Valentin Schneider <vschneid@redhat.com>,
	Peter Zijlstra <peterz@infradead.org>,
	"ndesaulniers@google.com" <ndesaulniers@google.com>,
	linux-kernel@vger.kernel.org, Nicholas Piggin <npiggin@gmail.com>,
	Rohan McLure <rmclure@linux.ibm.com>,
	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>,
	Josh Poimboeuf <jpoimboe@kernel.org>
Subject: Re: [PATCH v2 1/6] powerpc/smp: Cache CPU has Asymmetric SMP
Date: Fri, 20 Oct 2023 14:39:08 +0530	[thread overview]
Message-ID: <20231020090908.GM2194132@linux.vnet.ibm.com> (raw)
In-Reply-To: <87y1fz5j03.fsf@mail.lhotse>

* Michael Ellerman <mpe@ellerman.id.au> [2023-10-19 15:33:16]:

> Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
> > Currently cpu feature flag is checked whenever powerpc_smt_flags gets
> > called. This is an unnecessary overhead. CPU_FTR_ASYM_SMT is set based
> > on the processor and all processors will either have this set or will
> > have it unset.
> 
> The cpu_has_feature() test is implemented with a static key.
> 
> So AFAICS this is just replacing one static key with another?
> 

> I see that you use the new static key in subsequent patches. But
> couldn't those just use the existing cpu feature test?
> 

Yes, we can use the existing cpu feature test itself.

> Anyway I'd be interested to see how the generated code differs
> before/after this.
> 
---------------------------->8----------------------------------------------8<------------
Before this change
0000000000000500 <powerpc_smt_flags>:
{
     500:	00 00 4c 3c 	addis   r2,r12,0
     504:	00 00 42 38 	addi    r2,r2,0
     508:	a6 02 08 7c 	mflr    r0
     50c:	01 00 00 48 	bl      50c <powerpc_smt_flags+0xc>
     510:	f8 ff e1 fb 	std     r31,-8(r1)
     514:	91 ff 21 f8 	stdu    r1,-112(r1)
#define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE	4

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
	asm_volatile_goto("1:\n\t"
     518:	00 00 00 60 	nop
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
     51c:	00 00 22 3d 	addis   r9,r2,0
		flags |= SD_ASYM_PACKING;
     520:	80 05 e0 3b 	li      r31,1408
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
     524:	00 00 29 89 	lbz     r9,0(r9)
     528:	00 00 09 2c 	cmpwi   r9,0
     52c:	28 00 82 41 	beq     554 <powerpc_smt_flags+0x54>
}
     530:	70 00 21 38 	addi    r1,r1,112
     534:	b4 07 e3 7f 	extsw   r3,r31
     538:	f8 ff e1 eb 	ld      r31,-8(r1)
     53c:	20 00 80 4e 	blr
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
     540:	80 01 e0 3b 	li      r31,384
}
     544:	70 00 21 38 	addi    r1,r1,112
     548:	b4 07 e3 7f 	extsw   r3,r31
     54c:	f8 ff e1 eb 	ld      r31,-8(r1)
     550:	20 00 80 4e 	blr
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
     554:	a6 02 08 7c 	mflr    r0
     558:	00 00 62 3c 	addis   r3,r2,0
     55c:	01 00 20 39 	li      r9,1
     560:	00 00 42 3d 	addis   r10,r2,0
     564:	00 00 63 38 	addi    r3,r3,0
     568:	00 00 2a 99 	stb     r9,0(r10)
     56c:	80 00 01 f8 	std     r0,128(r1)
     570:	01 00 00 48 	bl      570 <powerpc_smt_flags+0x70>
     574:	00 00 00 60 	nop
     578:	80 00 01 e8 	ld      r0,128(r1)
     57c:	a6 03 08 7c 	mtlr    r0
     580:	b0 ff ff 4b 	b       530 <powerpc_smt_flags+0x30>
     584:	00 00 00 60 	nop
     588:	00 00 00 60 	nop
     58c:	00 00 00 60 	nop


post this change.
0000000000000340 <powerpc_smt_flags>:
{
     340:	a6 02 08 7c 	mflr    r0
     344:	01 00 00 48 	bl      344 <powerpc_smt_flags+0x4>
#define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE	4

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
	asm_volatile_goto("1:\n\t"
     348:	00 00 00 60 	nop
	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
     34c:	80 01 60 38 	li      r3,384
}
     350:	b4 07 63 7c 	extsw   r3,r3
     354:	20 00 80 4e 	blr
     358:	00 00 00 60 	nop
     35c:	00 00 00 60 	nop
		return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
     360:	80 05 60 38 	li      r3,1408
}
     364:	b4 07 63 7c 	extsw   r3,r3
     368:	20 00 80 4e 	blr
     36c:	00 00 00 60 	nop

---------------------------->8----------------------------------------------8<------------

I think the most of the difference is due to moving pr_info_once to
fixup_topology. Does it make sense to move the pr_info_once to
fixup_topology (which is called less often) from powerpc_smt_flags?

Even though the pr_info_once would probably translate to load + cmp + branch
we could avoid that for each smt_flag call.

So something like

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5826f5108a12..bc22f775425b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -993,13 +993,10 @@ static bool shared_caches;
 /* cpumask of CPUs with asymmetric SMT dependency */
 static int powerpc_smt_flags(void)
 {
-	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+		return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
 
-	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
-		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
-		flags |= SD_ASYM_PACKING;
-	}
-	return flags;
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 }
 #endif
 
@@ -1687,6 +1684,9 @@ static void __init fixup_topology(void)
 	int i;
 
 #ifdef CONFIG_SCHED_SMT
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT))
+		pr_info_once("Enabling Asymmetric SMT scheduling\n");
+
 	if (has_big_cores) {
 		pr_info("Big cores detected but using small core scheduling\n");
 		powerpc_topology[smt_idx].mask = smallcore_smt_mask;
-- 
Thanks and Regards
Srikar Dronamraju

WARNING: multiple messages have this Message-ID (diff)
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
To: Michael Ellerman <mpe@ellerman.id.au>
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>,
	Christophe Leroy <christophe.leroy@csgroup.eu>,
	Nicholas Piggin <npiggin@gmail.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Rohan McLure <rmclure@linux.ibm.com>,
	Valentin Schneider <vschneid@redhat.com>,
	Josh Poimboeuf <jpoimboe@kernel.org>,
	"ndesaulniers@google.com" <ndesaulniers@google.com>,
	Mark Rutland <mark.rutland@arm.com>,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v2 1/6] powerpc/smp: Cache CPU has Asymmetric SMP
Date: Fri, 20 Oct 2023 14:39:08 +0530	[thread overview]
Message-ID: <20231020090908.GM2194132@linux.vnet.ibm.com> (raw)
In-Reply-To: <87y1fz5j03.fsf@mail.lhotse>

* Michael Ellerman <mpe@ellerman.id.au> [2023-10-19 15:33:16]:

> Srikar Dronamraju <srikar@linux.vnet.ibm.com> writes:
> > Currently cpu feature flag is checked whenever powerpc_smt_flags gets
> > called. This is an unnecessary overhead. CPU_FTR_ASYM_SMT is set based
> > on the processor and all processors will either have this set or will
> > have it unset.
> 
> The cpu_has_feature() test is implemented with a static key.
> 
> So AFAICS this is just replacing one static key with another?
> 

> I see that you use the new static key in subsequent patches. But
> couldn't those just use the existing cpu feature test?
> 

Yes, we can use the existing cpu feature test itself.

> Anyway I'd be interested to see how the generated code differs
> before/after this.
> 
---------------------------->8----------------------------------------------8<------------
Before this change
0000000000000500 <powerpc_smt_flags>:
{
     500:	00 00 4c 3c 	addis   r2,r12,0
     504:	00 00 42 38 	addi    r2,r2,0
     508:	a6 02 08 7c 	mflr    r0
     50c:	01 00 00 48 	bl      50c <powerpc_smt_flags+0xc>
     510:	f8 ff e1 fb 	std     r31,-8(r1)
     514:	91 ff 21 f8 	stdu    r1,-112(r1)
#define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE	4

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
	asm_volatile_goto("1:\n\t"
     518:	00 00 00 60 	nop
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
     51c:	00 00 22 3d 	addis   r9,r2,0
		flags |= SD_ASYM_PACKING;
     520:	80 05 e0 3b 	li      r31,1408
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
     524:	00 00 29 89 	lbz     r9,0(r9)
     528:	00 00 09 2c 	cmpwi   r9,0
     52c:	28 00 82 41 	beq     554 <powerpc_smt_flags+0x54>
}
     530:	70 00 21 38 	addi    r1,r1,112
     534:	b4 07 e3 7f 	extsw   r3,r31
     538:	f8 ff e1 eb 	ld      r31,-8(r1)
     53c:	20 00 80 4e 	blr
	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
     540:	80 01 e0 3b 	li      r31,384
}
     544:	70 00 21 38 	addi    r1,r1,112
     548:	b4 07 e3 7f 	extsw   r3,r31
     54c:	f8 ff e1 eb 	ld      r31,-8(r1)
     550:	20 00 80 4e 	blr
		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
     554:	a6 02 08 7c 	mflr    r0
     558:	00 00 62 3c 	addis   r3,r2,0
     55c:	01 00 20 39 	li      r9,1
     560:	00 00 42 3d 	addis   r10,r2,0
     564:	00 00 63 38 	addi    r3,r3,0
     568:	00 00 2a 99 	stb     r9,0(r10)
     56c:	80 00 01 f8 	std     r0,128(r1)
     570:	01 00 00 48 	bl      570 <powerpc_smt_flags+0x70>
     574:	00 00 00 60 	nop
     578:	80 00 01 e8 	ld      r0,128(r1)
     57c:	a6 03 08 7c 	mtlr    r0
     580:	b0 ff ff 4b 	b       530 <powerpc_smt_flags+0x30>
     584:	00 00 00 60 	nop
     588:	00 00 00 60 	nop
     58c:	00 00 00 60 	nop


post this change.
0000000000000340 <powerpc_smt_flags>:
{
     340:	a6 02 08 7c 	mflr    r0
     344:	01 00 00 48 	bl      344 <powerpc_smt_flags+0x4>
#define JUMP_ENTRY_TYPE		stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE	4

static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
	asm_volatile_goto("1:\n\t"
     348:	00 00 00 60 	nop
	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
     34c:	80 01 60 38 	li      r3,384
}
     350:	b4 07 63 7c 	extsw   r3,r3
     354:	20 00 80 4e 	blr
     358:	00 00 00 60 	nop
     35c:	00 00 00 60 	nop
		return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
     360:	80 05 60 38 	li      r3,1408
}
     364:	b4 07 63 7c 	extsw   r3,r3
     368:	20 00 80 4e 	blr
     36c:	00 00 00 60 	nop

---------------------------->8----------------------------------------------8<------------

I think the most of the difference is due to moving pr_info_once to
fixup_topology. Does it make sense to move the pr_info_once to
fixup_topology (which is called less often) from powerpc_smt_flags?

Even though the pr_info_once would probably translate to load + cmp + branch
we could avoid that for each smt_flag call.

So something like

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5826f5108a12..bc22f775425b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -993,13 +993,10 @@ static bool shared_caches;
 /* cpumask of CPUs with asymmetric SMT dependency */
 static int powerpc_smt_flags(void)
 {
-	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+		return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING;
 
-	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
-		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
-		flags |= SD_ASYM_PACKING;
-	}
-	return flags;
+	return SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
 }
 #endif
 
@@ -1687,6 +1684,9 @@ static void __init fixup_topology(void)
 	int i;
 
 #ifdef CONFIG_SCHED_SMT
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT))
+		pr_info_once("Enabling Asymmetric SMT scheduling\n");
+
 	if (has_big_cores) {
 		pr_info("Big cores detected but using small core scheduling\n");
 		powerpc_topology[smt_idx].mask = smallcore_smt_mask;
-- 
Thanks and Regards
Srikar Dronamraju

  reply	other threads:[~2023-10-20  9:10 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-10-18 16:37 [PATCH v2 0/6] powerpc/smp: Shared processor sched optimizations Srikar Dronamraju
2023-10-18 16:37 ` [PATCH v2 1/6] powerpc/smp: Cache CPU has Asymmetric SMP Srikar Dronamraju
2023-10-18 16:37   ` Srikar Dronamraju
2023-10-19  4:33   ` Michael Ellerman
2023-10-19  4:33     ` Michael Ellerman
2023-10-20  9:09     ` Srikar Dronamraju [this message]
2023-10-20  9:09       ` Srikar Dronamraju
2023-10-18 16:37 ` [PATCH v2 2/6] powerpc/smp: Enable Asym packing for cores on shared processor Srikar Dronamraju
2023-10-18 16:37   ` Srikar Dronamraju
2023-10-19  4:38   ` Michael Ellerman
2023-10-19  4:38     ` Michael Ellerman
2023-10-19  7:48     ` Peter Zijlstra
2023-10-19  7:48       ` Peter Zijlstra
2023-10-19 11:50       ` Michael Ellerman
2023-10-19 11:50         ` Michael Ellerman
2023-10-19 12:54       ` Srikar Dronamraju
2023-10-19 12:54         ` Srikar Dronamraju
2023-10-19 15:56   ` Shrikanth Hegde
2023-10-19 15:56     ` Shrikanth Hegde
2023-10-20  5:44     ` Srikar Dronamraju
2023-10-20  5:44       ` Srikar Dronamraju
2023-10-18 16:37 ` [PATCH v2 3/6] powerpc/smp: Move shared_processor static key to smp.h Srikar Dronamraju
2023-10-18 16:37   ` Srikar Dronamraju
2023-10-19  4:41   ` Michael Ellerman
2023-10-19  4:41     ` Michael Ellerman
2023-10-19  4:41     ` Michael Ellerman
2023-10-19 13:08     ` Srikar Dronamraju
2023-10-19 13:08       ` Srikar Dronamraju
2023-10-20 10:44       ` Michael Ellerman
2023-10-20 10:44         ` Michael Ellerman
2023-10-20 10:44         ` Michael Ellerman
2023-10-19 10:30   ` Shrikanth Hegde
2023-10-19 10:30     ` Shrikanth Hegde
2023-10-18 16:37 ` [PATCH v2 4/6] powerpc/smp: Disable MC domain for shared processor Srikar Dronamraju
2023-10-18 16:37   ` Srikar Dronamraju
2023-10-19  4:48   ` Michael Ellerman
2023-10-19  4:48     ` Michael Ellerman
2023-10-19  7:50     ` Peter Zijlstra
2023-10-19  7:50       ` Peter Zijlstra
2023-10-19 13:23       ` Srikar Dronamraju
2023-10-19 13:23         ` Srikar Dronamraju
2023-10-19 13:16     ` Srikar Dronamraju
2023-10-19 13:16       ` Srikar Dronamraju
2023-10-18 16:37 ` [PATCH v2 5/6] powerpc/smp: Add read_mostly attribute Srikar Dronamraju
2023-10-18 16:37   ` Srikar Dronamraju
2023-10-19  4:49   ` Michael Ellerman
2023-10-19  4:49     ` Michael Ellerman
2023-10-19  7:51   ` Peter Zijlstra
2023-10-19  7:51     ` Peter Zijlstra
2023-10-19 12:56     ` Srikar Dronamraju
2023-10-19 12:56       ` Srikar Dronamraju
2023-10-18 16:37 ` [PATCH v2 6/6] powerpc/smp: Avoid asym packing within thread_group of a core Srikar Dronamraju
2023-10-18 16:37   ` Srikar Dronamraju

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231020090908.GM2194132@linux.vnet.ibm.com \
    --to=srikar@linux.vnet.ibm.com \
    --cc=jpoimboe@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mark.rutland@arm.com \
    --cc=mpe@ellerman.id.au \
    --cc=ndesaulniers@google.com \
    --cc=npiggin@gmail.com \
    --cc=peterz@infradead.org \
    --cc=rmclure@linux.ibm.com \
    --cc=vschneid@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.