* [PATCH v2 0/7] Remove onstack cpumask var for sparc
@ 2024-04-20 5:15 Dawei Li
2024-04-20 5:15 ` [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var Dawei Li
` (6 more replies)
0 siblings, 7 replies; 19+ messages in thread
From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw)
To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li
Hi,
This is v2 of previous series[1] on removal of onstack cpumask for sparc
arch.
Change since v1:
- Fix build warning reported by test bot.
- Extend scope of removal, as suggested by Sam[2].
Note: Couldn't figure out proper approach dealing with case of
arch/sparc/kernel/ds.c. Just leave it as is.
[1] v1: https://lore.kernel.org/all/20240418104949.3606645-1-dawei.li@shingroup.cn/
[2] https://lore.kernel.org/all/20240419051350.GA558245@ravnborg.org/
Dawei Li (7):
sparc/srmmu: Remove on-stack cpumask var
sparc/irq: Remove on-stack cpumask var
sparc/of: Remove on-stack cpumask var
sparc/pci_msi: Remove on-stack cpumask var
sparc: Remove on-stack cpumask var
sparc/leon: Remove on-stack cpumask var
sparc/smp: Remove on-stack cpumask var
arch/sparc/include/asm/smp_32.h | 12 +++++-----
arch/sparc/kernel/irq_64.c | 10 +++-----
arch/sparc/kernel/kernel.h | 11 +++++++++
arch/sparc/kernel/leon_kernel.c | 9 +++----
arch/sparc/kernel/leon_smp.c | 11 ++++-----
arch/sparc/kernel/of_device_64.c | 5 +---
arch/sparc/kernel/pci_msi.c | 5 +---
arch/sparc/kernel/sun4d_smp.c | 10 ++++----
arch/sparc/kernel/sun4m_smp.c | 10 ++++----
arch/sparc/mm/init_64.c | 2 +-
arch/sparc/mm/srmmu.c | 40 ++++++++++----------------------
11 files changed, 50 insertions(+), 75 deletions(-)
Thanks,
Dawei
--
2.27.0
^ permalink raw reply [flat|nested] 19+ messages in thread* [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li @ 2024-04-20 5:15 ` Dawei Li 2024-04-20 7:58 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 2/7] sparc/irq: " Dawei Li ` (5 subsequent siblings) 6 siblings, 1 reply; 19+ messages in thread From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw) To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_any_but() to avoid the need for a temporary cpumask on the stack. Signed-off-by: Dawei Li <dawei.li@shingroup.cn> --- arch/sparc/mm/srmmu.c | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index 852085ada368..86fd20c878ae 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -1653,13 +1653,15 @@ static void smp_flush_tlb_all(void) local_ops->tlb_all(); } +static bool cpumask_any_but_current(struct mm_struct *mm) +{ + return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids; +} + static void smp_flush_cache_mm(struct mm_struct *mm) { if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask; - cpumask_copy(&cpu_mask, mm_cpumask(mm)); - cpumask_clear_cpu(smp_processor_id(), &cpu_mask); - if (!cpumask_empty(&cpu_mask)) + if (cpumask_any_but_current(mm)) xc1(local_ops->cache_mm, (unsigned long)mm); local_ops->cache_mm(mm); } @@ -1668,10 +1670,7 @@ static void smp_flush_cache_mm(struct mm_struct *mm) static void smp_flush_tlb_mm(struct mm_struct *mm) { if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask; - cpumask_copy(&cpu_mask, mm_cpumask(mm)); - cpumask_clear_cpu(smp_processor_id(), &cpu_mask); - if (!cpumask_empty(&cpu_mask)) { + if (cpumask_any_but_current(mm)) { xc1(local_ops->tlb_mm, (unsigned long)mm); if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm) cpumask_copy(mm_cpumask(mm), @@ -1688,10 +1687,7 @@ static void smp_flush_cache_range(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask; - cpumask_copy(&cpu_mask, mm_cpumask(mm)); - cpumask_clear_cpu(smp_processor_id(), &cpu_mask); - if (!cpumask_empty(&cpu_mask)) + if (cpumask_any_but_current(mm)) xc3(local_ops->cache_range, (unsigned long)vma, start, end); local_ops->cache_range(vma, start, end); @@ -1705,10 +1701,7 @@ static void smp_flush_tlb_range(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask; - cpumask_copy(&cpu_mask, mm_cpumask(mm)); - cpumask_clear_cpu(smp_processor_id(), &cpu_mask); - if (!cpumask_empty(&cpu_mask)) + if (cpumask_any_but_current(mm)) xc3(local_ops->tlb_range, (unsigned long)vma, start, end); local_ops->tlb_range(vma, start, end); @@ -1720,10 +1713,7 @@ static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask; - cpumask_copy(&cpu_mask, mm_cpumask(mm)); - cpumask_clear_cpu(smp_processor_id(), &cpu_mask); - if (!cpumask_empty(&cpu_mask)) + if (cpumask_any_but_current(mm)) xc2(local_ops->cache_page, (unsigned long)vma, page); local_ops->cache_page(vma, page); } @@ -1734,10 +1724,7 @@ static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) struct mm_struct *mm = vma->vm_mm; if (mm->context != NO_CONTEXT) { - cpumask_t cpu_mask; - cpumask_copy(&cpu_mask, mm_cpumask(mm)); - cpumask_clear_cpu(smp_processor_id(), &cpu_mask); - if (!cpumask_empty(&cpu_mask)) + if (cpumask_any_but_current(mm)) xc2(local_ops->tlb_page, (unsigned long)vma, page); local_ops->tlb_page(vma, page); } @@ -1759,10 +1746,7 @@ static void smp_flush_page_to_ram(unsigned long page) static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr) { - cpumask_t cpu_mask; - cpumask_copy(&cpu_mask, mm_cpumask(mm)); - cpumask_clear_cpu(smp_processor_id(), &cpu_mask); - if (!cpumask_empty(&cpu_mask)) + if (cpumask_any_but_current(mm)) xc2(local_ops->sig_insns, (unsigned long)mm, insn_addr); local_ops->sig_insns(mm, insn_addr); } -- 2.27.0 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var 2024-04-20 5:15 ` [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var Dawei Li @ 2024-04-20 7:58 ` Sam Ravnborg 2024-04-22 5:46 ` Dawei Li 0 siblings, 1 reply; 19+ messages in thread From: Sam Ravnborg @ 2024-04-20 7:58 UTC (permalink / raw) To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel Hi Dawei, On Sat, Apr 20, 2024 at 01:15:41PM +0800, Dawei Li wrote: > In general it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. > > Use cpumask_any_but() to avoid the need for a temporary cpumask on > the stack. Another good argument for this patch is the simplification of the code. > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> > --- > arch/sparc/mm/srmmu.c | 40 ++++++++++++---------------------------- > 1 file changed, 12 insertions(+), 28 deletions(-) > > diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c > index 852085ada368..86fd20c878ae 100644 > --- a/arch/sparc/mm/srmmu.c > +++ b/arch/sparc/mm/srmmu.c > @@ -1653,13 +1653,15 @@ static void smp_flush_tlb_all(void) > local_ops->tlb_all(); > } > > +static bool cpumask_any_but_current(struct mm_struct *mm) > +{ > + return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids; > +} This helper is not a cpumask helper - the name should reflect what it is used for. Something like: static bool any_other_mm_cpus(struct mm_struct *mm) { return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids; } The implementation is fine - it is only the naming that should be improve. With this change (or a better name): Reviewed-by: Sam Ravnborg <sam@ravnborg.org> Sam ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var 2024-04-20 7:58 ` Sam Ravnborg @ 2024-04-22 5:46 ` Dawei Li 0 siblings, 0 replies; 19+ messages in thread From: Dawei Li @ 2024-04-22 5:46 UTC (permalink / raw) To: Sam Ravnborg; +Cc: davem, andreas, sparclinux, linux-kernel Hi Sam, Thanks for review. On Sat, Apr 20, 2024 at 09:58:46AM +0200, Sam Ravnborg wrote: > Hi Dawei, > On Sat, Apr 20, 2024 at 01:15:41PM +0800, Dawei Li wrote: > > In general it's preferable to avoid placing cpumasks on the stack, as > > for large values of NR_CPUS these can consume significant amounts of > > stack space and make stack overflows more likely. > > > > Use cpumask_any_but() to avoid the need for a temporary cpumask on > > the stack. > > Another good argument for this patch is the simplification of the code. > > > > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> > > --- > > arch/sparc/mm/srmmu.c | 40 ++++++++++++---------------------------- > > 1 file changed, 12 insertions(+), 28 deletions(-) > > > > diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c > > index 852085ada368..86fd20c878ae 100644 > > --- a/arch/sparc/mm/srmmu.c > > +++ b/arch/sparc/mm/srmmu.c > > @@ -1653,13 +1653,15 @@ static void smp_flush_tlb_all(void) > > local_ops->tlb_all(); > > } > > > > +static bool cpumask_any_but_current(struct mm_struct *mm) > > +{ > > + return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids; > > +} > > This helper is not a cpumask helper - the name should reflect what it is > used for. > > Something like: > static bool any_other_mm_cpus(struct mm_struct *mm) > { > return cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids; > } Acked. I will rename the helper as you suggested. > > The implementation is fine - it is only the naming that should be > improve. > With this change (or a better name): > Reviewed-by: Sam Ravnborg <sam@ravnborg.org> > > Sam Thanks, Dawei > ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 2/7] sparc/irq: Remove on-stack cpumask var 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li 2024-04-20 5:15 ` [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var Dawei Li @ 2024-04-20 5:15 ` Dawei Li 2024-04-20 8:22 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 3/7] sparc/of: " Dawei Li ` (4 subsequent siblings) 6 siblings, 1 reply; 19+ messages in thread From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw) To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. - Both 2 arguments of cpumask_equal() is constant and free of change, no need to allocate extra cpumask variables. - Merge cpumask_and(), cpumask_first() and cpumask_empty() into cpumask_first_and(). Signed-off-by: Dawei Li <dawei.li@shingroup.cn> --- arch/sparc/kernel/irq_64.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 5280e325d4d6..01ee800efde3 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -349,17 +349,13 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) #ifdef CONFIG_SMP static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) { - cpumask_t mask; int cpuid; - cpumask_copy(&mask, affinity); - if (cpumask_equal(&mask, cpu_online_mask)) { + if (cpumask_equal(affinity, cpu_online_mask)) { cpuid = map_to_cpu(irq); } else { - cpumask_t tmp; - - cpumask_and(&tmp, cpu_online_mask, &mask); - cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp); + cpuid = cpumask_first_and(affinity, cpu_online_mask); + cpuid = cpuid < nr_cpu_ids ? cpuid : map_to_cpu(irq); } return cpuid; -- 2.27.0 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 2/7] sparc/irq: Remove on-stack cpumask var 2024-04-20 5:15 ` [PATCH v2 2/7] sparc/irq: " Dawei Li @ 2024-04-20 8:22 ` Sam Ravnborg 0 siblings, 0 replies; 19+ messages in thread From: Sam Ravnborg @ 2024-04-20 8:22 UTC (permalink / raw) To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel On Sat, Apr 20, 2024 at 01:15:42PM +0800, Dawei Li wrote: > In general it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. > > - Both 2 arguments of cpumask_equal() is constant and free of change, no > need to allocate extra cpumask variables. > > - Merge cpumask_and(), cpumask_first() and cpumask_empty() into > cpumask_first_and(). > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> Reviewed-by: Sam Ravnborg <sam@ravnborg.org> > --- > arch/sparc/kernel/irq_64.c | 10 +++------- > 1 file changed, 3 insertions(+), 7 deletions(-) > > diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c > index 5280e325d4d6..01ee800efde3 100644 > --- a/arch/sparc/kernel/irq_64.c > +++ b/arch/sparc/kernel/irq_64.c > @@ -349,17 +349,13 @@ static unsigned int sun4u_compute_tid(unsigned long imap, unsigned long cpuid) > #ifdef CONFIG_SMP > static int irq_choose_cpu(unsigned int irq, const struct cpumask *affinity) > { > - cpumask_t mask; > int cpuid; > > - cpumask_copy(&mask, affinity); > - if (cpumask_equal(&mask, cpu_online_mask)) { > + if (cpumask_equal(affinity, cpu_online_mask)) { > cpuid = map_to_cpu(irq); > } else { > - cpumask_t tmp; > - > - cpumask_and(&tmp, cpu_online_mask, &mask); > - cpuid = cpumask_empty(&tmp) ? map_to_cpu(irq) : cpumask_first(&tmp); > + cpuid = cpumask_first_and(affinity, cpu_online_mask); > + cpuid = cpuid < nr_cpu_ids ? cpuid : map_to_cpu(irq); > } > > return cpuid; > -- > 2.27.0 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 3/7] sparc/of: Remove on-stack cpumask var 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li 2024-04-20 5:15 ` [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var Dawei Li 2024-04-20 5:15 ` [PATCH v2 2/7] sparc/irq: " Dawei Li @ 2024-04-20 5:15 ` Dawei Li 2024-04-20 8:23 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 4/7] sparc/pci_msi: " Dawei Li ` (3 subsequent siblings) 6 siblings, 1 reply; 19+ messages in thread From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw) To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. @cpumask of irq_set_affinity() is read-only and free of change, drop unneeded cpumask var. Signed-off-by: Dawei Li <dawei.li@shingroup.cn> --- arch/sparc/kernel/of_device_64.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c index c350c58c7f69..f98c2901f335 100644 --- a/arch/sparc/kernel/of_device_64.c +++ b/arch/sparc/kernel/of_device_64.c @@ -624,10 +624,7 @@ static unsigned int __init build_one_device_irq(struct platform_device *op, out: nid = of_node_to_nid(dp); if (nid != -1) { - cpumask_t numa_mask; - - cpumask_copy(&numa_mask, cpumask_of_node(nid)); - irq_set_affinity(irq, &numa_mask); + irq_set_affinity(irq, cpumask_of_node(nid)); } return irq; -- 2.27.0 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 3/7] sparc/of: Remove on-stack cpumask var 2024-04-20 5:15 ` [PATCH v2 3/7] sparc/of: " Dawei Li @ 2024-04-20 8:23 ` Sam Ravnborg 0 siblings, 0 replies; 19+ messages in thread From: Sam Ravnborg @ 2024-04-20 8:23 UTC (permalink / raw) To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel On Sat, Apr 20, 2024 at 01:15:43PM +0800, Dawei Li wrote: > In general it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. > > @cpumask of irq_set_affinity() is read-only and free of change, drop > unneeded cpumask var. > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> Reviewed-by: Sam Ravnborg <sam@ravnborg.org> > --- > arch/sparc/kernel/of_device_64.c | 5 +---- > 1 file changed, 1 insertion(+), 4 deletions(-) > > diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c > index c350c58c7f69..f98c2901f335 100644 > --- a/arch/sparc/kernel/of_device_64.c > +++ b/arch/sparc/kernel/of_device_64.c > @@ -624,10 +624,7 @@ static unsigned int __init build_one_device_irq(struct platform_device *op, > out: > nid = of_node_to_nid(dp); > if (nid != -1) { > - cpumask_t numa_mask; > - > - cpumask_copy(&numa_mask, cpumask_of_node(nid)); > - irq_set_affinity(irq, &numa_mask); > + irq_set_affinity(irq, cpumask_of_node(nid)); > } > > return irq; > -- > 2.27.0 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 4/7] sparc/pci_msi: Remove on-stack cpumask var 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li ` (2 preceding siblings ...) 2024-04-20 5:15 ` [PATCH v2 3/7] sparc/of: " Dawei Li @ 2024-04-20 5:15 ` Dawei Li 2024-04-20 8:23 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 5/7] sparc: " Dawei Li ` (2 subsequent siblings) 6 siblings, 1 reply; 19+ messages in thread From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw) To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. @cpumask of irq_set_affinity() is read-only and free of change, drop unneeded cpumask var. Signed-off-by: Dawei Li <dawei.li@shingroup.cn> --- arch/sparc/kernel/pci_msi.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c index fc7402948b7b..acb2f83a1d5c 100644 --- a/arch/sparc/kernel/pci_msi.c +++ b/arch/sparc/kernel/pci_msi.c @@ -287,10 +287,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, nid = pbm->numa_node; if (nid != -1) { - cpumask_t numa_mask; - - cpumask_copy(&numa_mask, cpumask_of_node(nid)); - irq_set_affinity(irq, &numa_mask); + irq_set_affinity(irq, cpumask_of_node(nid)); } err = request_irq(irq, sparc64_msiq_interrupt, 0, "MSIQ", -- 2.27.0 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 4/7] sparc/pci_msi: Remove on-stack cpumask var 2024-04-20 5:15 ` [PATCH v2 4/7] sparc/pci_msi: " Dawei Li @ 2024-04-20 8:23 ` Sam Ravnborg 0 siblings, 0 replies; 19+ messages in thread From: Sam Ravnborg @ 2024-04-20 8:23 UTC (permalink / raw) To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel On Sat, Apr 20, 2024 at 01:15:44PM +0800, Dawei Li wrote: > In general it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. > > @cpumask of irq_set_affinity() is read-only and free of change, drop > unneeded cpumask var. > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> Reviewed-by: Sam Ravnborg <sam@ravnborg.org> > --- > arch/sparc/kernel/pci_msi.c | 5 +---- > 1 file changed, 1 insertion(+), 4 deletions(-) > > diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c > index fc7402948b7b..acb2f83a1d5c 100644 > --- a/arch/sparc/kernel/pci_msi.c > +++ b/arch/sparc/kernel/pci_msi.c > @@ -287,10 +287,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm, > > nid = pbm->numa_node; > if (nid != -1) { > - cpumask_t numa_mask; > - > - cpumask_copy(&numa_mask, cpumask_of_node(nid)); > - irq_set_affinity(irq, &numa_mask); > + irq_set_affinity(irq, cpumask_of_node(nid)); > } > err = request_irq(irq, sparc64_msiq_interrupt, 0, > "MSIQ", > -- > 2.27.0 ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 5/7] sparc: Remove on-stack cpumask var 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li ` (3 preceding siblings ...) 2024-04-20 5:15 ` [PATCH v2 4/7] sparc/pci_msi: " Dawei Li @ 2024-04-20 5:15 ` Dawei Li 2024-04-20 8:28 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 6/7] sparc/leon: " Dawei Li 2024-04-20 5:15 ` [PATCH v2 7/7] sparc/smp: " Dawei Li 6 siblings, 1 reply; 19+ messages in thread From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw) To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Since the cpumask var resides in __init function, which means it's free of any concurrenct access, it can be safely marked with static to get rid of allocation on stack. while at it, mark it with __initdata to keep it from persistently consumed memory. Signed-off-by: Dawei Li <dawei.li@shingroup.cn> --- arch/sparc/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 1ca9054d9b97..088d9c103dcc 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -1438,7 +1438,7 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, int index) { - cpumask_t mask; + static cpumask_t mask __initdata; int cpu; numa_parse_mdesc_group_cpus(md, grp, &mask); -- 2.27.0 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 5/7] sparc: Remove on-stack cpumask var 2024-04-20 5:15 ` [PATCH v2 5/7] sparc: " Dawei Li @ 2024-04-20 8:28 ` Sam Ravnborg 2024-04-22 5:49 ` Dawei Li 0 siblings, 1 reply; 19+ messages in thread From: Sam Ravnborg @ 2024-04-20 8:28 UTC (permalink / raw) To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel On Sat, Apr 20, 2024 at 01:15:45PM +0800, Dawei Li wrote: > In general it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. > > Since the cpumask var resides in __init function, which means it's free > of any concurrenct access, it can be safely marked with static to get > rid of allocation on stack. > > while at it, mark it with __initdata to keep it from persistently > consumed memory. I do not see the need for this - it does not fix a bug and it complicates things. If the size is a real concern the normal pattern is to allocate and not declare it __initdata. Yes - __initdata is used in some place. I suggest to leave it as is unless we are fixing a real bug here. Sam > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> > --- > arch/sparc/mm/init_64.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c > index 1ca9054d9b97..088d9c103dcc 100644 > --- a/arch/sparc/mm/init_64.c > +++ b/arch/sparc/mm/init_64.c > @@ -1438,7 +1438,7 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, > static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, > int index) > { > - cpumask_t mask; > + static cpumask_t mask __initdata; > int cpu; > > numa_parse_mdesc_group_cpus(md, grp, &mask); > -- > 2.27.0 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 5/7] sparc: Remove on-stack cpumask var 2024-04-20 8:28 ` Sam Ravnborg @ 2024-04-22 5:49 ` Dawei Li 0 siblings, 0 replies; 19+ messages in thread From: Dawei Li @ 2024-04-22 5:49 UTC (permalink / raw) To: Sam Ravnborg; +Cc: davem, andreas, sparclinux, linux-kernel Hi Sam, Thanks for review. On Sat, Apr 20, 2024 at 10:28:05AM +0200, Sam Ravnborg wrote: > On Sat, Apr 20, 2024 at 01:15:45PM +0800, Dawei Li wrote: > > In general it's preferable to avoid placing cpumasks on the stack, as > > for large values of NR_CPUS these can consume significant amounts of > > stack space and make stack overflows more likely. > > > > Since the cpumask var resides in __init function, which means it's free > > of any concurrenct access, it can be safely marked with static to get > > rid of allocation on stack. > > > > while at it, mark it with __initdata to keep it from persistently > > consumed memory. > > I do not see the need for this - it does not fix a bug and it > complicates things. > If the size is a real concern the normal pattern is to allocate > and not declare it __initdata. > > Yes - __initdata is used in some place. > I suggest to leave it as is unless we are fixing a real bug here. Acked. I will remove __initdata annotation. Thanks, Dawei > > Sam > > > > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> > > --- > > arch/sparc/mm/init_64.c | 2 +- > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c > > index 1ca9054d9b97..088d9c103dcc 100644 > > --- a/arch/sparc/mm/init_64.c > > +++ b/arch/sparc/mm/init_64.c > > @@ -1438,7 +1438,7 @@ static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, > > static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, > > int index) > > { > > - cpumask_t mask; > > + static cpumask_t mask __initdata; > > int cpu; > > > > numa_parse_mdesc_group_cpus(md, grp, &mask); > > -- > > 2.27.0 > ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 6/7] sparc/leon: Remove on-stack cpumask var 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li ` (4 preceding siblings ...) 2024-04-20 5:15 ` [PATCH v2 5/7] sparc: " Dawei Li @ 2024-04-20 5:15 ` Dawei Li 2024-04-20 8:32 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 7/7] sparc/smp: " Dawei Li 6 siblings, 1 reply; 19+ messages in thread From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw) To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. Use cpumask_subset() and cpumask_first_and() to avoid the need for a temporary cpumask on the stack. Signed-off-by: Dawei Li <dawei.li@shingroup.cn> --- arch/sparc/kernel/leon_kernel.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c index 4c61da491fee..0070655041bb 100644 --- a/arch/sparc/kernel/leon_kernel.c +++ b/arch/sparc/kernel/leon_kernel.c @@ -106,13 +106,10 @@ unsigned long leon_get_irqmask(unsigned int irq) #ifdef CONFIG_SMP static int irq_choose_cpu(const struct cpumask *affinity) { - cpumask_t mask; + unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask); - cpumask_and(&mask, cpu_online_mask, affinity); - if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask)) - return boot_cpu_id; - else - return cpumask_first(&mask); + return cpumask_subset(cpu_online_mask, affinity) || cpu >= nr_cpu_ids ? + boot_cpu_id : cpu; } #else #define irq_choose_cpu(affinity) boot_cpu_id -- 2.27.0 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 6/7] sparc/leon: Remove on-stack cpumask var 2024-04-20 5:15 ` [PATCH v2 6/7] sparc/leon: " Dawei Li @ 2024-04-20 8:32 ` Sam Ravnborg 2024-04-22 6:15 ` Dawei Li 0 siblings, 1 reply; 19+ messages in thread From: Sam Ravnborg @ 2024-04-20 8:32 UTC (permalink / raw) To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel On Sat, Apr 20, 2024 at 01:15:46PM +0800, Dawei Li wrote: > In general it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. > > Use cpumask_subset() and cpumask_first_and() to avoid the need for a > temporary cpumask on the stack. > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> > --- > arch/sparc/kernel/leon_kernel.c | 9 +++------ > 1 file changed, 3 insertions(+), 6 deletions(-) > > diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c > index 4c61da491fee..0070655041bb 100644 > --- a/arch/sparc/kernel/leon_kernel.c > +++ b/arch/sparc/kernel/leon_kernel.c > @@ -106,13 +106,10 @@ unsigned long leon_get_irqmask(unsigned int irq) > #ifdef CONFIG_SMP > static int irq_choose_cpu(const struct cpumask *affinity) > { > - cpumask_t mask; > + unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask); > > - cpumask_and(&mask, cpu_online_mask, affinity); > - if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask)) > - return boot_cpu_id; > - else > - return cpumask_first(&mask); > + return cpumask_subset(cpu_online_mask, affinity) || cpu >= nr_cpu_ids ? > + boot_cpu_id : cpu; This looks wrong - or if it is correct is is hard to parse. Drop ?: and use an if so the code is more readable. Sam ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 6/7] sparc/leon: Remove on-stack cpumask var 2024-04-20 8:32 ` Sam Ravnborg @ 2024-04-22 6:15 ` Dawei Li 0 siblings, 0 replies; 19+ messages in thread From: Dawei Li @ 2024-04-22 6:15 UTC (permalink / raw) To: Sam Ravnborg; +Cc: davem, andreas, sparclinux, linux-kernel Hi Sam, Thanks for review. On Sat, Apr 20, 2024 at 10:32:02AM +0200, Sam Ravnborg wrote: > On Sat, Apr 20, 2024 at 01:15:46PM +0800, Dawei Li wrote: > > In general it's preferable to avoid placing cpumasks on the stack, as > > for large values of NR_CPUS these can consume significant amounts of > > stack space and make stack overflows more likely. > > > > Use cpumask_subset() and cpumask_first_and() to avoid the need for a > > temporary cpumask on the stack. > > > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> > > --- > > arch/sparc/kernel/leon_kernel.c | 9 +++------ > > 1 file changed, 3 insertions(+), 6 deletions(-) > > > > diff --git a/arch/sparc/kernel/leon_kernel.c b/arch/sparc/kernel/leon_kernel.c > > index 4c61da491fee..0070655041bb 100644 > > --- a/arch/sparc/kernel/leon_kernel.c > > +++ b/arch/sparc/kernel/leon_kernel.c > > @@ -106,13 +106,10 @@ unsigned long leon_get_irqmask(unsigned int irq) > > #ifdef CONFIG_SMP > > static int irq_choose_cpu(const struct cpumask *affinity) > > { > > - cpumask_t mask; > > + unsigned int cpu = cpumask_first_and(affinity, cpu_online_mask); > > > > - cpumask_and(&mask, cpu_online_mask, affinity); > > - if (cpumask_equal(&mask, cpu_online_mask) || cpumask_empty(&mask)) > > - return boot_cpu_id; > > - else > > - return cpumask_first(&mask); > > + return cpumask_subset(cpu_online_mask, affinity) || cpu >= nr_cpu_ids ? > > + boot_cpu_id : cpu; > > This looks wrong - or if it is correct is is hard to parse. > Drop ?: and use an if so the code is more readable. I am confused a bit here, about its correctness(not coding style). Per my understanding: A & B = A <-> For every set bit in A, it's set for B; <-> B is superset of A. <-> A is subset of B. - cpumask_and(&mask, cpu_online_mask, affinity); - if (cpumask_equal(&mask, cpu_online_mask)) So, codes above is equivalent to: if (cpumask_subset(cpu_online_mask, affinity)) Am I missing something? About the ?:, I will restore original "if else" style. > > Sam > Thanks, Dawei ^ permalink raw reply [flat|nested] 19+ messages in thread
* [PATCH v2 7/7] sparc/smp: Remove on-stack cpumask var 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li ` (5 preceding siblings ...) 2024-04-20 5:15 ` [PATCH v2 6/7] sparc/leon: " Dawei Li @ 2024-04-20 5:15 ` Dawei Li 2024-04-20 11:42 ` Sam Ravnborg 6 siblings, 1 reply; 19+ messages in thread From: Dawei Li @ 2024-04-20 5:15 UTC (permalink / raw) To: davem, andreas; +Cc: sparclinux, linux-kernel, sam, Dawei Li In general it's preferable to avoid placing cpumasks on the stack, as for large values of NR_CPUS these can consume significant amounts of stack space and make stack overflows more likely. - Change prototype of sparc32_ipi_ops::cross_call() so that it takes const cpumask * arg and all its callers accordingly. - As for all cross_call() implementations, divide cpumask_test_cpu() call into several sub calls to avoid on-stack cpumask var. Signed-off-by: Dawei Li <dawei.li@shingroup.cn> --- arch/sparc/include/asm/smp_32.h | 12 ++++++------ arch/sparc/kernel/kernel.h | 11 +++++++++++ arch/sparc/kernel/leon_smp.c | 11 ++++------- arch/sparc/kernel/sun4d_smp.c | 10 ++++------ arch/sparc/kernel/sun4m_smp.c | 10 ++++------ 5 files changed, 29 insertions(+), 25 deletions(-) diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 2cf7971d7f6c..9b6a166f6a57 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -54,7 +54,7 @@ void smp_bogo(struct seq_file *); void smp_info(struct seq_file *); struct sparc32_ipi_ops { - void (*cross_call)(void *func, cpumask_t mask, unsigned long arg1, + void (*cross_call)(void *func, const cpumask_t *mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4); void (*resched)(int cpu); @@ -65,29 +65,29 @@ extern const struct sparc32_ipi_ops *sparc32_ipi_ops; static inline void xc0(void *func) { - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, 0, 0, 0, 0); + sparc32_ipi_ops->cross_call(func, cpu_online_mask, 0, 0, 0, 0); } static inline void xc1(void *func, unsigned long arg1) { - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, 0, 0, 0); } static inline void xc2(void *func, unsigned long arg1, unsigned long arg2) { - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, arg2, 0, 0); } static inline void xc3(void *func, unsigned long arg1, unsigned long arg2, unsigned long arg3) { - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, arg2, arg3, 0); } static inline void xc4(void *func, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, arg2, arg3, arg4); } diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h index a8fb7c0bf053..36747e8f7e36 100644 --- a/arch/sparc/kernel/kernel.h +++ b/arch/sparc/kernel/kernel.h @@ -4,6 +4,7 @@ #include <linux/interrupt.h> #include <linux/ftrace.h> +#include <linux/smp.h> #include <asm/traps.h> #include <asm/head.h> @@ -75,6 +76,16 @@ int sparc32_classify_syscall(unsigned int syscall); #endif #ifdef CONFIG_SPARC32 + +#ifdef CONFIG_SMP +static inline bool cpu_for_ipi(const cpumask_t *mask, unsigned int cpu) +{ + return cpumask_test_cpu(cpu, mask) && + cpumask_test_cpu(cpu, cpu_online_mask) && + cpu != smp_processor_id(); +} +#endif /* CONFIG_SMP */ + /* setup_32.c */ struct linux_romvec; void sparc32_start_kernel(struct linux_romvec *rp); diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 1ee393abc463..291884c8d82a 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -372,7 +372,7 @@ static struct smp_funcall { static DEFINE_SPINLOCK(cross_call_lock); /* Cross calls must be serialized, at least currently. */ -static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, +static void leon_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { @@ -403,14 +403,11 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, { register int i; - cpumask_clear_cpu(smp_processor_id(), &mask); - cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpumask_test_cpu(i, &mask)) { + if (cpu_for_ipi(mask, i)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; leon_send_ipi(i, LEON3_IRQ_CROSS_CALL); - } } } @@ -420,7 +417,7 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpumask_test_cpu(i, &mask)) + if (!cpu_for_ipi(mask, i)) continue; while (!ccall_info.processors_in[i]) @@ -429,7 +426,7 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpumask_test_cpu(i, &mask)) + if (!cpu_for_ipi(mask, i)) continue; while (!ccall_info.processors_out[i]) diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 9a62a5cf3337..7dc57ca05728 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -281,7 +281,7 @@ static struct smp_funcall { static DEFINE_SPINLOCK(cross_call_lock); /* Cross calls must be serialized, at least currently. */ -static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, +static void sun4d_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { @@ -315,10 +315,8 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, { register int i; - cpumask_clear_cpu(smp_processor_id(), &mask); - cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i <= high; i++) { - if (cpumask_test_cpu(i, &mask)) { + if (cpu_for_ipi(mask, i)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; sun4d_send_ipi(i, IRQ_CROSS_CALL); @@ -331,7 +329,7 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpumask_test_cpu(i, &mask)) + if (!cpu_for_ipi(mask, i)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -339,7 +337,7 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpumask_test_cpu(i, &mask)) + if (!cpu_for_ipi(mask, i)) continue; while (!ccall_info.processors_out[i]) barrier(); diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 056df034e79e..3f43f64e3489 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -170,7 +170,7 @@ static struct smp_funcall { static DEFINE_SPINLOCK(cross_call_lock); /* Cross calls must be serialized, at least currently. */ -static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, +static void sun4m_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) { @@ -191,10 +191,8 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, { register int i; - cpumask_clear_cpu(smp_processor_id(), &mask); - cpumask_and(&mask, cpu_online_mask, &mask); for (i = 0; i < ncpus; i++) { - if (cpumask_test_cpu(i, &mask)) { + if (cpu_for_ipi(mask, i)) { ccall_info.processors_in[i] = 0; ccall_info.processors_out[i] = 0; sun4m_send_ipi(i, IRQ_CROSS_CALL); @@ -210,7 +208,7 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpumask_test_cpu(i, &mask)) + if (!cpu_for_ipi(mask, i)) continue; while (!ccall_info.processors_in[i]) barrier(); @@ -218,7 +216,7 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, i = 0; do { - if (!cpumask_test_cpu(i, &mask)) + if (!cpu_for_ipi(mask, i)) continue; while (!ccall_info.processors_out[i]) barrier(); -- 2.27.0 ^ permalink raw reply related [flat|nested] 19+ messages in thread
* Re: [PATCH v2 7/7] sparc/smp: Remove on-stack cpumask var 2024-04-20 5:15 ` [PATCH v2 7/7] sparc/smp: " Dawei Li @ 2024-04-20 11:42 ` Sam Ravnborg 2024-04-22 6:19 ` Dawei Li 0 siblings, 1 reply; 19+ messages in thread From: Sam Ravnborg @ 2024-04-20 11:42 UTC (permalink / raw) To: Dawei Li; +Cc: davem, andreas, sparclinux, linux-kernel Hi Dawei On Sat, Apr 20, 2024 at 01:15:47PM +0800, Dawei Li wrote: > In general it's preferable to avoid placing cpumasks on the stack, as > for large values of NR_CPUS these can consume significant amounts of > stack space and make stack overflows more likely. > > - Change prototype of sparc32_ipi_ops::cross_call() so that it takes > const cpumask * arg and all its callers accordingly. > > - As for all cross_call() implementations, divide cpumask_test_cpu() call > into several sub calls to avoid on-stack cpumask var. > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> The code changes looks ok from a quick look. But we have a bunch of patches pending touching or removing the same files. On top of this, the right approach would be to take a look at code from a higher level. In other words - I advise to drop this, and maybe re-visit in a few months after the pending patches has hit -next. Sorry for asking you to look as this. Sam > --- > arch/sparc/include/asm/smp_32.h | 12 ++++++------ > arch/sparc/kernel/kernel.h | 11 +++++++++++ > arch/sparc/kernel/leon_smp.c | 11 ++++------- > arch/sparc/kernel/sun4d_smp.c | 10 ++++------ > arch/sparc/kernel/sun4m_smp.c | 10 ++++------ > 5 files changed, 29 insertions(+), 25 deletions(-) > > diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h > index 2cf7971d7f6c..9b6a166f6a57 100644 > --- a/arch/sparc/include/asm/smp_32.h > +++ b/arch/sparc/include/asm/smp_32.h > @@ -54,7 +54,7 @@ void smp_bogo(struct seq_file *); > void smp_info(struct seq_file *); > > struct sparc32_ipi_ops { > - void (*cross_call)(void *func, cpumask_t mask, unsigned long arg1, > + void (*cross_call)(void *func, const cpumask_t *mask, unsigned long arg1, > unsigned long arg2, unsigned long arg3, > unsigned long arg4); > void (*resched)(int cpu); > @@ -65,29 +65,29 @@ extern const struct sparc32_ipi_ops *sparc32_ipi_ops; > > static inline void xc0(void *func) > { > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, 0, 0, 0, 0); > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, 0, 0, 0, 0); > } > > static inline void xc1(void *func, unsigned long arg1) > { > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, 0, 0, 0); > } > static inline void xc2(void *func, unsigned long arg1, unsigned long arg2) > { > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, arg2, 0, 0); > } > > static inline void xc3(void *func, unsigned long arg1, unsigned long arg2, > unsigned long arg3) > { > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, > arg1, arg2, arg3, 0); > } > > static inline void xc4(void *func, unsigned long arg1, unsigned long arg2, > unsigned long arg3, unsigned long arg4) > { > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, > arg1, arg2, arg3, arg4); > } > > diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h > index a8fb7c0bf053..36747e8f7e36 100644 > --- a/arch/sparc/kernel/kernel.h > +++ b/arch/sparc/kernel/kernel.h > @@ -4,6 +4,7 @@ > > #include <linux/interrupt.h> > #include <linux/ftrace.h> > +#include <linux/smp.h> > > #include <asm/traps.h> > #include <asm/head.h> > @@ -75,6 +76,16 @@ int sparc32_classify_syscall(unsigned int syscall); > #endif > > #ifdef CONFIG_SPARC32 > + > +#ifdef CONFIG_SMP > +static inline bool cpu_for_ipi(const cpumask_t *mask, unsigned int cpu) > +{ > + return cpumask_test_cpu(cpu, mask) && > + cpumask_test_cpu(cpu, cpu_online_mask) && > + cpu != smp_processor_id(); > +} > +#endif /* CONFIG_SMP */ > + > /* setup_32.c */ > struct linux_romvec; > void sparc32_start_kernel(struct linux_romvec *rp); > diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c > index 1ee393abc463..291884c8d82a 100644 > --- a/arch/sparc/kernel/leon_smp.c > +++ b/arch/sparc/kernel/leon_smp.c > @@ -372,7 +372,7 @@ static struct smp_funcall { > static DEFINE_SPINLOCK(cross_call_lock); > > /* Cross calls must be serialized, at least currently. */ > -static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > +static void leon_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, > unsigned long arg2, unsigned long arg3, > unsigned long arg4) > { > @@ -403,14 +403,11 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > { > register int i; > > - cpumask_clear_cpu(smp_processor_id(), &mask); > - cpumask_and(&mask, cpu_online_mask, &mask); > for (i = 0; i <= high; i++) { > - if (cpumask_test_cpu(i, &mask)) { > + if (cpu_for_ipi(mask, i)) { > ccall_info.processors_in[i] = 0; > ccall_info.processors_out[i] = 0; > leon_send_ipi(i, LEON3_IRQ_CROSS_CALL); > - > } > } > } > @@ -420,7 +417,7 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > i = 0; > do { > - if (!cpumask_test_cpu(i, &mask)) > + if (!cpu_for_ipi(mask, i)) > continue; > > while (!ccall_info.processors_in[i]) > @@ -429,7 +426,7 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > i = 0; > do { > - if (!cpumask_test_cpu(i, &mask)) > + if (!cpu_for_ipi(mask, i)) > continue; > > while (!ccall_info.processors_out[i]) > diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c > index 9a62a5cf3337..7dc57ca05728 100644 > --- a/arch/sparc/kernel/sun4d_smp.c > +++ b/arch/sparc/kernel/sun4d_smp.c > @@ -281,7 +281,7 @@ static struct smp_funcall { > static DEFINE_SPINLOCK(cross_call_lock); > > /* Cross calls must be serialized, at least currently. */ > -static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > +static void sun4d_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, > unsigned long arg2, unsigned long arg3, > unsigned long arg4) > { > @@ -315,10 +315,8 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > { > register int i; > > - cpumask_clear_cpu(smp_processor_id(), &mask); > - cpumask_and(&mask, cpu_online_mask, &mask); > for (i = 0; i <= high; i++) { > - if (cpumask_test_cpu(i, &mask)) { > + if (cpu_for_ipi(mask, i)) { > ccall_info.processors_in[i] = 0; > ccall_info.processors_out[i] = 0; > sun4d_send_ipi(i, IRQ_CROSS_CALL); > @@ -331,7 +329,7 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > i = 0; > do { > - if (!cpumask_test_cpu(i, &mask)) > + if (!cpu_for_ipi(mask, i)) > continue; > while (!ccall_info.processors_in[i]) > barrier(); > @@ -339,7 +337,7 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > i = 0; > do { > - if (!cpumask_test_cpu(i, &mask)) > + if (!cpu_for_ipi(mask, i)) > continue; > while (!ccall_info.processors_out[i]) > barrier(); > diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c > index 056df034e79e..3f43f64e3489 100644 > --- a/arch/sparc/kernel/sun4m_smp.c > +++ b/arch/sparc/kernel/sun4m_smp.c > @@ -170,7 +170,7 @@ static struct smp_funcall { > static DEFINE_SPINLOCK(cross_call_lock); > > /* Cross calls must be serialized, at least currently. */ > -static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > +static void sun4m_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, > unsigned long arg2, unsigned long arg3, > unsigned long arg4) > { > @@ -191,10 +191,8 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > { > register int i; > > - cpumask_clear_cpu(smp_processor_id(), &mask); > - cpumask_and(&mask, cpu_online_mask, &mask); > for (i = 0; i < ncpus; i++) { > - if (cpumask_test_cpu(i, &mask)) { > + if (cpu_for_ipi(mask, i)) { > ccall_info.processors_in[i] = 0; > ccall_info.processors_out[i] = 0; > sun4m_send_ipi(i, IRQ_CROSS_CALL); > @@ -210,7 +208,7 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > i = 0; > do { > - if (!cpumask_test_cpu(i, &mask)) > + if (!cpu_for_ipi(mask, i)) > continue; > while (!ccall_info.processors_in[i]) > barrier(); > @@ -218,7 +216,7 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > i = 0; > do { > - if (!cpumask_test_cpu(i, &mask)) > + if (!cpu_for_ipi(mask, i)) > continue; > while (!ccall_info.processors_out[i]) > barrier(); > -- > 2.27.0 ^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH v2 7/7] sparc/smp: Remove on-stack cpumask var 2024-04-20 11:42 ` Sam Ravnborg @ 2024-04-22 6:19 ` Dawei Li 0 siblings, 0 replies; 19+ messages in thread From: Dawei Li @ 2024-04-22 6:19 UTC (permalink / raw) To: Sam Ravnborg; +Cc: davem, andreas, sparclinux, linux-kernel Hi Sam, Thanks for review. On Sat, Apr 20, 2024 at 01:42:07PM +0200, Sam Ravnborg wrote: > Hi Dawei > > On Sat, Apr 20, 2024 at 01:15:47PM +0800, Dawei Li wrote: > > In general it's preferable to avoid placing cpumasks on the stack, as > > for large values of NR_CPUS these can consume significant amounts of > > stack space and make stack overflows more likely. > > > > - Change prototype of sparc32_ipi_ops::cross_call() so that it takes > > const cpumask * arg and all its callers accordingly. > > > > - As for all cross_call() implementations, divide cpumask_test_cpu() call > > into several sub calls to avoid on-stack cpumask var. > > > > Signed-off-by: Dawei Li <dawei.li@shingroup.cn> > > The code changes looks ok from a quick look. > But we have a bunch of patches pending touching or removing the same > files. On top of this, the right approach would be to take a > look at code from a higher level. > > In other words - I advise to drop this, and maybe re-visit in a few > months after the pending patches has hit -next. > > Sorry for asking you to look as this. It's OK :), I will drop this commit until your patch series are applied. Thanks, Dawei > > Sam > > > --- > > arch/sparc/include/asm/smp_32.h | 12 ++++++------ > > arch/sparc/kernel/kernel.h | 11 +++++++++++ > > arch/sparc/kernel/leon_smp.c | 11 ++++------- > > arch/sparc/kernel/sun4d_smp.c | 10 ++++------ > > arch/sparc/kernel/sun4m_smp.c | 10 ++++------ > > 5 files changed, 29 insertions(+), 25 deletions(-) > > > > diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h > > index 2cf7971d7f6c..9b6a166f6a57 100644 > > --- a/arch/sparc/include/asm/smp_32.h > > +++ b/arch/sparc/include/asm/smp_32.h > > @@ -54,7 +54,7 @@ void smp_bogo(struct seq_file *); > > void smp_info(struct seq_file *); > > > > struct sparc32_ipi_ops { > > - void (*cross_call)(void *func, cpumask_t mask, unsigned long arg1, > > + void (*cross_call)(void *func, const cpumask_t *mask, unsigned long arg1, > > unsigned long arg2, unsigned long arg3, > > unsigned long arg4); > > void (*resched)(int cpu); > > @@ -65,29 +65,29 @@ extern const struct sparc32_ipi_ops *sparc32_ipi_ops; > > > > static inline void xc0(void *func) > > { > > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, 0, 0, 0, 0); > > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, 0, 0, 0, 0); > > } > > > > static inline void xc1(void *func, unsigned long arg1) > > { > > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, 0, 0, 0); > > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, 0, 0, 0); > > } > > static inline void xc2(void *func, unsigned long arg1, unsigned long arg2) > > { > > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, arg1, arg2, 0, 0); > > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, arg1, arg2, 0, 0); > > } > > > > static inline void xc3(void *func, unsigned long arg1, unsigned long arg2, > > unsigned long arg3) > > { > > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, > > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, > > arg1, arg2, arg3, 0); > > } > > > > static inline void xc4(void *func, unsigned long arg1, unsigned long arg2, > > unsigned long arg3, unsigned long arg4) > > { > > - sparc32_ipi_ops->cross_call(func, *cpu_online_mask, > > + sparc32_ipi_ops->cross_call(func, cpu_online_mask, > > arg1, arg2, arg3, arg4); > > } > > > > diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h > > index a8fb7c0bf053..36747e8f7e36 100644 > > --- a/arch/sparc/kernel/kernel.h > > +++ b/arch/sparc/kernel/kernel.h > > @@ -4,6 +4,7 @@ > > > > #include <linux/interrupt.h> > > #include <linux/ftrace.h> > > +#include <linux/smp.h> > > > > #include <asm/traps.h> > > #include <asm/head.h> > > @@ -75,6 +76,16 @@ int sparc32_classify_syscall(unsigned int syscall); > > #endif > > > > #ifdef CONFIG_SPARC32 > > + > > +#ifdef CONFIG_SMP > > +static inline bool cpu_for_ipi(const cpumask_t *mask, unsigned int cpu) > > +{ > > + return cpumask_test_cpu(cpu, mask) && > > + cpumask_test_cpu(cpu, cpu_online_mask) && > > + cpu != smp_processor_id(); > > +} > > +#endif /* CONFIG_SMP */ > > + > > /* setup_32.c */ > > struct linux_romvec; > > void sparc32_start_kernel(struct linux_romvec *rp); > > diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c > > index 1ee393abc463..291884c8d82a 100644 > > --- a/arch/sparc/kernel/leon_smp.c > > +++ b/arch/sparc/kernel/leon_smp.c > > @@ -372,7 +372,7 @@ static struct smp_funcall { > > static DEFINE_SPINLOCK(cross_call_lock); > > > > /* Cross calls must be serialized, at least currently. */ > > -static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > +static void leon_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, > > unsigned long arg2, unsigned long arg3, > > unsigned long arg4) > > { > > @@ -403,14 +403,11 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > { > > register int i; > > > > - cpumask_clear_cpu(smp_processor_id(), &mask); > > - cpumask_and(&mask, cpu_online_mask, &mask); > > for (i = 0; i <= high; i++) { > > - if (cpumask_test_cpu(i, &mask)) { > > + if (cpu_for_ipi(mask, i)) { > > ccall_info.processors_in[i] = 0; > > ccall_info.processors_out[i] = 0; > > leon_send_ipi(i, LEON3_IRQ_CROSS_CALL); > > - > > } > > } > > } > > @@ -420,7 +417,7 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > > > i = 0; > > do { > > - if (!cpumask_test_cpu(i, &mask)) > > + if (!cpu_for_ipi(mask, i)) > > continue; > > > > while (!ccall_info.processors_in[i]) > > @@ -429,7 +426,7 @@ static void leon_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > > > i = 0; > > do { > > - if (!cpumask_test_cpu(i, &mask)) > > + if (!cpu_for_ipi(mask, i)) > > continue; > > > > while (!ccall_info.processors_out[i]) > > diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c > > index 9a62a5cf3337..7dc57ca05728 100644 > > --- a/arch/sparc/kernel/sun4d_smp.c > > +++ b/arch/sparc/kernel/sun4d_smp.c > > @@ -281,7 +281,7 @@ static struct smp_funcall { > > static DEFINE_SPINLOCK(cross_call_lock); > > > > /* Cross calls must be serialized, at least currently. */ > > -static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > +static void sun4d_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, > > unsigned long arg2, unsigned long arg3, > > unsigned long arg4) > > { > > @@ -315,10 +315,8 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > { > > register int i; > > > > - cpumask_clear_cpu(smp_processor_id(), &mask); > > - cpumask_and(&mask, cpu_online_mask, &mask); > > for (i = 0; i <= high; i++) { > > - if (cpumask_test_cpu(i, &mask)) { > > + if (cpu_for_ipi(mask, i)) { > > ccall_info.processors_in[i] = 0; > > ccall_info.processors_out[i] = 0; > > sun4d_send_ipi(i, IRQ_CROSS_CALL); > > @@ -331,7 +329,7 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > > > i = 0; > > do { > > - if (!cpumask_test_cpu(i, &mask)) > > + if (!cpu_for_ipi(mask, i)) > > continue; > > while (!ccall_info.processors_in[i]) > > barrier(); > > @@ -339,7 +337,7 @@ static void sun4d_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > > > i = 0; > > do { > > - if (!cpumask_test_cpu(i, &mask)) > > + if (!cpu_for_ipi(mask, i)) > > continue; > > while (!ccall_info.processors_out[i]) > > barrier(); > > diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c > > index 056df034e79e..3f43f64e3489 100644 > > --- a/arch/sparc/kernel/sun4m_smp.c > > +++ b/arch/sparc/kernel/sun4m_smp.c > > @@ -170,7 +170,7 @@ static struct smp_funcall { > > static DEFINE_SPINLOCK(cross_call_lock); > > > > /* Cross calls must be serialized, at least currently. */ > > -static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > +static void sun4m_cross_call(void *func, const cpumask_t *mask, unsigned long arg1, > > unsigned long arg2, unsigned long arg3, > > unsigned long arg4) > > { > > @@ -191,10 +191,8 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > { > > register int i; > > > > - cpumask_clear_cpu(smp_processor_id(), &mask); > > - cpumask_and(&mask, cpu_online_mask, &mask); > > for (i = 0; i < ncpus; i++) { > > - if (cpumask_test_cpu(i, &mask)) { > > + if (cpu_for_ipi(mask, i)) { > > ccall_info.processors_in[i] = 0; > > ccall_info.processors_out[i] = 0; > > sun4m_send_ipi(i, IRQ_CROSS_CALL); > > @@ -210,7 +208,7 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > > > i = 0; > > do { > > - if (!cpumask_test_cpu(i, &mask)) > > + if (!cpu_for_ipi(mask, i)) > > continue; > > while (!ccall_info.processors_in[i]) > > barrier(); > > @@ -218,7 +216,7 @@ static void sun4m_cross_call(void *func, cpumask_t mask, unsigned long arg1, > > > > i = 0; > > do { > > - if (!cpumask_test_cpu(i, &mask)) > > + if (!cpu_for_ipi(mask, i)) > > continue; > > while (!ccall_info.processors_out[i]) > > barrier(); > > -- > > 2.27.0 > ^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2024-04-22 6:20 UTC | newest] Thread overview: 19+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2024-04-20 5:15 [PATCH v2 0/7] Remove onstack cpumask var for sparc Dawei Li 2024-04-20 5:15 ` [PATCH v2 1/7] sparc/srmmu: Remove on-stack cpumask var Dawei Li 2024-04-20 7:58 ` Sam Ravnborg 2024-04-22 5:46 ` Dawei Li 2024-04-20 5:15 ` [PATCH v2 2/7] sparc/irq: " Dawei Li 2024-04-20 8:22 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 3/7] sparc/of: " Dawei Li 2024-04-20 8:23 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 4/7] sparc/pci_msi: " Dawei Li 2024-04-20 8:23 ` Sam Ravnborg 2024-04-20 5:15 ` [PATCH v2 5/7] sparc: " Dawei Li 2024-04-20 8:28 ` Sam Ravnborg 2024-04-22 5:49 ` Dawei Li 2024-04-20 5:15 ` [PATCH v2 6/7] sparc/leon: " Dawei Li 2024-04-20 8:32 ` Sam Ravnborg 2024-04-22 6:15 ` Dawei Li 2024-04-20 5:15 ` [PATCH v2 7/7] sparc/smp: " Dawei Li 2024-04-20 11:42 ` Sam Ravnborg 2024-04-22 6:19 ` Dawei Li
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox