* [PATCH 02/33] PCI: Protect against concurrent change of housekeeping cpumask [not found] <20250829154814.47015-1-frederic@kernel.org> @ 2025-08-29 15:47 ` Frederic Weisbecker [not found] ` <458c5db8-0c31-4c02-9c41-b7eca851d04a@redhat.com> 2025-08-29 15:48 ` [PATCH 20/33] PCI: Remove superfluous HK_TYPE_WQ check Frederic Weisbecker 1 sibling, 1 reply; 5+ messages in thread From: Frederic Weisbecker @ 2025-08-29 15:47 UTC (permalink / raw) To: LKML Cc: Frederic Weisbecker, Bjorn Helgaas, Marco Crivellari, Michal Hocko, Peter Zijlstra, Tejun Heo, Thomas Gleixner, Vlastimil Babka, Waiman Long, linux-pci HK_TYPE_DOMAIN will soon integrate cpuset isolated partitions and therefore be made modifyable at runtime. Synchronize against the cpumask update using RCU. The RCU locked section includes both the housekeeping CPU target election for the PCI probe work and the work enqueue. This way the housekeeping update side will simply need to flush the pending related works after updating the housekeeping mask in order to make sure that no PCI work ever executes on an isolated CPU. Signed-off-by: Frederic Weisbecker <frederic@kernel.org> --- drivers/pci/pci-driver.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 63665240ae87..cf2b83004886 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -302,9 +302,8 @@ struct drv_dev_and_id { const struct pci_device_id *id; }; -static long local_pci_probe(void *_ddi) +static int local_pci_probe(struct drv_dev_and_id *ddi) { - struct drv_dev_and_id *ddi = _ddi; struct pci_dev *pci_dev = ddi->dev; struct pci_driver *pci_drv = ddi->drv; struct device *dev = &pci_dev->dev; @@ -338,6 +337,19 @@ static long local_pci_probe(void *_ddi) return 0; } +struct pci_probe_arg { + struct drv_dev_and_id *ddi; + struct work_struct work; + int ret; +}; + +static void local_pci_probe_callback(struct work_struct *work) +{ + struct pci_probe_arg *arg = container_of(work, struct pci_probe_arg, work); + + arg->ret = local_pci_probe(arg->ddi); +} + static bool pci_physfn_is_probed(struct pci_dev *dev) { #ifdef CONFIG_PCI_IOV @@ -362,34 +374,44 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, dev->is_probed = 1; cpu_hotplug_disable(); - /* * Prevent nesting work_on_cpu() for the case where a Virtual Function * device is probed from work_on_cpu() of the Physical device. */ if (node < 0 || node >= MAX_NUMNODES || !node_online(node) || pci_physfn_is_probed(dev)) { - cpu = nr_cpu_ids; + error = local_pci_probe(&ddi); } else { cpumask_var_t wq_domain_mask; + struct pci_probe_arg arg = { .ddi = &ddi }; + + INIT_WORK_ONSTACK(&arg.work, local_pci_probe_callback); if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) { error = -ENOMEM; goto out; } + + rcu_read_lock(); cpumask_and(wq_domain_mask, housekeeping_cpumask(HK_TYPE_WQ), housekeeping_cpumask(HK_TYPE_DOMAIN)); cpu = cpumask_any_and(cpumask_of_node(node), wq_domain_mask); + if (cpu < nr_cpu_ids) { + schedule_work_on(cpu, &arg.work); + rcu_read_unlock(); + flush_work(&arg.work); + error = arg.ret; + } else { + rcu_read_unlock(); + error = local_pci_probe(&ddi); + } + free_cpumask_var(wq_domain_mask); + destroy_work_on_stack(&arg.work); } - - if (cpu < nr_cpu_ids) - error = work_on_cpu(cpu, local_pci_probe, &ddi); - else - error = local_pci_probe(&ddi); out: dev->is_probed = 0; cpu_hotplug_enable(); -- 2.51.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread
[parent not found: <458c5db8-0c31-4c02-9c41-b7eca851d04a@redhat.com>]
* Re: [PATCH 02/33] PCI: Protect against concurrent change of housekeeping cpumask [not found] ` <458c5db8-0c31-4c02-9c41-b7eca851d04a@redhat.com> @ 2025-09-18 14:00 ` Frederic Weisbecker 2025-09-22 21:51 ` Waiman Long 0 siblings, 1 reply; 5+ messages in thread From: Frederic Weisbecker @ 2025-09-18 14:00 UTC (permalink / raw) To: Waiman Long Cc: LKML, Bjorn Helgaas, Marco Crivellari, Michal Hocko, Peter Zijlstra, Tejun Heo, Thomas Gleixner, Vlastimil Babka, linux-pci Le Fri, Aug 29, 2025 at 06:01:17PM -0400, Waiman Long a écrit : > On 8/29/25 11:47 AM, Frederic Weisbecker wrote: > > HK_TYPE_DOMAIN will soon integrate cpuset isolated partitions and > > therefore be made modifyable at runtime. Synchronize against the cpumask > > update using RCU. > > > > The RCU locked section includes both the housekeeping CPU target > > election for the PCI probe work and the work enqueue. > > > > This way the housekeeping update side will simply need to flush the > > pending related works after updating the housekeeping mask in order to > > make sure that no PCI work ever executes on an isolated CPU. > > > > Signed-off-by: Frederic Weisbecker<frederic@kernel.org> > > --- > > drivers/pci/pci-driver.c | 40 +++++++++++++++++++++++++++++++--------- > > 1 file changed, 31 insertions(+), 9 deletions(-) > > > > diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c > > index 63665240ae87..cf2b83004886 100644 > > --- a/drivers/pci/pci-driver.c > > +++ b/drivers/pci/pci-driver.c > > @@ -302,9 +302,8 @@ struct drv_dev_and_id { > > const struct pci_device_id *id; > > }; > > -static long local_pci_probe(void *_ddi) > > +static int local_pci_probe(struct drv_dev_and_id *ddi) > > { > > - struct drv_dev_and_id *ddi = _ddi; > > struct pci_dev *pci_dev = ddi->dev; > > struct pci_driver *pci_drv = ddi->drv; > > struct device *dev = &pci_dev->dev; > > @@ -338,6 +337,19 @@ static long local_pci_probe(void *_ddi) > > return 0; > > } > > +struct pci_probe_arg { > > + struct drv_dev_and_id *ddi; > > + struct work_struct work; > > + int ret; > > +}; > > + > > +static void local_pci_probe_callback(struct work_struct *work) > > +{ > > + struct pci_probe_arg *arg = container_of(work, struct pci_probe_arg, work); > > + > > + arg->ret = local_pci_probe(arg->ddi); > > +} > > + > > static bool pci_physfn_is_probed(struct pci_dev *dev) > > { > > #ifdef CONFIG_PCI_IOV > > @@ -362,34 +374,44 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, > > dev->is_probed = 1; > > cpu_hotplug_disable(); > > - > > /* > > * Prevent nesting work_on_cpu() for the case where a Virtual Function > > * device is probed from work_on_cpu() of the Physical device. > > */ > > if (node < 0 || node >= MAX_NUMNODES || !node_online(node) || > > pci_physfn_is_probed(dev)) { > > - cpu = nr_cpu_ids; > > + error = local_pci_probe(&ddi); > > } else { > > cpumask_var_t wq_domain_mask; > > + struct pci_probe_arg arg = { .ddi = &ddi }; > > + > > + INIT_WORK_ONSTACK(&arg.work, local_pci_probe_callback); > > if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) { > > error = -ENOMEM; > > goto out; > > } > > + > > + rcu_read_lock(); > > cpumask_and(wq_domain_mask, > > housekeeping_cpumask(HK_TYPE_WQ), > > housekeeping_cpumask(HK_TYPE_DOMAIN)); > > cpu = cpumask_any_and(cpumask_of_node(node), > > wq_domain_mask); > > + if (cpu < nr_cpu_ids) { > > + schedule_work_on(cpu, &arg.work); > > + rcu_read_unlock(); > > + flush_work(&arg.work); > > + error = arg.ret; > > + } else { > > + rcu_read_unlock(); > > + error = local_pci_probe(&ddi); > > + } > > + > > free_cpumask_var(wq_domain_mask); > > + destroy_work_on_stack(&arg.work); > > } > > - > > - if (cpu < nr_cpu_ids) > > - error = work_on_cpu(cpu, local_pci_probe, &ddi); > > - else > > - error = local_pci_probe(&ddi); > > out: > > dev->is_probed = 0; > > cpu_hotplug_enable(); > > A question. Is the purpose of open-coding work_on_cpu() to avoid calling > INIT_WORK_ONSTACK() and destroy_work_on_stack() in RCU read-side critical > section? These two macro/function may call debugobjects code which I don't > know if they are allowed inside rcu_read_lock() critical section. > > Cheers, Longman No the point is that I need to keep the target selection (housekeeping_cpumask() read) and the work queue within the same RCU critical section so that things are synchronized that way: CPU 0 CPU 1 ----- ----- rcu_read_lock() housekeeping_update() cpu = cpumask_any(housekeeping_cpumask(...)) housekeeping_cpumask &= ~val queue_work_on(cpu, pci_probe_wq, work) synchronize_rcu() rcu_read_unlock() flush_workqueue(pci_probe_wq) flush_work(work) And I can't include the whole work_on_cpu() within rcu_read_lock() because flush_work() may sleep. Also now that you mention it, I need to create that pci_probe_wq and flush it :-) -- Frederic Weisbecker SUSE Labs ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 02/33] PCI: Protect against concurrent change of housekeeping cpumask 2025-09-18 14:00 ` Frederic Weisbecker @ 2025-09-22 21:51 ` Waiman Long 2025-09-23 9:07 ` Frederic Weisbecker 0 siblings, 1 reply; 5+ messages in thread From: Waiman Long @ 2025-09-22 21:51 UTC (permalink / raw) To: Frederic Weisbecker, Waiman Long Cc: LKML, Bjorn Helgaas, Marco Crivellari, Michal Hocko, Peter Zijlstra, Tejun Heo, Thomas Gleixner, Vlastimil Babka, linux-pci On 9/18/25 10:00 AM, Frederic Weisbecker wrote: > Le Fri, Aug 29, 2025 at 06:01:17PM -0400, Waiman Long a écrit : >> On 8/29/25 11:47 AM, Frederic Weisbecker wrote: >>> HK_TYPE_DOMAIN will soon integrate cpuset isolated partitions and >>> therefore be made modifyable at runtime. Synchronize against the cpumask >>> update using RCU. >>> >>> The RCU locked section includes both the housekeeping CPU target >>> election for the PCI probe work and the work enqueue. >>> >>> This way the housekeeping update side will simply need to flush the >>> pending related works after updating the housekeeping mask in order to >>> make sure that no PCI work ever executes on an isolated CPU. >>> >>> Signed-off-by: Frederic Weisbecker<frederic@kernel.org> >>> --- >>> drivers/pci/pci-driver.c | 40 +++++++++++++++++++++++++++++++--------- >>> 1 file changed, 31 insertions(+), 9 deletions(-) >>> >>> diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c >>> index 63665240ae87..cf2b83004886 100644 >>> --- a/drivers/pci/pci-driver.c >>> +++ b/drivers/pci/pci-driver.c >>> @@ -302,9 +302,8 @@ struct drv_dev_and_id { >>> const struct pci_device_id *id; >>> }; >>> -static long local_pci_probe(void *_ddi) >>> +static int local_pci_probe(struct drv_dev_and_id *ddi) >>> { >>> - struct drv_dev_and_id *ddi = _ddi; >>> struct pci_dev *pci_dev = ddi->dev; >>> struct pci_driver *pci_drv = ddi->drv; >>> struct device *dev = &pci_dev->dev; >>> @@ -338,6 +337,19 @@ static long local_pci_probe(void *_ddi) >>> return 0; >>> } >>> +struct pci_probe_arg { >>> + struct drv_dev_and_id *ddi; >>> + struct work_struct work; >>> + int ret; >>> +}; >>> + >>> +static void local_pci_probe_callback(struct work_struct *work) >>> +{ >>> + struct pci_probe_arg *arg = container_of(work, struct pci_probe_arg, work); >>> + >>> + arg->ret = local_pci_probe(arg->ddi); >>> +} >>> + >>> static bool pci_physfn_is_probed(struct pci_dev *dev) >>> { >>> #ifdef CONFIG_PCI_IOV >>> @@ -362,34 +374,44 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, >>> dev->is_probed = 1; >>> cpu_hotplug_disable(); >>> - >>> /* >>> * Prevent nesting work_on_cpu() for the case where a Virtual Function >>> * device is probed from work_on_cpu() of the Physical device. >>> */ >>> if (node < 0 || node >= MAX_NUMNODES || !node_online(node) || >>> pci_physfn_is_probed(dev)) { >>> - cpu = nr_cpu_ids; >>> + error = local_pci_probe(&ddi); >>> } else { >>> cpumask_var_t wq_domain_mask; >>> + struct pci_probe_arg arg = { .ddi = &ddi }; >>> + >>> + INIT_WORK_ONSTACK(&arg.work, local_pci_probe_callback); >>> if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) { >>> error = -ENOMEM; >>> goto out; >>> } >>> + >>> + rcu_read_lock(); >>> cpumask_and(wq_domain_mask, >>> housekeeping_cpumask(HK_TYPE_WQ), >>> housekeeping_cpumask(HK_TYPE_DOMAIN)); >>> cpu = cpumask_any_and(cpumask_of_node(node), >>> wq_domain_mask); >>> + if (cpu < nr_cpu_ids) { >>> + schedule_work_on(cpu, &arg.work); >>> + rcu_read_unlock(); >>> + flush_work(&arg.work); >>> + error = arg.ret; >>> + } else { >>> + rcu_read_unlock(); >>> + error = local_pci_probe(&ddi); >>> + } >>> + >>> free_cpumask_var(wq_domain_mask); >>> + destroy_work_on_stack(&arg.work); >>> } >>> - >>> - if (cpu < nr_cpu_ids) >>> - error = work_on_cpu(cpu, local_pci_probe, &ddi); >>> - else >>> - error = local_pci_probe(&ddi); >>> out: >>> dev->is_probed = 0; >>> cpu_hotplug_enable(); >> A question. Is the purpose of open-coding work_on_cpu() to avoid calling >> INIT_WORK_ONSTACK() and destroy_work_on_stack() in RCU read-side critical >> section? These two macro/function may call debugobjects code which I don't >> know if they are allowed inside rcu_read_lock() critical section. >> >> Cheers, Longman > No the point is that I need to keep the target selection > (housekeeping_cpumask() read) and the work queue within the same > RCU critical section so that things are synchronized that way: > > CPU 0 CPU 1 > ----- ----- > rcu_read_lock() housekeeping_update() > cpu = cpumask_any(housekeeping_cpumask(...)) housekeeping_cpumask &= ~val > queue_work_on(cpu, pci_probe_wq, work) synchronize_rcu() > rcu_read_unlock() flush_workqueue(pci_probe_wq) > flush_work(work) > > And I can't include the whole work_on_cpu() within rcu_read_lock() because > flush_work() may sleep. Right, you are trying to avoid flush_work() within rcu_read_lock() critical section. It makes it easier to review if you mention that in the commit log. > > Also now that you mention it, I need to create that pci_probe_wq and flush it :-) OK, another wq :-) Cheers, Longman ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 02/33] PCI: Protect against concurrent change of housekeeping cpumask 2025-09-22 21:51 ` Waiman Long @ 2025-09-23 9:07 ` Frederic Weisbecker 0 siblings, 0 replies; 5+ messages in thread From: Frederic Weisbecker @ 2025-09-23 9:07 UTC (permalink / raw) To: Waiman Long Cc: LKML, Bjorn Helgaas, Marco Crivellari, Michal Hocko, Peter Zijlstra, Tejun Heo, Thomas Gleixner, Vlastimil Babka, linux-pci Le Mon, Sep 22, 2025 at 05:51:39PM -0400, Waiman Long a écrit : > On 9/18/25 10:00 AM, Frederic Weisbecker wrote: > > No the point is that I need to keep the target selection > > (housekeeping_cpumask() read) and the work queue within the same > > RCU critical section so that things are synchronized that way: > > > > CPU 0 CPU 1 > > ----- ----- > > rcu_read_lock() housekeeping_update() > > cpu = cpumask_any(housekeeping_cpumask(...)) housekeeping_cpumask &= ~val > > queue_work_on(cpu, pci_probe_wq, work) synchronize_rcu() > > rcu_read_unlock() flush_workqueue(pci_probe_wq) > > flush_work(work) > > And I can't include the whole work_on_cpu() within rcu_read_lock() because > > flush_work() may sleep. > > Right, you are trying to avoid flush_work() within rcu_read_lock() critical > section. It makes it easier to review if you mention that in the commit log. Good point! > > > > > Also now that you mention it, I need to create that pci_probe_wq and flush it :-) > > OK, another wq :-) Yeah I know :-s > > Cheers, > Longman > -- Frederic Weisbecker SUSE Labs ^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 20/33] PCI: Remove superfluous HK_TYPE_WQ check [not found] <20250829154814.47015-1-frederic@kernel.org> 2025-08-29 15:47 ` [PATCH 02/33] PCI: Protect against concurrent change of housekeeping cpumask Frederic Weisbecker @ 2025-08-29 15:48 ` Frederic Weisbecker 1 sibling, 0 replies; 5+ messages in thread From: Frederic Weisbecker @ 2025-08-29 15:48 UTC (permalink / raw) To: LKML Cc: Frederic Weisbecker, Bjorn Helgaas, Marco Crivellari, Michal Hocko, Peter Zijlstra, Tejun Heo, Thomas Gleixner, Vlastimil Babka, Waiman Long, linux-pci It doesn't make sense to use nohz_full without also isolating the related CPUs from the domain topology, either through the use of isolcpus= or cpuset isolated partitions. And now HK_TYPE_DOMAIN includes all kinds of domain isolated CPUs. This means that HK_TYPE_KERNEL_NOISE (of which HK_TYPE_WQ is only an alias) is always a superset of HK_TYPE_DOMAIN. Therefore: HK_TYPE_KERNEL_NOISE & HK_TYPE_DOMAIN = HK_TYPE_DOMAIN Simplify the PCI probe target election accordingly. Signed-off-by: Frederic Weisbecker <frederic@kernel.org> --- drivers/pci/pci-driver.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index cf2b83004886..326112ec516e 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -382,23 +382,14 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, pci_physfn_is_probed(dev)) { error = local_pci_probe(&ddi); } else { - cpumask_var_t wq_domain_mask; struct pci_probe_arg arg = { .ddi = &ddi }; INIT_WORK_ONSTACK(&arg.work, local_pci_probe_callback); - if (!zalloc_cpumask_var(&wq_domain_mask, GFP_KERNEL)) { - error = -ENOMEM; - goto out; - } - rcu_read_lock(); - cpumask_and(wq_domain_mask, - housekeeping_cpumask(HK_TYPE_WQ), - housekeeping_cpumask(HK_TYPE_DOMAIN)); - cpu = cpumask_any_and(cpumask_of_node(node), - wq_domain_mask); + housekeeping_cpumask(HK_TYPE_DOMAIN)); + if (cpu < nr_cpu_ids) { schedule_work_on(cpu, &arg.work); rcu_read_unlock(); @@ -409,10 +400,9 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, error = local_pci_probe(&ddi); } - free_cpumask_var(wq_domain_mask); destroy_work_on_stack(&arg.work); } -out: + dev->is_probed = 0; cpu_hotplug_enable(); return error; -- 2.51.0 ^ permalink raw reply related [flat|nested] 5+ messages in thread
end of thread, other threads:[~2025-09-23 9:07 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20250829154814.47015-1-frederic@kernel.org>
2025-08-29 15:47 ` [PATCH 02/33] PCI: Protect against concurrent change of housekeeping cpumask Frederic Weisbecker
[not found] ` <458c5db8-0c31-4c02-9c41-b7eca851d04a@redhat.com>
2025-09-18 14:00 ` Frederic Weisbecker
2025-09-22 21:51 ` Waiman Long
2025-09-23 9:07 ` Frederic Weisbecker
2025-08-29 15:48 ` [PATCH 20/33] PCI: Remove superfluous HK_TYPE_WQ check Frederic Weisbecker
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox