linux-kernel.vger.kernel.org archive mirror
* [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues
       [not found] <cover.1356810877.git.decot@googlers.com>
@ 2012-12-29 19:57 ` David Decotigny
  2012-12-29 20:36   ` Josh Triplett
  2013-01-02 20:21   ` Ben Hutchings
  0 siblings, 2 replies; 6+ messages in thread
From: David Decotigny @ 2012-12-29 19:57 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ben Hutchings, David S. Miller, Or Gerlitz, Amir Vadai,
	Paul E. McKenney, Thomas Gleixner, Andrew Morton, Josh Triplett,
	David Howells, Paul Gortmaker, David Decotigny

In some cases, free_irq_cpu_rmap() is called while holding a lock
(e.g. rtnl). This can lead to deadlocks, because it invokes
flush_scheduled_work(), which ends up waiting for the whole system
workqueue to flush, while some pending work items might try to acquire
the lock we are already holding.
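
For illustration, here is a minimal sketch of the deadlock pattern (the
driver structure, work item and function names below are hypothetical,
not taken from any real driver):

  #include <linux/cpu_rmap.h>
  #include <linux/rtnetlink.h>
  #include <linux/workqueue.h>

  struct foo_priv {
          struct cpu_rmap *rmap;
  };

  /* Hypothetical work item on the system workqueue that needs rtnl. */
  static void foo_reset_work(struct work_struct *work)
  {
          rtnl_lock();            /* blocks: rtnl is held by the caller below */
          /* ... reconfigure the device ... */
          rtnl_unlock();
  }

  /* Hypothetical teardown path reached with rtnl held, e.g. an ethtool op. */
  static void foo_down(struct foo_priv *priv)
  {
          ASSERT_RTNL();                  /* caller already holds rtnl */
          free_irq_cpu_rmap(priv->rmap);  /* without this patch: calls
                                           * flush_scheduled_work(), which waits
                                           * for foo_reset_work() => deadlock */
  }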

This commit replaces that flush with reference counting of the
cpu_rmap, and removes irq_run_affinity_notifiers() altogether.

Signed-off-by: David Decotigny <decot@googlers.com>
---
 include/linux/cpu_rmap.h  |   13 ++++-------
 include/linux/interrupt.h |    5 ----
 lib/cpu_rmap.c            |   56 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 54 insertions(+), 20 deletions(-)

diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
index ac3bbb5..3be2813 100644
--- a/include/linux/cpu_rmap.h
+++ b/include/linux/cpu_rmap.h
@@ -13,9 +13,11 @@
 #include <linux/cpumask.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/kref.h>
 
 /**
  * struct cpu_rmap - CPU affinity reverse-map
+ * @refcount: kref for object
  * @size: Number of objects to be reverse-mapped
  * @used: Number of objects added
  * @obj: Pointer to array of object pointers
@@ -23,6 +25,7 @@
  *      based on affinity masks
  */
 struct cpu_rmap {
+	struct kref	refcount;
 	u16		size, used;
 	void		**obj;
 	struct {
@@ -33,15 +36,7 @@ struct cpu_rmap {
 #define CPU_RMAP_DIST_INF 0xffff
 
 extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
-
-/**
- * free_cpu_rmap - free CPU affinity reverse-map
- * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
- */
-static inline void free_cpu_rmap(struct cpu_rmap *rmap)
-{
-	kfree(rmap);
-}
+extern void free_cpu_rmap(struct cpu_rmap *rmap);
 
 extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
 extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5e4e617..5fa5afe 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -268,11 +268,6 @@ struct irq_affinity_notify {
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-static inline void irq_run_affinity_notifiers(void)
-{
-	flush_scheduled_work();
-}
-
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index 145dec5..adf048c 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 	if (!rmap)
 		return NULL;
 
+	kref_init(&rmap->refcount);
 	rmap->obj = (void **)((char *)rmap + obj_offset);
 
 	/* Initially assign CPUs to objects on a rota, since we have
@@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 }
 EXPORT_SYMBOL(alloc_cpu_rmap);
 
+/**
+ * cpu_rmap_reclaim - internal reclaiming helper called from kref_put
+ * @ref: kref to struct cpu_rmap
+ */
+static void cpu_rmap_reclaim(struct kref *ref)
+{
+	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
+	kfree(rmap);
+}
+
+/**
+ * cpu_rmap_put - internal helper to release ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+static inline void cpu_rmap_put(struct cpu_rmap *rmap)
+{
+	kref_put(&rmap->refcount, cpu_rmap_reclaim);
+}
+
+/**
+ * free_cpu_rmap - free CPU affinity reverse-map
+ * @rmap: Reverse-map allocated with alloc_cpu_rmap()
+ */
+void free_cpu_rmap(struct cpu_rmap *rmap)
+{
+	cpu_rmap_put(rmap);
+}
+EXPORT_SYMBOL(free_cpu_rmap);
+
 /* Reevaluate nearest object for given CPU, comparing with the given
  * neighbours at the given distance.
  */
@@ -197,8 +227,7 @@ struct irq_glue {
  * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
  * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
  *
- * Must be called in process context, before freeing the IRQs, and
- * without holding any locks required by global workqueue items.
+ * Must be called in process context, before freeing the IRQs.
  */
 void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 {
@@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 		glue = rmap->obj[index];
 		irq_set_affinity_notifier(glue->notify.irq, NULL);
 	}
-	irq_run_affinity_notifiers();
 
-	kfree(rmap);
+	cpu_rmap_put(rmap);
 }
 EXPORT_SYMBOL(free_irq_cpu_rmap);
 
+/**
+ * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
+ * @notify: struct irq_affinity_notify passed by irq/manage.c
+ * @mask: cpu mask for new SMP affinity
+ *
+ * This is executed in workqueue context.
+ */
 static void
 irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 {
@@ -230,16 +265,22 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
 }
 
+/**
+ * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
+ * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
+ */
 static void irq_cpu_rmap_release(struct kref *ref)
 {
 	struct irq_glue *glue =
 		container_of(ref, struct irq_glue, notify.kref);
+
+	cpu_rmap_put(glue->rmap);
 	kfree(glue);
 }
 
 /**
  * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
- * @rmap: The reverse-map
+ * @rmap: The per-IRQ reverse-map
  * @irq: The IRQ number
  *
  * This adds an IRQ affinity notifier that will update the reverse-map
@@ -259,9 +300,12 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
 	glue->notify.release = irq_cpu_rmap_release;
 	glue->rmap = rmap;
 	glue->index = cpu_rmap_add(rmap, glue);
+	kref_get(&rmap->refcount);
 	rc = irq_set_affinity_notifier(irq, &glue->notify);
-	if (rc)
+	if (rc) {
+		cpu_rmap_put(glue->rmap);
 		kfree(glue);
+	}
 	return rc;
 }
 EXPORT_SYMBOL(irq_cpu_rmap_add);
-- 
1.7.10.2.5.g20d7bc9


* Re: [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues
  2012-12-29 19:57 ` [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues David Decotigny
@ 2012-12-29 20:36   ` Josh Triplett
  2013-01-02 20:29     ` Ben Hutchings
  2013-01-02 20:21   ` Ben Hutchings
  1 sibling, 1 reply; 6+ messages in thread
From: Josh Triplett @ 2012-12-29 20:36 UTC (permalink / raw)
  To: David Decotigny
  Cc: linux-kernel, Ben Hutchings, David S. Miller, Or Gerlitz,
	Amir Vadai, Paul E. McKenney, Thomas Gleixner, Andrew Morton,
	David Howells, Paul Gortmaker

On Sat, Dec 29, 2012 at 11:57:09AM -0800, David Decotigny wrote:
> In some cases, free_irq_cpu_rmap() is called while holding a lock
> (e.g. rtnl). This can lead to deadlocks, because it invokes
> flush_scheduled_work(), which ends up waiting for the whole system
> workqueue to flush, while some pending work items might try to acquire
> the lock we are already holding.
> 
> This commit replaces that flush with reference counting of the
> cpu_rmap, and removes irq_run_affinity_notifiers() altogether.
> 
> Signed-off-by: David Decotigny <decot@googlers.com>

You might consider adding a cpu_rmap_get to parallel cpu_rmap_put.
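
For concreteness, such a helper could be as small as the sketch below
(just a suggestion mirroring cpu_rmap_put(), not something in v3):

  static inline struct cpu_rmap *cpu_rmap_get(struct cpu_rmap *rmap)
  {
          kref_get(&rmap->refcount);
          return rmap;    /* returning the pointer eases chaining; optional */
  }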

Also, why keep free_cpu_rmap around at this point?  As far as I can
tell, it has no callers.

Otherwise, this looks good to me.

- Josh Triplett


* Re: [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues
  2012-12-29 19:57 ` [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues David Decotigny
  2012-12-29 20:36   ` Josh Triplett
@ 2013-01-02 20:21   ` Ben Hutchings
  1 sibling, 0 replies; 6+ messages in thread
From: Ben Hutchings @ 2013-01-02 20:21 UTC (permalink / raw)
  To: David Decotigny
  Cc: linux-kernel, David S. Miller, Or Gerlitz, Amir Vadai,
	Paul E. McKenney, Thomas Gleixner, Andrew Morton, Josh Triplett,
	David Howells, Paul Gortmaker

On Sat, 2012-12-29 at 11:57 -0800, David Decotigny wrote:
> In some cases, free_irq_cpu_rmap() is called while holding a lock
> (eg. rtnl).

I made fairly sure that it didn't get called while holding the RTNL
lock.  However it looks like some mlx4_en ethtool ops now call it
(indirectly).

> This can lead to deadlocks, because it invokes
> flush_scheduled_work() which ends up waiting for whole system
> workqueue to flush, but some pending works might try to acquire the
> lock we are already holding.

Which is why the kernel-doc says not to do that.  But realistically a
device driver may not easily be able to avoid holding either its
underlying device's lock or a subsystem global lock... and presumably
there may sometimes be a work item that needs the device lock.

> This commit uses reference-counting to replace
> irq_run_affinity_notifiers(). It also removes
> irq_run_affinity_notifiers() altogether.
> 
> Signed-off-by: David Decotigny <decot@googlers.com>
[...]
> --- a/lib/cpu_rmap.c
> +++ b/lib/cpu_rmap.c
[...]
>  /**
>   * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
> - * @rmap: The reverse-map
> + * @rmap: The per-IRQ reverse-map
[...]

Please drop this comment change; the only 'per-IRQ' thing is the IRQ
notifier structure, which is private.

With that, you can add:

Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.



* Re: [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues
  2012-12-29 20:36   ` Josh Triplett
@ 2013-01-02 20:29     ` Ben Hutchings
  2013-01-02 20:34       ` David Decotigny
  0 siblings, 1 reply; 6+ messages in thread
From: Ben Hutchings @ 2013-01-02 20:29 UTC (permalink / raw)
  To: Josh Triplett
  Cc: David Decotigny, linux-kernel, David S. Miller, Or Gerlitz,
	Amir Vadai, Paul E. McKenney, Thomas Gleixner, Andrew Morton,
	David Howells, Paul Gortmaker

On Sat, 2012-12-29 at 12:36 -0800, Josh Triplett wrote:
> On Sat, Dec 29, 2012 at 11:57:09AM -0800, David Decotigny wrote:
> > In some cases, free_irq_cpu_rmap() is called while holding a lock
> > (e.g. rtnl). This can lead to deadlocks, because it invokes
> > flush_scheduled_work(), which ends up waiting for the whole system
> > workqueue to flush, while some pending work items might try to acquire
> > the lock we are already holding.
> > 
> > This commit replaces that flush with reference counting of the
> > cpu_rmap, and removes irq_run_affinity_notifiers() altogether.
> > 
> > Signed-off-by: David Decotigny <decot@googlers.com>
> 
> You might consider adding a cpu_rmap_get to parallel cpu_rmap_put.
> 
> Also, why keep free_cpu_rmap around at this point?  As far as I can
> tell, it has no callers.
> 
> Otherwise, this looks good to me.

I intended to make cpu_rmap usable independently of IRQ notification,
although as you note there have been no other users so far.
free_cpu_rmap() is now effectively an alias for cpu_rmap_put(), and the
latter *does* have a caller.  So perhaps cpu_rmap_put() should be extern
and the alias dropped.
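
(Roughly, the header would then just export the put helper -- a
hypothetical sketch of that direction, not what v3 does:)

  /* in cpu_rmap.h: free_cpu_rmap() goes away, callers drop their
   * reference directly */
  extern void cpu_rmap_put(struct cpu_rmap *rmap);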

Ben.

-- 
Ben Hutchings, Staff Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.



* Re: [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues
  2013-01-02 20:29     ` Ben Hutchings
@ 2013-01-02 20:34       ` David Decotigny
  2013-01-02 21:12         ` David Decotigny
  0 siblings, 1 reply; 6+ messages in thread
From: David Decotigny @ 2013-01-02 20:34 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Josh Triplett, linux-kernel@vger.kernel.org, David S. Miller,
	Or Gerlitz, Amir Vadai, Paul E. McKenney, Thomas Gleixner,
	Andrew Morton, David Howells, Paul Gortmaker

Thanks,

I appreciate your review and clarification, as I was afraid there
could be something I missed. I will send v4 of this patch: an update
with the cpu_rmap_get helper that Josh suggested and the
"free_cpu_rmap" alias removed.

Regards,

--
David Decotigny

On Wed, Jan 2, 2013 at 12:29 PM, Ben Hutchings
<bhutchings@solarflare.com> wrote:
> On Sat, 2012-12-29 at 12:36 -0800, Josh Triplett wrote:
>> On Sat, Dec 29, 2012 at 11:57:09AM -0800, David Decotigny wrote:
>> > In some cases, free_irq_cpu_rmap() is called while holding a lock
>> > (e.g. rtnl). This can lead to deadlocks, because it invokes
>> > flush_scheduled_work(), which ends up waiting for the whole system
>> > workqueue to flush, while some pending work items might try to acquire
>> > the lock we are already holding.
>> >
>> > This commit replaces that flush with reference counting of the
>> > cpu_rmap, and removes irq_run_affinity_notifiers() altogether.
>> >
>> > Signed-off-by: David Decotigny <decot@googlers.com>
>>
>> You might consider adding a cpu_rmap_get to parallel cpu_rmap_put.
>>
>> Also, why keep free_cpu_rmap around at this point?  As far as I can
>> tell, it has no callers.
>>
>> Otherwise, this looks good to me.
>
> I intended to make cpu_rmap usable independently of IRQ notification,
> although as you note there have been no other users so far.
> free_cpu_rmap() is now effectively an alias for cpu_rmap_put(), and the
> latter *does* have a caller.  So perhaps cpu_rmap_put() should be extern
> and the alias dropped.
>
> Ben.
>
> --
> Ben Hutchings, Staff Engineer, Solarflare
> Not speaking for my employer; that's the marketing department's job.
> They asked us to note that Solarflare product names are trademarked.
>


* Re: [PATCH v3] lib: cpu_rmap: avoid flushing all workqueues
  2013-01-02 20:34       ` David Decotigny
@ 2013-01-02 21:12         ` David Decotigny
  0 siblings, 0 replies; 6+ messages in thread
From: David Decotigny @ 2013-01-02 21:12 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: Josh Triplett, linux-kernel@vger.kernel.org, David S. Miller,
	Or Gerlitz, Amir Vadai, Paul E. McKenney, Thomas Gleixner,
	Andrew Morton, David Howells, Paul Gortmaker

Revising my position: v4 will only have the comment updates (Ben) and
the new cpu_rmap_get internal helper (Josh). IMHO, the further API
rework discussed with Ben and Eric ought to be in a later patch, as it
affects drivers.

--
David Decotigny


On Wed, Jan 2, 2013 at 12:34 PM, David Decotigny <decot@googlers.com> wrote:
> Thanks,
>
> I appreciate your review and clarification, as I was afraid there
> could be something I missed. I will send v4 of this patch: an update
> with the cpu_rmap_get helper that Josh suggested and the
> "free_cpu_rmap" alias removed.
>
> Regards,
>
> --
> David Decotigny
>
> On Wed, Jan 2, 2013 at 12:29 PM, Ben Hutchings
> <bhutchings@solarflare.com> wrote:
>> On Sat, 2012-12-29 at 12:36 -0800, Josh Triplett wrote:
>>> On Sat, Dec 29, 2012 at 11:57:09AM -0800, David Decotigny wrote:
>>> > In some cases, free_irq_cpu_rmap() is called while holding a lock
>>> > (e.g. rtnl). This can lead to deadlocks, because it invokes
>>> > flush_scheduled_work(), which ends up waiting for the whole system
>>> > workqueue to flush, while some pending work items might try to acquire
>>> > the lock we are already holding.
>>> >
>>> > This commit replaces that flush with reference counting of the
>>> > cpu_rmap, and removes irq_run_affinity_notifiers() altogether.
>>> >
>>> > Signed-off-by: David Decotigny <decot@googlers.com>
>>>
>>> You might consider adding a cpu_rmap_get to parallel cpu_rmap_put.
>>>
>>> Also, why keep free_cpu_rmap around at this point?  As far as I can
>>> tell, it has no callers.
>>>
>>> Otherwise, this looks good to me.
>>
>> I intended to make cpu_rmap usable independently of IRQ notification,
>> although as you note there have been no other users so far.
>> free_cpu_rmap() is now effectively an alias for cpu_rmap_put(), and the
>> latter *does* have a caller.  So perhaps cpu_rmap_put() should be extern
>> and the alias dropped.
>>
>> Ben.
>>
>> --
>> Ben Hutchings, Staff Engineer, Solarflare
>> Not speaking for my employer; that's the marketing department's job.
>> They asked us to note that Solarflare product names are trademarked.
>>

