public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [GIT PULL] percpu fixes for 2.6.32-rc6
@ 2009-11-13  3:53 Tejun Heo
  0 siblings, 0 replies; 29+ messages in thread
From: Tejun Heo @ 2009-11-13  3:53 UTC (permalink / raw)
  To: Linus Torvalds, Linux Kernel; +Cc: Ingo Molnar

Hello, Linus.

Please pull from the following percpu fix branch.

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git for-linus

It restructures pcpu_extend_area_map() to de-obfuscate locking and
fixes the following two bugs.

* wrong return value which makes the caller continue walking a
  modified list and may lead to an oops.

* possible deadlock caused by lock ordering inversion through irq.

Thanks.
---
Tejun Heo (1):
      percpu: restructure pcpu_extend_area_map() to fix bugs and improve readability

 mm/percpu.c |  121 +++++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 81 insertions(+), 40 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index d907971..5adfc26 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -355,62 +355,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 }

 /**
- * pcpu_extend_area_map - extend area map for allocation
- * @chunk: target chunk
+ * pcpu_need_to_extend - determine whether chunk area map needs to be extended
+ * @chunk: chunk of interest
  *
- * Extend area map of @chunk so that it can accomodate an allocation.
- * A single allocation can split an area into three areas, so this
- * function makes sure that @chunk->map has at least two extra slots.
+ * Determine whether area map of @chunk needs to be extended to
+ * accommodate a new allocation.
  *
  * CONTEXT:
- * pcpu_alloc_mutex, pcpu_lock.  pcpu_lock is released and reacquired
- * if area map is extended.
+ * pcpu_lock.
  *
  * RETURNS:
- * 0 if noop, 1 if successfully extended, -errno on failure.
+ * New target map allocation length if extension is necessary, 0
+ * otherwise.
  */
-static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
+static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
 {
 	int new_alloc;
-	int *new;
-	size_t size;

-	/* has enough? */
 	if (chunk->map_alloc >= chunk->map_used + 2)
 		return 0;

-	spin_unlock_irqrestore(&pcpu_lock, *flags);
-
 	new_alloc = PCPU_DFL_MAP_ALLOC;
 	while (new_alloc < chunk->map_used + 2)
 		new_alloc *= 2;

-	new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
-	if (!new) {
-		spin_lock_irqsave(&pcpu_lock, *flags);
+	return new_alloc;
+}
+
+/**
+ * pcpu_extend_area_map - extend area map of a chunk
+ * @chunk: chunk of interest
+ * @new_alloc: new target allocation length of the area map
+ *
+ * Extend area map of @chunk to have @new_alloc entries.
+ *
+ * CONTEXT:
+ * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+{
+	int *old = NULL, *new = NULL;
+	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+	unsigned long flags;
+
+	new = pcpu_mem_alloc(new_size);
+	if (!new)
 		return -ENOMEM;
-	}

-	/*
-	 * Acquire pcpu_lock and switch to new area map.  Only free
-	 * could have happened inbetween, so map_used couldn't have
-	 * grown.
-	 */
-	spin_lock_irqsave(&pcpu_lock, *flags);
-	BUG_ON(new_alloc < chunk->map_used + 2);
+	/* acquire pcpu_lock and switch to new area map */
+	spin_lock_irqsave(&pcpu_lock, flags);
+
+	if (new_alloc <= chunk->map_alloc)
+		goto out_unlock;

-	size = chunk->map_alloc * sizeof(chunk->map[0]);
-	memcpy(new, chunk->map, size);
+	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
+	memcpy(new, chunk->map, old_size);

 	/*
 	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
 	 * one of the first chunks and still using static map.
 	 */
 	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		pcpu_mem_free(chunk->map, size);
+		old = chunk->map;

 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
+	new = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&pcpu_lock, flags);
+
+	/*
+	 * pcpu_mem_free() might end up calling vfree() which uses
+	 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
+	 */
+	pcpu_mem_free(old, old_size);
+	pcpu_mem_free(new, new_size);
+
 	return 0;
 }

@@ -1049,7 +1073,7 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
 	const char *err;
-	int slot, off;
+	int slot, off, new_alloc;
 	unsigned long flags;

 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
@@ -1064,14 +1088,25 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	/* serve reserved allocations from the reserved chunk if available */
 	if (reserved && pcpu_reserved_chunk) {
 		chunk = pcpu_reserved_chunk;
-		if (size > chunk->contig_hint ||
-		    pcpu_extend_area_map(chunk, &flags) < 0) {
-			err = "failed to extend area map of reserved chunk";
+
+		if (size > chunk->contig_hint) {
+			err = "alloc from reserved chunk failed";
 			goto fail_unlock;
 		}
+
+		while ((new_alloc = pcpu_need_to_extend(chunk))) {
+			spin_unlock_irqrestore(&pcpu_lock, flags);
+			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+				err = "failed to extend area map of reserved chunk";
+				goto fail_unlock_mutex;
+			}
+			spin_lock_irqsave(&pcpu_lock, flags);
+		}
+
 		off = pcpu_alloc_area(chunk, size, align);
 		if (off >= 0)
 			goto area_found;
+
 		err = "alloc from reserved chunk failed";
 		goto fail_unlock;
 	}
@@ -1083,14 +1118,20 @@ restart:
 			if (size > chunk->contig_hint)
 				continue;

-			switch (pcpu_extend_area_map(chunk, &flags)) {
-			case 0:
-				break;
-			case 1:
-				goto restart;	/* pcpu_lock dropped, restart */
-			default:
-				err = "failed to extend area map";
-				goto fail_unlock;
+			new_alloc = pcpu_need_to_extend(chunk);
+			if (new_alloc) {
+				spin_unlock_irqrestore(&pcpu_lock, flags);
+				if (pcpu_extend_area_map(chunk,
+							 new_alloc) < 0) {
+					err = "failed to extend area map";
+					goto fail_unlock_mutex;
+				}
+				spin_lock_irqsave(&pcpu_lock, flags);
+				/*
+				 * pcpu_lock has been dropped, need to
+				 * restart cpu_slot list walking.
+				 */
+				goto restart;
 			}

 			off = pcpu_alloc_area(chunk, size, align);

-- 
tejun

^ permalink raw reply related	[flat|nested] 29+ messages in thread
* [GIT PULL] percpu fixes for 2.6.32-rc6
@ 2009-11-10  6:04 Tejun Heo
  2009-11-10 17:10 ` Linus Torvalds
  0 siblings, 1 reply; 29+ messages in thread
From: Tejun Heo @ 2009-11-10  6:04 UTC (permalink / raw)
  To: Linus Torvalds, Linux Kernel; +Cc: Yinghai Lu, Ingo Molnar

Hello, Linus.

Please pull from the following percpu fix branch.

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git for-linus

It fixes a possible deadlock caused by lock ordering inversion through
irq.

Thanks.
---
Tejun Heo (1):
      percpu: fix possible deadlock via irq lock inversion

 mm/percpu.c |   17 +++++++++++++++--

diff --git a/mm/percpu.c b/mm/percpu.c
index d907971..30cd343 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -372,7 +372,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
 {
 	int new_alloc;
-	int *new;
+	int *new, *old = NULL;
 	size_t size;

 	/* has enough? */
@@ -407,10 +407,23 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
 	 * one of the first chunks and still using static map.
 	 */
 	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		pcpu_mem_free(chunk->map, size);
+		old = chunk->map;

 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
+
+	/*
+	 * pcpu_mem_free() might end up calling vfree() which uses
+	 * IRQ-unsafe lock and thus can't be called with pcpu_lock
+	 * held.  Release and reacquire pcpu_lock if old map needs to
+	 * be freed.
+	 */
+	if (old) {
+		spin_unlock_irqrestore(&pcpu_lock, *flags);
+		pcpu_mem_free(old, size);
+		spin_lock_irqsave(&pcpu_lock, *flags);
+	}
+
 	return 0;
 }

-- 
tejun

^ permalink raw reply related	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2009-11-13  3:54 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-13  3:53 [GIT PULL] percpu fixes for 2.6.32-rc6 Tejun Heo
  -- strict thread matches above, loose matches on Subject: below --
2009-11-10  6:04 Tejun Heo
2009-11-10 17:10 ` Linus Torvalds
2009-11-10 18:33   ` Tejun Heo
2009-11-10 18:54     ` Linus Torvalds
2009-11-10 19:25       ` Tejun Heo
2009-11-10 19:37         ` Ingo Molnar
2009-11-10 19:50           ` Tejun Heo
2009-11-10 21:42             ` Linus Torvalds
2009-11-11  3:55               ` Tejun Heo
2009-11-11 11:31                 ` Ingo Molnar
2009-11-11 12:21                   ` Tejun Heo
2009-11-11 19:57                     ` Ingo Molnar
2009-11-12 10:11                       ` Tejun Heo
2009-11-12 10:36                         ` Ingo Molnar
2009-11-12 10:58                           ` Tejun Heo
2009-11-12 11:25                             ` Ingo Molnar
2009-11-12 14:26                             ` Oliver Neukum
2009-11-12 15:17                             ` Linus Torvalds
2009-11-12 15:30                               ` Tejun Heo
2009-11-12 15:45                                 ` Tejun Heo
2009-11-12 15:52                                   ` Linus Torvalds
2009-11-12 17:04                               ` Andres Baldrich
2009-11-12 17:18                                 ` Linus Torvalds
2009-11-12 18:04                                   ` Ingo Molnar
2009-11-12 18:14                               ` Andi Kleen
2009-11-12 11:07                         ` Ingo Molnar
2009-11-12 11:29                           ` Tejun Heo
2009-11-10 19:44         ` Tejun Heo

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.