All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tejun Heo <tj@kernel.org>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	Linux Kernel <linux-kernel@vger.kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Subject: [GIT PULL] percpu fixes for 2.6.32-rc6
Date: Fri, 13 Nov 2009 12:53:34 +0900	[thread overview]
Message-ID: <4AFCD83E.4000408@kernel.org> (raw)

Hello, Linus.

Please pull from the following percpu fix branch.

  git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu.git for-linus

It restructures pcpu_extend_area_map() to de-obfuscate locking and
fixes the following two bugs.

* wrong return value which makes the caller continue walking a
  modified list and may lead to oops.

* possible deadlock caused by lock ordering inversion through irq.

Thanks.
---
Tejun Heo (1):
      percpu: restructure pcpu_extend_area_map() to fix bugs and improve readability

 mm/percpu.c |  121 +++++++++++++++++++++++++++++++++++++++-------------------
 1 files changed, 81 insertions(+), 40 deletions(-)

diff --git a/mm/percpu.c b/mm/percpu.c
index d907971..5adfc26 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -355,62 +355,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 }

 /**
- * pcpu_extend_area_map - extend area map for allocation
- * @chunk: target chunk
+ * pcpu_need_to_extend - determine whether chunk area map needs to be extended
+ * @chunk: chunk of interest
  *
- * Extend area map of @chunk so that it can accomodate an allocation.
- * A single allocation can split an area into three areas, so this
- * function makes sure that @chunk->map has at least two extra slots.
+ * Determine whether area map of @chunk needs to be extended to
+ * accomodate a new allocation.
  *
  * CONTEXT:
- * pcpu_alloc_mutex, pcpu_lock.  pcpu_lock is released and reacquired
- * if area map is extended.
+ * pcpu_lock.
  *
  * RETURNS:
- * 0 if noop, 1 if successfully extended, -errno on failure.
+ * New target map allocation length if extension is necessary, 0
+ * otherwise.
  */
-static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
+static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
 {
 	int new_alloc;
-	int *new;
-	size_t size;

-	/* has enough? */
 	if (chunk->map_alloc >= chunk->map_used + 2)
 		return 0;

-	spin_unlock_irqrestore(&pcpu_lock, *flags);
-
 	new_alloc = PCPU_DFL_MAP_ALLOC;
 	while (new_alloc < chunk->map_used + 2)
 		new_alloc *= 2;

-	new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
-	if (!new) {
-		spin_lock_irqsave(&pcpu_lock, *flags);
+	return new_alloc;
+}
+
+/**
+ * pcpu_extend_area_map - extend area map of a chunk
+ * @chunk: chunk of interest
+ * @new_alloc: new target allocation length of the area map
+ *
+ * Extend area map of @chunk to have @new_alloc entries.
+ *
+ * CONTEXT:
+ * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+{
+	int *old = NULL, *new = NULL;
+	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+	unsigned long flags;
+
+	new = pcpu_mem_alloc(new_size);
+	if (!new)
 		return -ENOMEM;
-	}

-	/*
-	 * Acquire pcpu_lock and switch to new area map.  Only free
-	 * could have happened inbetween, so map_used couldn't have
-	 * grown.
-	 */
-	spin_lock_irqsave(&pcpu_lock, *flags);
-	BUG_ON(new_alloc < chunk->map_used + 2);
+	/* acquire pcpu_lock and switch to new area map */
+	spin_lock_irqsave(&pcpu_lock, flags);
+
+	if (new_alloc <= chunk->map_alloc)
+		goto out_unlock;

-	size = chunk->map_alloc * sizeof(chunk->map[0]);
-	memcpy(new, chunk->map, size);
+	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
+	memcpy(new, chunk->map, old_size);

 	/*
 	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
 	 * one of the first chunks and still using static map.
 	 */
 	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		pcpu_mem_free(chunk->map, size);
+		old = chunk->map;

 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
+	new = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&pcpu_lock, flags);
+
+	/*
+	 * pcpu_mem_free() might end up calling vfree() which uses
+	 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
+	 */
+	pcpu_mem_free(old, old_size);
+	pcpu_mem_free(new, new_size);
+
 	return 0;
 }

@@ -1049,7 +1073,7 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
 	const char *err;
-	int slot, off;
+	int slot, off, new_alloc;
 	unsigned long flags;

 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
@@ -1064,14 +1088,25 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	/* serve reserved allocations from the reserved chunk if available */
 	if (reserved && pcpu_reserved_chunk) {
 		chunk = pcpu_reserved_chunk;
-		if (size > chunk->contig_hint ||
-		    pcpu_extend_area_map(chunk, &flags) < 0) {
-			err = "failed to extend area map of reserved chunk";
+
+		if (size > chunk->contig_hint) {
+			err = "alloc from reserved chunk failed";
 			goto fail_unlock;
 		}
+
+		while ((new_alloc = pcpu_need_to_extend(chunk))) {
+			spin_unlock_irqrestore(&pcpu_lock, flags);
+			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+				err = "failed to extend area map of reserved chunk";
+				goto fail_unlock_mutex;
+			}
+			spin_lock_irqsave(&pcpu_lock, flags);
+		}
+
 		off = pcpu_alloc_area(chunk, size, align);
 		if (off >= 0)
 			goto area_found;
+
 		err = "alloc from reserved chunk failed";
 		goto fail_unlock;
 	}
@@ -1083,14 +1118,20 @@ restart:
 			if (size > chunk->contig_hint)
 				continue;

-			switch (pcpu_extend_area_map(chunk, &flags)) {
-			case 0:
-				break;
-			case 1:
-				goto restart;	/* pcpu_lock dropped, restart */
-			default:
-				err = "failed to extend area map";
-				goto fail_unlock;
+			new_alloc = pcpu_need_to_extend(chunk);
+			if (new_alloc) {
+				spin_unlock_irqrestore(&pcpu_lock, flags);
+				if (pcpu_extend_area_map(chunk,
+							 new_alloc) < 0) {
+					err = "failed to extend area map";
+					goto fail_unlock_mutex;
+				}
+				spin_lock_irqsave(&pcpu_lock, flags);
+				/*
+				 * pcpu_lock has been dropped, need to
+				 * restart cpu_slot list walking.
+				 */
+				goto restart;
 			}

 			off = pcpu_alloc_area(chunk, size, align);

-- 
tejun

             reply	other threads:[~2009-11-13  3:54 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-13  3:53 Tejun Heo [this message]
  -- strict thread matches above, loose matches on Subject: below --
2009-11-10  6:04 [GIT PULL] percpu fixes for 2.6.32-rc6 Tejun Heo
2009-11-10 17:10 ` Linus Torvalds
2009-11-10 18:33   ` Tejun Heo
2009-11-10 18:54     ` Linus Torvalds
2009-11-10 19:25       ` Tejun Heo
2009-11-10 19:37         ` Ingo Molnar
2009-11-10 19:50           ` Tejun Heo
2009-11-10 21:42             ` Linus Torvalds
2009-11-11  3:55               ` Tejun Heo
2009-11-11 11:31                 ` Ingo Molnar
2009-11-11 12:21                   ` Tejun Heo
2009-11-11 19:57                     ` Ingo Molnar
2009-11-12 10:11                       ` Tejun Heo
2009-11-12 10:36                         ` Ingo Molnar
2009-11-12 10:58                           ` Tejun Heo
2009-11-12 11:25                             ` Ingo Molnar
2009-11-12 14:26                             ` Oliver Neukum
2009-11-12 15:17                             ` Linus Torvalds
2009-11-12 15:30                               ` Tejun Heo
2009-11-12 15:45                                 ` Tejun Heo
2009-11-12 15:52                                   ` Linus Torvalds
2009-11-12 17:04                               ` Andres Baldrich
2009-11-12 17:18                                 ` Linus Torvalds
2009-11-12 18:04                                   ` Ingo Molnar
2009-11-12 18:14                               ` Andi Kleen
2009-11-12 11:07                         ` Ingo Molnar
2009-11-12 11:29                           ` Tejun Heo
2009-11-10 19:44         ` Tejun Heo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4AFCD83E.4000408@kernel.org \
    --to=tj@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.