public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: William Lee Irwin III <wli@holomorphy.com>
To: Ingo Molnar <mingo@elte.hu>
Cc: Anton Blanchard <anton@samba.org>,
	linux-kernel@vger.kernel.org,
	viro@parcelfarce.linux.theplanet.co.uk
Subject: Re: /proc/sys/kernel/pid_max issues
Date: Sun, 12 Sep 2004 10:13:19 -0700	[thread overview]
Message-ID: <20040912171319.GR2660@holomorphy.com> (raw)
In-Reply-To: <20040912112026.GA16678@elte.hu>

* William Lee Irwin III <wli@holomorphy.com> wrote:
>> Forgot to check map->page in the first spin:
>> last_pid is not honored because next_free_map(map - 1, ...) may return
>> the same map and so restart with a lesser offset.

On Sun, Sep 12, 2004 at 01:20:26PM +0200, Ingo Molnar wrote:
> it's getting quite spaghetti ... do we really want to handle
> RESERVED_PID? There's no guarantee that any root daemon wont stray out
> of the 1...300 PID range anyway, so if it has an exploitable PID race
> bug then it's probably exploitable even without the RESERVED_PID
> protection.

I presumed it was merely cosmetic, so daemons around system startup
will get low pid numbers recognizable by sysadmins. Maybe filtering
process listings for pids < 300 is/was used to find daemons that may
have crashed? I'm not particularly attached to the feature, and have
never used it myself, but merely noticed its implementation was off.

Spaghetti may mean it's time to rewrite things. The following is a
goto-less alloc_pidmap() vs. 2.6.9-rc1-mm4 addressing all stated
concerns, but otherwise changing none of the semantics. Untested.


-- wli

Index: mm4-2.6.9-rc1/kernel/pid.c
===================================================================
--- mm4-2.6.9-rc1.orig/kernel/pid.c	2004-09-08 05:46:18.000000000 -0700
+++ mm4-2.6.9-rc1/kernel/pid.c	2004-09-12 09:44:52.586896544 -0700
@@ -38,6 +38,9 @@
 #define PIDMAP_ENTRIES		(PID_MAX_LIMIT/PAGE_SIZE/8)
 #define BITS_PER_PAGE		(PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK	(BITS_PER_PAGE-1)
+#define mk_pid(map, off)	(((map) - pidmap_array)*BITS_PER_PAGE + (off))
+#define find_next_offset(map, off)					\
+		find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
 /*
  * PID-map pages start out as NULL, they get allocated upon
@@ -53,8 +56,6 @@
 static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
 	 { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
 
-static pidmap_t *map_limit = pidmap_array + PIDMAP_ENTRIES;
-
 static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 fastcall void free_pidmap(int pid)
@@ -66,15 +67,16 @@
 	atomic_inc(&map->nr_free);
 }
 
-/*
- * Here we search for the next map that has free bits left.
- * Normally the next map has free PIDs.
- */
-static inline pidmap_t *next_free_map(pidmap_t *map, int *max_steps)
+int alloc_pidmap(void)
 {
-	while (--*max_steps) {
-		if (++map == map_limit)
-			map = pidmap_array;
+	int i, offset, pid = last_pid + 1;
+	pidmap_t *map;
+
+	if (pid >= pid_max)
+		pid = RESERVED_PIDS;
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pidmap_array[pid/BITS_PER_PAGE];
+	for (i = 0; i < PIDMAP_ENTRIES; ++i) {
 		if (unlikely(!map->page)) {
 			unsigned long page = get_zeroed_page(GFP_KERNEL);
 			/*
@@ -87,64 +89,29 @@
 			else
 				map->page = (void *)page;
 			spin_unlock(&pidmap_lock);
-
-			if (!map->page)
+			if (unlikely(!map->page))
 				break;
 		}
-		if (atomic_read(&map->nr_free))
-			return map;
-	}
-	return NULL;
-}
-
-int alloc_pidmap(void)
-{
-	int pid, offset, max_steps = PIDMAP_ENTRIES + 1;
-	pidmap_t *map;
-
-	pid = last_pid + 1;
-	if (pid >= pid_max)
-		pid = RESERVED_PIDS;
-
-	offset = pid & BITS_PER_PAGE_MASK;
-	map = pidmap_array + pid / BITS_PER_PAGE;
-
-	if (likely(map->page && !test_and_set_bit(offset, map->page))) {
-		/*
-		 * There is a small window for last_pid updates to race,
-		 * but in that case the next allocation will go into the
-		 * slowpath and that fixes things up.
-		 */
-return_pid:
-		atomic_dec(&map->nr_free);
-		last_pid = pid;
-		return pid;
-	}
-	
-	if (!offset || !atomic_read(&map->nr_free)) {
-		if (!offset)
-			map--;
-next_map:
-		map = next_free_map(map, &max_steps);
-		if (!map)
-			goto failure;
-		offset = 0;
+		if (likely(atomic_read(&map->nr_free))) {
+			do {
+				if (!test_and_set_bit(offset, map->page)) {
+					atomic_dec(&map->nr_free);
+					last_pid = pid;
+					return pid;
+				}
+				offset = find_next_offset(map, offset);
+				pid = mk_pid(map, offset);
+			} while (offset < BITS_PER_PAGE && pid < pid_max);
+		}
+		if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
+			++map;
+			offset = 0;
+		} else {
+			map = &pidmap_array[0];
+			offset = RESERVED_PIDS;
+		}
+		pid = mk_pid(map, offset);
 	}
-	/*
-	 * Find the next zero bit:
-	 */
-scan_more:
-	offset = find_next_zero_bit(map->page, BITS_PER_PAGE, offset);
-	if (offset >= BITS_PER_PAGE)
-		goto next_map;
-	if (test_and_set_bit(offset, map->page))
-		goto scan_more;
-
-	/* we got the PID: */
-	pid = (map - pidmap_array) * BITS_PER_PAGE + offset;
-	goto return_pid;
-
-failure:
 	return -1;
 }
 

  reply	other threads:[~2004-09-12 17:13 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-12  8:56 /proc/sys/kernel/pid_max issues Anton Blanchard
2004-09-12  9:36 ` William Lee Irwin III
2004-09-12  9:51   ` Ingo Molnar
2004-09-12  9:58   ` William Lee Irwin III
2004-09-12 10:10     ` William Lee Irwin III
2004-09-12 10:13     ` Ingo Molnar
2004-09-12 10:43       ` William Lee Irwin III
2004-09-12 10:45         ` William Lee Irwin III
2004-09-12 11:08           ` William Lee Irwin III
2004-09-12 11:20             ` Ingo Molnar
2004-09-12 17:13               ` William Lee Irwin III [this message]
2004-09-12 18:02                 ` Chris Wedgwood
2004-09-12 23:06                   ` William Lee Irwin III
2004-09-13  1:46                 ` [pidhashing] rewrite alloc_pidmap() William Lee Irwin III
2004-09-12  9:39 ` /proc/sys/kernel/pid_max issues Ingo Molnar
2004-09-12  9:43   ` William Lee Irwin III
2004-09-12 12:18 ` Arjan van de Ven
2004-09-12 12:30   ` Anton Blanchard
2004-09-12 12:44     ` Arjan van de Ven
2004-09-12 13:34       ` Anton Blanchard
2004-09-12 13:41       ` Ingo Molnar
  -- strict thread matches above, loose matches on Subject: below --
2004-09-13  3:20 Albert Cahalan
2004-09-13  7:42 ` William Lee Irwin III
2004-09-13 14:11   ` Albert Cahalan
2004-09-13 14:27     ` William Lee Irwin III
2004-09-13 14:51       ` Herbert Poetzl
2004-09-14  2:13         ` William Lee Irwin III
2004-09-23 13:13       ` Pavel Machek
2004-09-24 16:02         ` Martin Mares
2004-09-23 13:11   ` Pavel Machek
2004-09-13  7:57 ` Ingo Molnar
2004-09-13 13:54   ` Albert Cahalan
2004-09-13 14:24     ` William Lee Irwin III
2004-09-13 14:54       ` Albert Cahalan
2004-09-14  2:02         ` William Lee Irwin III
2004-09-14 15:32     ` Ingo Molnar
2004-09-18 18:32       ` Pavel Machek
2004-09-23 13:18     ` Pavel Machek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040912171319.GR2660@holomorphy.com \
    --to=wli@holomorphy.com \
    --cc=anton@samba.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=viro@parcelfarce.linux.theplanet.co.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox