public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: Paul Larson <plars@austin.ibm.com>
Cc: Linus Torvalds <torvalds@transmeta.com>,
	lkml <linux-kernel@vger.kernel.org>,
	davej@suse.de, frankeh@us.ibm.com, andrea@suse.de
Subject: Re: [PATCH] Linux-2.5 fix/improve get_pid()
Date: Wed, 07 Aug 2002 16:06:05 -0700	[thread overview]
Message-ID: <3D51A7DD.A4F7C5E4@zip.com.au> (raw)
In-Reply-To: 1028757835.22405.300.camel@plars.austin.ibm.com

Paul Larson wrote:
> 
> This patch provides an improved version of get_pid() while also taking
> care of the bug that causes the machine to hang when you hit PID_MAX.

Has this been evaluated against Bill Irwin's constant-time
allocator?  Bill says it has slightly worse normal-case and
vastly improved worst-case performance over the stock allocator.
Not sure how it stacks up against this one.   Plus it's much nicer
looking code.

He's shy, so.....



#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/init.h>

/*
 * pid allocator
 * (C) 2002 William Irwin, IBM
 *
 * The strategy is to maintain a tower of bitmaps where a bitmap above
 * another in each bit accounts whether any pid's are available in the
 * space tracked by BITS_PER_LONG bits of the level below. The bitmaps
 * must be marked on allocation and also release, hence some
 * infrastructure for detecting when the last user of a pid releases it
 * must be in place.
 *
 * This general strategy is simple in concept and enforces highly
 * deterministic bounds on the search time for the next pid.
 */

#define PID_MAX		0x8000
#define RESERVED_PIDS	300

#define MAP0_SIZE	(PID_MAX   >> BITS_PER_LONG_SHIFT)
#define MAP1_SIZE	(MAP0_SIZE >> BITS_PER_LONG_SHIFT)
#define MAP2_SIZE	(MAP1_SIZE >> BITS_PER_LONG_SHIFT)

#define MAP0_SHIFT	BITS_PER_LONG_SHIFT
#define MAP1_SHIFT	(2*BITS_PER_LONG_SHIFT)
#define MAP2_SHIFT	(3*BITS_PER_LONG_SHIFT)

#define PID_MAP_MASK	(BITS_PER_LONG - 1)

#define PID_MAP_DEPTH	(ARRAY_SIZE(pid_map) - 1)

static unsigned long pid_map0[MAP0_SIZE];
static unsigned long pid_map1[MAP1_SIZE];
static unsigned long pid_map2[MAP2_SIZE];

static unsigned long * pid_map[] = { pid_map0, pid_map1, pid_map2, NULL, };

unsigned long last_pid = 0;
unsigned long npids = 0;

static const int map_shifts[] =
	{	0,
		BITS_PER_LONG_SHIFT,
		BITS_PER_LONG_SHIFT*2,
		BITS_PER_LONG_SHIFT*3,
		BITS_PER_LONG_SHIFT*4,
	};

static inline int pid_map_shift(int depth)
{
	return map_shifts[depth+1];
}

static spinlock_t pid_lock = SPIN_LOCK_UNLOCKED;

void free_pid(unsigned long pid)
{
	unsigned long **map = pid_map;

	spin_lock(&pid_lock);
	while (*map) {
		int bit = pid & PID_MAP_MASK;
		pid >>= BITS_PER_LONG_SHIFT;
		__clear_bit(bit, &(*map)[pid]);
		++map;
	}
	--npids;
	spin_unlock(&pid_lock);
}

static inline int whole_block_used(int level, unsigned long pid)
{
	return pid_map[level][pid >> pid_map_shift(level)] == ~0UL;
}

static inline void mark_pid(unsigned long pid)
{
	int level;
	for (level = 0; level < PID_MAP_DEPTH; ++level) {
		int shift, bit;
		unsigned long entry;

		shift = pid_map_shift(level);
		entry = pid >> shift;
		bit   = (pid >> (shift - BITS_PER_LONG_SHIFT)) & PID_MAP_MASK;
		if (level == 0 || whole_block_used(level - 1, pid))
			__set_bit(bit, &pid_map[level][entry]);
		else
			break;
	}
	++npids;
}

static inline int pid_map_limit(int depth)
{
	return PID_MAX >> pid_map_shift(depth);
}

#ifdef PID_ALLOC_EXAMPLE
/*
 * the pid allocation traverses the bitmaps by recursively ffz'ing
 * through down the tower of maps. Some additional logic is required
 * to enforce lower limits, but the following example of how one
 * would perform this search without the lower limit may well prove
 * enlightening for those interested in the mechanics of the algorithm.
 */
static long alloc_pid_from_zero(void)
{
	unsigned long pid = 0;
	int level;

	for (level = PID_MAP_DEPTH - 1; level >= 0; --level) {
		unsigned long entry = pid_map[level][pid];

		if (unlikely(entry == ~0UL))
			return ~0UL;

		pid = (pid << BITS_PER_LONG_SHIFT) + ffz(pid_map[level][pid]);
	}
	return pid;
}
#endif /* PID_ALLOC_EXAMPLE */


static const unsigned long pid_max_levels[] =
	{	PID_MAX >> BITS_PER_LONG_SHIFT,
		PID_MAX >> (BITS_PER_LONG_SHIFT*2),
		PID_MAX >> (BITS_PER_LONG_SHIFT*3),
		PID_MAX >> (BITS_PER_LONG_SHIFT*4),
	};

static inline unsigned long pid_map_digit(int level, unsigned long limit)
{
	return (limit >> pid_map_shift(level-1)) & PID_MAP_MASK;
}

/*
 * Scratch space for storing the digits of the limit, all accesses
 * protected by the pid_lock.
 */
static unsigned long limit_digits[4];

/*
 * This is not a high-performance implementation. alloc_pid_after()
 * can be made highly compact with some effort, but this is instead
 * meant to be clear. As the cost of fork() is dominated by much
 * more expensive operations and the cost of this is constant-bounded
 * by a very low constant, the gains from manual optimization here
 * are marginal.
 */
static long alloc_pid_after(unsigned long limit)
{
	unsigned long pid = 0;
	int level;

	/*
	 * The limit passed to us is a strict lower limit. It is more
	 * convenient to work with <= constraints.
	 */
	++limit;
	if (unlikely(limit == PID_MAX))
		return ~0UL;

	for (level = 0; level < PID_MAP_DEPTH; ++level) {
		limit_digits[level] = limit & PID_MAP_MASK;
		limit >>= BITS_PER_LONG_SHIFT;
	}

	/*
	 * Now the lowest available pid number above limit is
	 * reconstructed by ffz'ing down the bitmap and checking
	 * each digit against the digits of the limit for
	 * dictionary ordering. If the check should fail, it's
	 * fixed up by using the maximum of the two digits. The
	 * dictionary ordering on digits also means that a
	 * greater significant digit found in the bitmap
	 * invalidates all further comparisons, which requires
	 * fallback to the pure recursive ffz algorithm outlined
	 * above in order to be handled.
	 */
	for (level = PID_MAP_DEPTH - 1; level >= 0; --level) {
		unsigned long bit, digit;

		if (unlikely(pid >= pid_max_levels[level]))
			return ~0UL;

		bit = ffz(pid_map[level][pid]);
		digit = limit_digits[level];

		if (unlikely(bit < digit))
			bit = digit;

		pid = (pid << BITS_PER_LONG_SHIFT) + bit;

		/*
		 * This is not an optimization; if this check
		 * should succeed the digit comparisons above
		 * are no longer valid and (pessimistically)
		 * incorrect first available pid's are found.
		 * 
		 */
		if (likely(bit > digit)) {
			--level;
			goto finish_just_ffz;
		}
	}
out:
	if (pid < PID_MAX)
		return pid;
	else
		return ~0UL;

finish_just_ffz:
	/*
	 * Now revert to the pure recursive ffz algorithm with
	 * the slight variation of not beginning at a fixed level,
	 * because it is no longer valid to perform comparisons
	 * of the digit obtained by ffz'ing the bitmap against the
	 * digits of the limit.
	 */
	while (level >= 0) {
		unsigned long bit;

		if (unlikely(pid >= pid_max_levels[level]))
			return ~0UL;

		bit = ffz(pid_map[level][pid]);
		pid = (pid << BITS_PER_LONG_SHIFT) + bit;
		--level;
	}

	goto out;
}

int alloc_pid(void)
{
	unsigned long pid;

	spin_lock(&pid_lock);
	BUG_ON(last_pid >= PID_MAX);
	pid = alloc_pid_after(last_pid);
	if (unlikely(pid == ~0UL)) {
		pid = alloc_pid_after(RESERVED_PIDS);
		if (unlikely(pid == ~0UL))
			goto out;
		BUG_ON(pid < RESERVED_PIDS);
	} else
		BUG_ON(pid <= last_pid);
	last_pid = pid;
	mark_pid(pid);
out:
	spin_unlock(&pid_lock);
	return (int)pid;
}

void __init pid_init(void)
{
	mark_pid(0);
}

  reply	other threads:[~2002-08-07 23:04 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-08-07 22:03 [PATCH] Linux-2.5 fix/improve get_pid() Paul Larson
2002-08-07 23:06 ` Andrew Morton [this message]
2002-08-08  0:24   ` Andries Brouwer
2002-08-08 19:42     ` William Lee Irwin III
2002-08-08 20:47       ` Andries Brouwer
2002-08-09 11:22     ` Analysis for Linux-2.5 fix/improve get_pid(), comparing various approaches Hubertus Franke
2002-08-09 15:36       ` Andries Brouwer
2002-08-09 18:14         ` Hubertus Franke
2002-08-09 16:05       ` Linus Torvalds
2002-08-09 18:18         ` Hubertus Franke
2002-08-08 20:24   ` [PATCH] Linux-2.5 fix/improve get_pid() Linus Torvalds
2002-08-08 21:30     ` H. Peter Anvin
2002-08-08 21:45     ` William Lee Irwin III
2002-08-09  4:42     ` William Lee Irwin III
  -- strict thread matches above, loose matches on Subject: below --
2002-08-08 18:56 Hubertus Franke
2002-08-08 19:15 ` Rik van Riel
2002-08-08 19:18 ` Paul Larson
2002-08-08 21:43 Hubertus Franke
2002-08-08 22:02 ` Linus Torvalds
2002-08-08 22:26   ` Linus Torvalds
2002-08-08 23:09     ` Albert D. Cahalan
2002-08-09  3:26       ` Chris Adams
2002-08-09  7:04         ` Albert D. Cahalan
2002-08-09  8:48           ` Helge Hafting
2002-08-09 10:32             ` Alan Cox
2002-08-09  9:35               ` Andrew Morton
2002-08-09 10:37               ` Andries Brouwer
2002-08-09 13:00           ` Chris Adams
2002-08-09 14:39             ` Albert D. Cahalan
2002-08-09 13:59           ` Rik van Riel
2002-08-09 14:57             ` Albert D. Cahalan
2002-08-08 22:34   ` Paul Larson
2002-08-08 22:44     ` Rik van Riel
2002-08-08 22:37   ` Rik van Riel
2002-08-09  1:49     ` Andries Brouwer
2002-08-09 19:34   ` Paul Larson
2002-08-09 20:13     ` Paul Larson
2002-08-09 20:40     ` Andries Brouwer
2002-08-09 21:23     ` Linus Torvalds
2002-08-09 21:42       ` Linus Torvalds
2002-08-09 21:46         ` Paul Larson
2002-08-09 22:04           ` Linus Torvalds
2002-08-10 17:23             ` Jamie Lokier
2002-08-10 18:33               ` Linus Torvalds
2002-08-10 18:48                 ` Jamie Lokier
2002-08-11 20:41                   ` Alan Cox
2002-08-11 19:58                     ` Jamie Lokier
2002-08-11 21:23                       ` Alan Cox
2002-08-11 20:10                         ` Jamie Lokier
2002-08-12  8:56             ` Padraig Brady
2002-08-12 10:37               ` Alan Cox
2002-08-12  9:21                 ` Padraig Brady
2002-08-12 14:40             ` Paul Larson
2002-08-08 21:50 Hubertus Franke
2002-08-09  0:53 Hubertus Franke
2002-08-12 16:15 Jim Houston

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D51A7DD.A4F7C5E4@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=andrea@suse.de \
    --cc=davej@suse.de \
    --cc=frankeh@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=plars@austin.ibm.com \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox