All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: Paul Larson <plars@austin.ibm.com>
Cc: Linus Torvalds <torvalds@transmeta.com>,
	lkml <linux-kernel@vger.kernel.org>,
	davej@suse.de, frankeh@us.ibm.com, andrea@suse.de
Subject: Re: [PATCH] Linux-2.5 fix/improve get_pid()
Date: Wed, 07 Aug 2002 16:06:05 -0700	[thread overview]
Message-ID: <3D51A7DD.A4F7C5E4@zip.com.au> (raw)
In-Reply-To: 1028757835.22405.300.camel@plars.austin.ibm.com

Paul Larson wrote:
> 
> This patch provides an improved version of get_pid() while also taking
> care of the bug that causes the machine to hang when you hit PID_MAX.

Has this been evaluated against Bill Irwin's constant-time
allocator?  Bill says it has slightly worse normal-case and
vastly improved worst-case performance over the stock allocator.
Not sure how it stacks up against this one.   Plus it's much nicer
looking code.

He's shy, so.....



#include <linux/compiler.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/init.h>

/*
 * pid allocator
 * (C) 2002 William Irwin, IBM
 *
 * The strategy is to maintain a tower of bitmaps where a bitmap above
 * another in each bit accounts whether any pid's are available in the
 * space tracked by BITS_PER_LONG bits of the level below. The bitmaps
 * must be marked on allocation and also release, hence some
 * infrastructure for detecting when the last user of a pid releases it
 * must be in place.
 *
 * This general strategy is simple in concept and enforces highly
 * deterministic bounds on the search time for the next pid.
 */

#define PID_MAX		0x8000
#define RESERVED_PIDS	300

#define MAP0_SIZE	(PID_MAX   >> BITS_PER_LONG_SHIFT)
#define MAP1_SIZE	(MAP0_SIZE >> BITS_PER_LONG_SHIFT)
#define MAP2_SIZE	(MAP1_SIZE >> BITS_PER_LONG_SHIFT)

#define MAP0_SHIFT	BITS_PER_LONG_SHIFT
#define MAP1_SHIFT	(2*BITS_PER_LONG_SHIFT)
#define MAP2_SHIFT	(3*BITS_PER_LONG_SHIFT)

#define PID_MAP_MASK	(BITS_PER_LONG - 1)

#define PID_MAP_DEPTH	(ARRAY_SIZE(pid_map) - 1)

static unsigned long pid_map0[MAP0_SIZE];
static unsigned long pid_map1[MAP1_SIZE];
static unsigned long pid_map2[MAP2_SIZE];

static unsigned long * pid_map[] = { pid_map0, pid_map1, pid_map2, NULL, };

unsigned long last_pid = 0;
unsigned long npids = 0;

static const int map_shifts[] =
	{	0,
		BITS_PER_LONG_SHIFT,
		BITS_PER_LONG_SHIFT*2,
		BITS_PER_LONG_SHIFT*3,
		BITS_PER_LONG_SHIFT*4,
	};

static inline int pid_map_shift(int depth)
{
	return map_shifts[depth+1];
}

static spinlock_t pid_lock = SPIN_LOCK_UNLOCKED;

void free_pid(unsigned long pid)
{
	unsigned long **map = pid_map;

	spin_lock(&pid_lock);
	while (*map) {
		int bit = pid & PID_MAP_MASK;
		pid >>= BITS_PER_LONG_SHIFT;
		__clear_bit(bit, &(*map)[pid]);
		++map;
	}
	--npids;
	spin_unlock(&pid_lock);
}

static inline int whole_block_used(int level, unsigned long pid)
{
	return pid_map[level][pid >> pid_map_shift(level)] == ~0UL;
}

static inline void mark_pid(unsigned long pid)
{
	int level;
	for (level = 0; level < PID_MAP_DEPTH; ++level) {
		int shift, bit;
		unsigned long entry;

		shift = pid_map_shift(level);
		entry = pid >> shift;
		bit   = (pid >> (shift - BITS_PER_LONG_SHIFT)) & PID_MAP_MASK;
		if (level == 0 || whole_block_used(level - 1, pid))
			__set_bit(bit, &pid_map[level][entry]);
		else
			break;
	}
	++npids;
}

static inline int pid_map_limit(int depth)
{
	return PID_MAX >> pid_map_shift(depth);
}

#ifdef PID_ALLOC_EXAMPLE
/*
 * the pid allocation traverses the bitmaps by recursively ffz'ing
 * through down the tower of maps. Some additional logic is required
 * to enforce lower limits, but the following example of how one
 * would perform this search without the lower limit may well prove
 * enlightening for those interested in the mechanics of the algorithm.
 */
static long alloc_pid_from_zero(void)
{
	unsigned long pid = 0;
	int level;

	for (level = PID_MAP_DEPTH - 1; level >= 0; --level) {
		unsigned long entry = pid_map[level][pid];

		if (unlikely(entry == ~0UL))
			return ~0UL;

		pid = (pid << BITS_PER_LONG_SHIFT) + ffz(pid_map[level][pid]);
	}
	return pid;
}
#endif /* PID_ALLOC_EXAMPLE */


static const unsigned long pid_max_levels[] =
	{	PID_MAX >> BITS_PER_LONG_SHIFT,
		PID_MAX >> (BITS_PER_LONG_SHIFT*2),
		PID_MAX >> (BITS_PER_LONG_SHIFT*3),
		PID_MAX >> (BITS_PER_LONG_SHIFT*4),
	};

static inline unsigned long pid_map_digit(int level, unsigned long limit)
{
	return (limit >> pid_map_shift(level-1)) & PID_MAP_MASK;
}

/*
 * Scratch space for storing the digits of the limit, all accesses
 * protected by the pid_lock.
 */
static unsigned long limit_digits[4];

/*
 * This is not a high-performance implementation. alloc_pid_after()
 * can be made highly compact with some effort, but this is instead
 * meant to be clear. As the cost of fork() is dominated by much
 * more expensive operations and the cost of this is constant-bounded
 * by a very low constant, the gains from manual optimization here
 * are marginal.
 */
static long alloc_pid_after(unsigned long limit)
{
	unsigned long pid = 0;
	int level;

	/*
	 * The limit passed to us is a strict lower limit. It is more
	 * convenient to work with <= constraints.
	 */
	++limit;
	if (unlikely(limit == PID_MAX))
		return ~0UL;

	for (level = 0; level < PID_MAP_DEPTH; ++level) {
		limit_digits[level] = limit & PID_MAP_MASK;
		limit >>= BITS_PER_LONG_SHIFT;
	}

	/*
	 * Now the lowest available pid number above limit is
	 * reconstructed by ffz'ing down the bitmap and checking
	 * each digit against the digits of the limit for
	 * dictionary ordering. If the check should fail, it's
	 * fixed up by using the maximum of the two digits. The
	 * dictionary ordering on digits also means that a
	 * greater significant digit found in the bitmap
	 * invalidates all further comparisons, which requires
	 * fallback to the pure recursive ffz algorithm outlined
	 * above in order to be handled.
	 */
	for (level = PID_MAP_DEPTH - 1; level >= 0; --level) {
		unsigned long bit, digit;

		if (unlikely(pid >= pid_max_levels[level]))
			return ~0UL;

		bit = ffz(pid_map[level][pid]);
		digit = limit_digits[level];

		if (unlikely(bit < digit))
			bit = digit;

		pid = (pid << BITS_PER_LONG_SHIFT) + bit;

		/*
		 * This is not an optimization; if this check
		 * should succeed the digit comparisons above
		 * are no longer valid and (pessimistically)
		 * incorrect first available pid's are found.
		 * 
		 */
		if (likely(bit > digit)) {
			--level;
			goto finish_just_ffz;
		}
	}
out:
	if (pid < PID_MAX)
		return pid;
	else
		return ~0UL;

finish_just_ffz:
	/*
	 * Now revert to the pure recursive ffz algorithm with
	 * the slight variation of not beginning at a fixed level,
	 * because it is no longer valid to perform comparisons
	 * of the digit obtained by ffz'ing the bitmap against the
	 * digits of the limit.
	 */
	while (level >= 0) {
		unsigned long bit;

		if (unlikely(pid >= pid_max_levels[level]))
			return ~0UL;

		bit = ffz(pid_map[level][pid]);
		pid = (pid << BITS_PER_LONG_SHIFT) + bit;
		--level;
	}

	goto out;
}

int alloc_pid(void)
{
	unsigned long pid;

	spin_lock(&pid_lock);
	BUG_ON(last_pid >= PID_MAX);
	pid = alloc_pid_after(last_pid);
	if (unlikely(pid == ~0UL)) {
		pid = alloc_pid_after(RESERVED_PIDS);
		if (unlikely(pid == ~0UL))
			goto out;
		BUG_ON(pid < RESERVED_PIDS);
	} else
		BUG_ON(pid <= last_pid);
	last_pid = pid;
	mark_pid(pid);
out:
	spin_unlock(&pid_lock);
	return (int)pid;
}

void __init pid_init(void)
{
	mark_pid(0);
}

  reply	other threads:[~2002-08-07 23:04 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-08-07 22:03 [PATCH] Linux-2.5 fix/improve get_pid() Paul Larson
2002-08-07 23:06 ` Andrew Morton [this message]
2002-08-08  0:24   ` Andries Brouwer
2002-08-08 19:42     ` William Lee Irwin III
2002-08-08 20:47       ` Andries Brouwer
2002-08-09 11:22     ` Analysis for Linux-2.5 fix/improve get_pid(), comparing various approaches Hubertus Franke
2002-08-09 15:36       ` Andries Brouwer
2002-08-09 18:14         ` Hubertus Franke
2002-08-09 16:05       ` Linus Torvalds
2002-08-09 18:18         ` Hubertus Franke
2002-08-08 20:24   ` [PATCH] Linux-2.5 fix/improve get_pid() Linus Torvalds
2002-08-08 21:30     ` H. Peter Anvin
2002-08-08 21:45     ` William Lee Irwin III
2002-08-09  4:42     ` William Lee Irwin III
  -- strict thread matches above, loose matches on Subject: below --
2002-08-08 18:56 Hubertus Franke
2002-08-08 19:15 ` Rik van Riel
2002-08-08 19:18 ` Paul Larson
2002-08-08 21:43 Hubertus Franke
2002-08-08 22:02 ` Linus Torvalds
2002-08-08 22:26   ` Linus Torvalds
2002-08-08 23:09     ` Albert D. Cahalan
2002-08-09  3:26       ` Chris Adams
2002-08-09  7:04         ` Albert D. Cahalan
2002-08-09  8:48           ` Helge Hafting
2002-08-09 10:32             ` Alan Cox
2002-08-09  9:35               ` Andrew Morton
2002-08-09 10:37               ` Andries Brouwer
2002-08-09 13:00           ` Chris Adams
2002-08-09 14:39             ` Albert D. Cahalan
2002-08-09 13:59           ` Rik van Riel
2002-08-09 14:57             ` Albert D. Cahalan
2002-08-08 22:34   ` Paul Larson
2002-08-08 22:44     ` Rik van Riel
2002-08-08 22:37   ` Rik van Riel
2002-08-09  1:49     ` Andries Brouwer
2002-08-09 19:34   ` Paul Larson
2002-08-09 20:13     ` Paul Larson
2002-08-09 20:40     ` Andries Brouwer
2002-08-09 21:23     ` Linus Torvalds
2002-08-09 21:42       ` Linus Torvalds
2002-08-09 21:46         ` Paul Larson
2002-08-09 22:04           ` Linus Torvalds
2002-08-10 17:23             ` Jamie Lokier
2002-08-10 18:33               ` Linus Torvalds
2002-08-10 18:48                 ` Jamie Lokier
2002-08-11 20:41                   ` Alan Cox
2002-08-11 19:58                     ` Jamie Lokier
2002-08-11 21:23                       ` Alan Cox
2002-08-11 20:10                         ` Jamie Lokier
2002-08-12  8:56             ` Padraig Brady
2002-08-12 10:37               ` Alan Cox
2002-08-12  9:21                 ` Padraig Brady
2002-08-12 14:40             ` Paul Larson
2002-08-08 21:50 Hubertus Franke
2002-08-09  0:53 Hubertus Franke
2002-08-12 16:15 Jim Houston

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D51A7DD.A4F7C5E4@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=andrea@suse.de \
    --cc=davej@suse.de \
    --cc=frankeh@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=plars@austin.ibm.com \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.