From: Johannes Weiner <hannes@saeurebad.de>
To: Andi Kleen <andi@firstfloor.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH] [6/13] Core maskable allocator
Date: Fri, 07 Mar 2008 11:53:30 +0100 [thread overview]
Message-ID: <871w6m955h.fsf@saeurebad.de> (raw)
In-Reply-To: <20080307090716.9D3E91B419C@basil.firstfloor.org> (Andi Kleen's message of "Fri, 7 Mar 2008 10:07:16 +0100 (CET)")
Hi Andi,
Andi Kleen <andi@firstfloor.org> writes:
> Index: linux/mm/mask-alloc.c
> ===================================================================
> --- /dev/null
> +++ linux/mm/mask-alloc.c
> @@ -0,0 +1,504 @@
> +/*
> + * Generic management of low memory zone to allocate memory with a address mask.
> + *
> + * The maskable pool is reserved inside another zone, but managed by a
> + * specialized bitmap allocator. The allocator is not O(1) (searches
> + * the bitmap with a last use hint) but should be fast enough for
> + * normal purposes. The advantage of the allocator is that it can
> + * allocate based on a mask.
> + *
> + * The allocator could be improved, but it's better to keep
> + * things simple for now and there are relatively few users
> + * which are usually not that speed critical. Also for simple
> + * repetive allocation patterns it should be approximately usually
> + * O(1) anyways due to the rotating cursor in the bitmap.
> + *
> + * This allocator should be only used by architectures with reasonably
> + * continuous physical memory at least for the low normal zone.
> + *
> + * Note book:
> + * Right now there are no high priority reservations (__GFP_HIGH). Iff
> + * they are needed it would be possible to reserve some very low memory
> + * for those.
> + *
> + * Copyright 2007, 2008 Andi Kleen, SUSE Labs.
> + * Subject to the GNU Public License v.2 only.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/gfp.h>
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +#include <linux/bitops.h>
> +#include <linux/string.h>
> +#include <linux/wait.h>
> +#include <linux/bootmem.h>
> +#include <linux/module.h>
> +#include <linux/fault-inject.h>
> +#include <linux/ctype.h>
> +#include <linux/kallsyms.h>
> +#include "internal.h"
> +
> +#define BITS_PER_PAGE (PAGE_SIZE * 8)
> +
> +#define MASK_ZONE_LIMIT (2U<<30) /* 2GB max for now */
> +
> +#define Mprintk(x...)
> +#define Mprint_symbol(x...)
> +
> +static int force_mask __read_mostly;
> +static DECLARE_WAIT_QUEUE_HEAD(mask_zone_wait);
> +unsigned long mask_timeout __read_mostly = 5*HZ;
> +
> +/*
> + * The mask_bitmap maintains all the pages in the mask pool.
> + * It is reversed (lowest pfn has the highest index)
> + * to make reverse search easier.
> + * All accesses are protected by the mask_bitmap_lock
> + */
> +static DEFINE_SPINLOCK(mask_bitmap_lock);
> +static unsigned long *mask_bitmap;
> +static unsigned long mask_max_pfn;
> +
> +static inline unsigned pfn_to_maskbm_index(unsigned long pfn)
> +{
> + return mask_max_pfn - pfn;
> +}
> +
> +static inline unsigned maskbm_index_to_pfn(unsigned index)
> +{
> + return mask_max_pfn - index;
> +}
> +
> +static unsigned wait_for_mask_free(unsigned left)
> +{
> + DEFINE_WAIT(wait);
> + prepare_to_wait(&mask_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
> + left = schedule_timeout(left);
> + finish_wait(&mask_zone_wait, &wait);
> + return left;
> +}
> +
If ...
> +/* First try normal zones if possible. */
> +static struct page *
> +alloc_higher_pages(gfp_t gfp_mask, unsigned order, unsigned long pfn)
> +{
> + struct page *p = NULL;
> + if (pfn > mask_max_pfn) {
> +#ifdef CONFIG_ZONE_DMA32
> + if (pfn <= (0xffffffff >> PAGE_SHIFT)) {
> + p = alloc_pages(gfp_mask|GFP_DMA32|__GFP_NOWARN,
> + order);
... this succeeds and allocates pages, and ...
> + if (p && page_to_pfn(p) >= pfn) {
> + __free_pages(p, order);
> + p = NULL;
> + }
... p is non-NULL and its pfn is lower than pfn ...
> + }
> +#endif
> + p = alloc_pages(gfp_mask|__GFP_NOWARN, order);
... isn't this a leak here?
> + if (p && page_to_pfn(p) >= pfn) {
> + __free_pages(p, order);
> + p = NULL;
> + }
> + }
> + return p;
> +}
> +
> +static unsigned long alloc_mask(int pages, unsigned long max)
> +{
> + static unsigned long next_bit;
> + unsigned long offset, flags, start, pfn;
> + int k;
> +
> + if (max >= mask_max_pfn)
> + max = mask_max_pfn;
You can omit the assignment when max == mask_max_pfn, i.e. test with `>` instead of `>=`.
> + start = mask_max_pfn - max;
> +
> + spin_lock_irqsave(&mask_bitmap_lock, flags);
> + offset = -1L;
> +
> + if (next_bit >= start && next_bit + pages < (mask_max_pfn - (max>>1))) {
> + offset = find_next_zero_string(mask_bitmap, next_bit,
> + mask_max_pfn, pages);
> + if (offset != -1L)
> + count_vm_events(MASK_BITMAP_SKIP, offset - next_bit);
> + }
> + if (offset == -1L) {
> + offset = find_next_zero_string(mask_bitmap, start,
> + mask_max_pfn, pages);
> + if (offset != -1L)
> + count_vm_events(MASK_BITMAP_SKIP, offset - start);
> + }
> + if (offset != -1L) {
> + for (k = 0; k < pages; k++) {
> + BUG_ON(test_bit(offset + k, mask_bitmap));
> + set_bit(offset + k, mask_bitmap);
> + }
> + next_bit = offset + pages;
> + if (next_bit >= mask_max_pfn)
> + next_bit = start;
> + }
> + spin_unlock_irqrestore(&mask_bitmap_lock, flags);
> + if (offset == -1L)
> + return -1L;
> +
> + offset += pages - 1;
> + pfn = maskbm_index_to_pfn(offset);
> +
> + BUG_ON(maskbm_index_to_pfn(offset) != pfn);
> + return pfn;
> +}
Hannes
WARNING: multiple messages have this Message-ID (diff)
From: Johannes Weiner <hannes@saeurebad.de>
To: Andi Kleen <andi@firstfloor.org>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH] [6/13] Core maskable allocator
Date: Fri, 07 Mar 2008 11:53:30 +0100 [thread overview]
Message-ID: <871w6m955h.fsf@saeurebad.de> (raw)
In-Reply-To: <20080307090716.9D3E91B419C@basil.firstfloor.org> (Andi Kleen's message of "Fri, 7 Mar 2008 10:07:16 +0100 (CET)")
Hi Andi,
Andi Kleen <andi@firstfloor.org> writes:
> Index: linux/mm/mask-alloc.c
> ===================================================================
> --- /dev/null
> +++ linux/mm/mask-alloc.c
> @@ -0,0 +1,504 @@
> +/*
> + * Generic management of low memory zone to allocate memory with a address mask.
> + *
> + * The maskable pool is reserved inside another zone, but managed by a
> + * specialized bitmap allocator. The allocator is not O(1) (searches
> + * the bitmap with a last use hint) but should be fast enough for
> + * normal purposes. The advantage of the allocator is that it can
> + * allocate based on a mask.
> + *
> + * The allocator could be improved, but it's better to keep
> + * things simple for now and there are relatively few users
> + * which are usually not that speed critical. Also for simple
> + * repetive allocation patterns it should be approximately usually
> + * O(1) anyways due to the rotating cursor in the bitmap.
> + *
> + * This allocator should be only used by architectures with reasonably
> + * continuous physical memory at least for the low normal zone.
> + *
> + * Note book:
> + * Right now there are no high priority reservations (__GFP_HIGH). Iff
> + * they are needed it would be possible to reserve some very low memory
> + * for those.
> + *
> + * Copyright 2007, 2008 Andi Kleen, SUSE Labs.
> + * Subject to the GNU Public License v.2 only.
> + */
> +
> +#include <linux/mm.h>
> +#include <linux/gfp.h>
> +#include <linux/kernel.h>
> +#include <linux/sched.h>
> +#include <linux/bitops.h>
> +#include <linux/string.h>
> +#include <linux/wait.h>
> +#include <linux/bootmem.h>
> +#include <linux/module.h>
> +#include <linux/fault-inject.h>
> +#include <linux/ctype.h>
> +#include <linux/kallsyms.h>
> +#include "internal.h"
> +
> +#define BITS_PER_PAGE (PAGE_SIZE * 8)
> +
> +#define MASK_ZONE_LIMIT (2U<<30) /* 2GB max for now */
> +
> +#define Mprintk(x...)
> +#define Mprint_symbol(x...)
> +
> +static int force_mask __read_mostly;
> +static DECLARE_WAIT_QUEUE_HEAD(mask_zone_wait);
> +unsigned long mask_timeout __read_mostly = 5*HZ;
> +
> +/*
> + * The mask_bitmap maintains all the pages in the mask pool.
> + * It is reversed (lowest pfn has the highest index)
> + * to make reverse search easier.
> + * All accesses are protected by the mask_bitmap_lock
> + */
> +static DEFINE_SPINLOCK(mask_bitmap_lock);
> +static unsigned long *mask_bitmap;
> +static unsigned long mask_max_pfn;
> +
> +static inline unsigned pfn_to_maskbm_index(unsigned long pfn)
> +{
> + return mask_max_pfn - pfn;
> +}
> +
> +static inline unsigned maskbm_index_to_pfn(unsigned index)
> +{
> + return mask_max_pfn - index;
> +}
> +
> +static unsigned wait_for_mask_free(unsigned left)
> +{
> + DEFINE_WAIT(wait);
> + prepare_to_wait(&mask_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
> + left = schedule_timeout(left);
> + finish_wait(&mask_zone_wait, &wait);
> + return left;
> +}
> +
If ...
> +/* First try normal zones if possible. */
> +static struct page *
> +alloc_higher_pages(gfp_t gfp_mask, unsigned order, unsigned long pfn)
> +{
> + struct page *p = NULL;
> + if (pfn > mask_max_pfn) {
> +#ifdef CONFIG_ZONE_DMA32
> + if (pfn <= (0xffffffff >> PAGE_SHIFT)) {
> + p = alloc_pages(gfp_mask|GFP_DMA32|__GFP_NOWARN,
> + order);
... this succeeds and allocates pages, and ...
> + if (p && page_to_pfn(p) >= pfn) {
> + __free_pages(p, order);
> + p = NULL;
> + }
... p is non-NULL and its pfn is lower than pfn ...
> + }
> +#endif
> + p = alloc_pages(gfp_mask|__GFP_NOWARN, order);
... isn't this a leak here?
> + if (p && page_to_pfn(p) >= pfn) {
> + __free_pages(p, order);
> + p = NULL;
> + }
> + }
> + return p;
> +}
> +
> +static unsigned long alloc_mask(int pages, unsigned long max)
> +{
> + static unsigned long next_bit;
> + unsigned long offset, flags, start, pfn;
> + int k;
> +
> + if (max >= mask_max_pfn)
> + max = mask_max_pfn;
You can omit the assignment when max == mask_max_pfn, i.e. test with `>` instead of `>=`.
> + start = mask_max_pfn - max;
> +
> + spin_lock_irqsave(&mask_bitmap_lock, flags);
> + offset = -1L;
> +
> + if (next_bit >= start && next_bit + pages < (mask_max_pfn - (max>>1))) {
> + offset = find_next_zero_string(mask_bitmap, next_bit,
> + mask_max_pfn, pages);
> + if (offset != -1L)
> + count_vm_events(MASK_BITMAP_SKIP, offset - next_bit);
> + }
> + if (offset == -1L) {
> + offset = find_next_zero_string(mask_bitmap, start,
> + mask_max_pfn, pages);
> + if (offset != -1L)
> + count_vm_events(MASK_BITMAP_SKIP, offset - start);
> + }
> + if (offset != -1L) {
> + for (k = 0; k < pages; k++) {
> + BUG_ON(test_bit(offset + k, mask_bitmap));
> + set_bit(offset + k, mask_bitmap);
> + }
> + next_bit = offset + pages;
> + if (next_bit >= mask_max_pfn)
> + next_bit = start;
> + }
> + spin_unlock_irqrestore(&mask_bitmap_lock, flags);
> + if (offset == -1L)
> + return -1L;
> +
> + offset += pages - 1;
> + pfn = maskbm_index_to_pfn(offset);
> +
> + BUG_ON(maskbm_index_to_pfn(offset) != pfn);
> + return pfn;
> +}
Hannes
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-03-07 10:55 UTC|newest]
Thread overview: 112+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-07 9:07 [PATCH] [0/13] General DMA zone rework Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [2/13] Make get_order(0) return 0 Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [3/13] Make kvm bad_page symbol static Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [4/13] Prepare page_alloc for the maskable allocator Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 18:19 ` Sam Ravnborg
2008-03-07 18:19 ` Sam Ravnborg
2008-03-07 18:36 ` Cyrill Gorcunov
2008-03-07 18:36 ` Cyrill Gorcunov
2008-03-07 19:02 ` Andi Kleen
2008-03-07 19:02 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [5/13] Add mask allocator statistics to vmstat.[ch] Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-08 2:24 ` Christoph Lameter
2008-03-08 2:24 ` Christoph Lameter
2008-03-07 9:07 ` [PATCH] [6/13] Core maskable allocator Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 10:53 ` Johannes Weiner [this message]
2008-03-07 10:53 ` Johannes Weiner
2008-03-07 11:14 ` Andi Kleen
2008-03-07 11:14 ` Andi Kleen
2008-03-07 17:05 ` Randy Dunlap
2008-03-07 17:05 ` Randy Dunlap
2008-03-07 17:31 ` Andi Kleen
2008-03-07 17:31 ` Andi Kleen
2008-03-07 17:33 ` Randy Dunlap
2008-03-07 17:33 ` Randy Dunlap
2008-03-07 17:43 ` Andi Kleen
2008-03-07 17:43 ` Andi Kleen
2008-03-07 17:51 ` Randy Dunlap
2008-03-07 17:51 ` Randy Dunlap
2008-03-07 21:13 ` Cyrill Gorcunov
2008-03-07 21:13 ` Cyrill Gorcunov
2008-03-07 23:28 ` Andi Kleen
2008-03-07 23:28 ` Andi Kleen
2008-03-08 5:03 ` KAMEZAWA Hiroyuki
2008-03-08 5:03 ` KAMEZAWA Hiroyuki
2008-03-08 5:41 ` KAMEZAWA Hiroyuki
2008-03-08 5:41 ` KAMEZAWA Hiroyuki
2008-03-08 11:41 ` Andi Kleen
2008-03-08 11:41 ` Andi Kleen
2008-03-11 15:34 ` Jonathan Corbet
2008-03-11 15:34 ` Jonathan Corbet
2008-03-11 15:54 ` Andi Kleen
2008-03-11 15:54 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [7/13] Implement compat hooks for GFP_DMA Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [8/13] Enable the mask allocator for x86 Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 18:32 ` Sam Ravnborg
2008-03-07 18:32 ` Sam Ravnborg
2008-03-07 19:03 ` Andi Kleen
2008-03-07 19:03 ` Andi Kleen
2008-03-07 19:09 ` Sam Ravnborg
2008-03-07 19:09 ` Sam Ravnborg
2008-03-08 2:37 ` Christoph Lameter
2008-03-08 2:37 ` Christoph Lameter
2008-03-08 6:35 ` Yinghai Lu
2008-03-08 6:35 ` Yinghai Lu
2008-03-08 7:31 ` Christoph Lameter
2008-03-08 7:31 ` Christoph Lameter
2008-03-08 11:54 ` Andi Kleen
2008-03-08 11:54 ` Andi Kleen
2008-03-10 17:13 ` Christoph Lameter
2008-03-10 17:13 ` Christoph Lameter
2008-03-07 9:07 ` [PATCH] [9/13] Remove set_dma_reserve Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [10/13] Switch the 32bit dma_alloc_coherent functions over to use the maskable allocator Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [11/13] Switch x86-64 dma_alloc_coherent over to " Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 9:07 ` [PATCH] [12/13] Add vmstat statistics for new swiotlb code Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-08 2:38 ` Christoph Lameter
2008-03-08 2:38 ` Christoph Lameter
2008-03-07 9:07 ` [PATCH] [13/13] Convert x86-64 swiotlb to use the mask allocator directly Andi Kleen
2008-03-07 9:07 ` Andi Kleen
2008-03-07 15:18 ` [PATCH] [0/13] General DMA zone rework Rene Herman
2008-03-07 15:18 ` Rene Herman
2008-03-07 15:22 ` Rene Herman
2008-03-07 15:22 ` Rene Herman
2008-03-07 15:31 ` Andi Kleen
2008-03-07 15:31 ` Andi Kleen
2008-03-07 15:34 ` Andi Kleen
2008-03-07 15:34 ` Andi Kleen
2008-03-07 20:51 ` Luiz Fernando N. Capitulino
2008-03-07 20:51 ` Luiz Fernando N. Capitulino
2008-03-08 0:46 ` Andi Kleen
2008-03-08 0:46 ` Andi Kleen
2008-03-10 18:03 ` Luiz Fernando N. Capitulino
2008-03-10 18:03 ` Luiz Fernando N. Capitulino
2008-03-10 18:08 ` Andi Kleen
2008-03-10 18:08 ` Andi Kleen
2008-03-11 17:26 ` Luiz Fernando N. Capitulino
2008-03-11 17:26 ` Luiz Fernando N. Capitulino
2008-03-11 17:35 ` Andi Kleen
2008-03-11 17:35 ` Andi Kleen
2008-03-11 18:00 ` Luiz Fernando N. Capitulino
2008-03-11 18:00 ` Luiz Fernando N. Capitulino
2008-03-11 18:49 ` Andi Kleen
2008-03-11 18:49 ` Andi Kleen
2008-03-11 19:36 ` Luiz Fernando N. Capitulino
2008-03-11 19:36 ` Luiz Fernando N. Capitulino
2008-03-08 2:42 ` Christoph Lameter
2008-03-08 2:42 ` Christoph Lameter
2008-03-08 11:57 ` Andi Kleen
2008-03-08 11:57 ` Andi Kleen
2008-03-10 17:14 ` Christoph Lameter
2008-03-10 17:14 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=871w6m955h.fsf@saeurebad.de \
--to=hannes@saeurebad.de \
--cc=andi@firstfloor.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.