[RFC] mm: generic adaptive large memory allocation APIs

linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [RFC] mm: generic adaptive large memory allocation APIs
@ 2010-05-06  0:30 Changli Gao
  2010-05-06  0:37 ` Changli Gao
                   ` (5 more replies)
  0 siblings, 6 replies; 16+ messages in thread
From: Changli Gao @ 2010-05-06  0:30 UTC (permalink / raw)
  To: akpm
  Cc: Eric Dumazet, Jiri Slaby, Changli Gao, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

kvmalloc() will try to allocate physically contiguous memory first, and try 
vmalloc to allocate virtually contiguous memory when the former allocation
fails.

kvfree() is used to free the memory allocated by kvmalloc(). It can't be used
in atomic context. If the callers are in atomic contex, they can use
kvfree_inatomic() instead.

There is much duplicate code to do such things in kernel, so I generate the
above APIs.

Thank Eric Dumazet for the "kv" prefix. :)

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>

void *kvmalloc(size_t size)
{
	void *ptr;

	if (size < PAGE_SIZE)
		return kmalloc(PAGE_SIZE, GFP_KERNEL);
	ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
	if (ptr != NULL)
		return ptr;

	return vmalloc(size);
}
EXPORT_SYMBOL(kvmalloc);

void kvfree(void *ptr, size_t size)
{
	if (size < PAGE_SIZE)
		kfree(ptr);
	else if (is_vmalloc_addr(ptr))
		vfree(ptr);
	else
		free_pages_exact(ptr, size);
}
EXPORT_SYMBOL(kvfree);

struct kvfree_work_struct {
	struct work_struct	work;
	void			*head;
	void			**ptail;
};

DEFINE_PER_CPU(struct kvfree_work_struct, kvfree_work_struct);

static void kvfree_work(struct work_struct *_work)
{
	struct kvfree_work_struct *work;
	void *head, *tmp;
	
	work = container_of(_work, struct kvfree_work_struct, work);
	local_bh_disable();
	head = work->head;
	work->head = NULL;
	work->ptail = &work->head;
	local_bh_enable();

	while (head) {
		tmp = head;
		head = *(void **)head;
		vfree(tmp);
	}
}

void kvfree_inatomic(void *ptr, size_t size)
{
	if (size < PAGE_SIZE) {
		kfree(ptr);
	} else if (is_vmalloc_addr(ptr)) {
		struct kvfree_work_struct *work;

		*(void **)ptr = NULL;
		local_irq_disable();
		work = this_cpu_ptr(&kvfree_work_struct);
		*(work->ptail) = ptr;
		work->ptail = (void**)ptr;
		schedule_work(&work->work);
		local_irq_enable();
	} else {
		free_pages_exact(ptr, size);
	}
}
EXPORT_SYMBOL(kvfree_inatomic);

static int kvfree_work_struct_init(void)
{
	int cpu;
	struct kvfree_work_struct *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&kvfree_work_struct, cpu);
		INIT_WORK(&work->work, kvfree_work);
		work->head = NULL;
		work->ptail = &work->head;
	}

	return 0;
}
//pure_initcall(kvfree_work_struct_init);

//--------------------
// for testing
static int test_init(void)
{
	int size;
	void *ptr;

	kvfree_work_struct_init();
	for (size = 1; size < (1<<30); size <<= 1) {
		ptr = kvmalloc(size);
		if (is_vmalloc_addr(ptr)) {
			printk("%d\n", size);
			break;
		}
		kvfree(ptr, size);
	}

	return 0;
}
module_init(test_init);

static void test_exit(void)
{
	int cpu;
	struct kvfree_work_struct *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&kvfree_work_struct, cpu);
		cancel_work_sync(&work->work);
	}
}
module_exit(test_exit);

MODULE_LICENSE("GPL");

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-06  0:30 [RFC] mm: generic adaptive large memory allocation APIs Changli Gao
@ 2010-05-06  0:37 ` Changli Gao
  2010-05-06  1:25 ` Changli Gao
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Changli Gao @ 2010-05-06  0:37 UTC (permalink / raw)
  To: akpm
  Cc: Eric Dumazet, Jiri Slaby, Changli Gao, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

On Thu, May 6, 2010 at 8:30 AM, Changli Gao <xiaosuo@gmail.com> wrote:
> kvmalloc() will try to allocate physically contiguous memory first, and try
> vmalloc to allocate virtually contiguous memory when the former allocation
> fails.
>
> kvfree() is used to free the memory allocated by kvmalloc(). It can't be used
> in atomic context. If the callers are in atomic contex, they can use
> kvfree_inatomic() instead.
>
> There is much duplicate code to do such things in kernel, so I generate the
> above APIs.
>
> Thank Eric Dumazet for the "kv" prefix. :)
>
> #include <linux/kernel.h>
> #include <linux/module.h>
> #include <linux/mm.h>
> #include <linux/init.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
> #include <linux/interrupt.h>
>
> void *kvmalloc(size_t size)
> {
>        void *ptr;
>
>        if (size < PAGE_SIZE)
>                return kmalloc(PAGE_SIZE, GFP_KERNEL);
>        ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
>        if (ptr != NULL)
>                return ptr;
>
>        return vmalloc(size);
> }
> EXPORT_SYMBOL(kvmalloc);
>
> void kvfree(void *ptr, size_t size)
> {
>        if (size < PAGE_SIZE)
>                kfree(ptr);
>        else if (is_vmalloc_addr(ptr))
>                vfree(ptr);
>        else
>                free_pages_exact(ptr, size);
> }
> EXPORT_SYMBOL(kvfree);
>
> struct kvfree_work_struct {
>        struct work_struct      work;
>        void                    *head;
>        void                    **ptail;
> };
>
> DEFINE_PER_CPU(struct kvfree_work_struct, kvfree_work_struct);
>
> static void kvfree_work(struct work_struct *_work)
> {
>        struct kvfree_work_struct *work;
>        void *head, *tmp;
>
>        work = container_of(_work, struct kvfree_work_struct, work);
>        local_bh_disable();
>        head = work->head;
>        work->head = NULL;
>        work->ptail = &work->head;
>        local_bh_enable();

local_bh_disable should be local_irq_disable(), and local_bh_enable()
should be local_irq_enable().

>
>        while (head) {
>                tmp = head;
>                head = *(void **)head;
>                vfree(tmp);
>        }
> }
>
> void kvfree_inatomic(void *ptr, size_t size)
> {
>        if (size < PAGE_SIZE) {
>                kfree(ptr);
>        } else if (is_vmalloc_addr(ptr)) {
>                struct kvfree_work_struct *work;
>
>                *(void **)ptr = NULL;
>                local_irq_disable();
>                work = this_cpu_ptr(&kvfree_work_struct);
>                *(work->ptail) = ptr;
>                work->ptail = (void**)ptr;
>                schedule_work(&work->work);
>                local_irq_enable();
>        } else {
>                free_pages_exact(ptr, size);
>        }
> }
> EXPORT_SYMBOL(kvfree_inatomic);
>
> static int kvfree_work_struct_init(void)
> {
>        int cpu;
>        struct kvfree_work_struct *work;
>
>        for_each_possible_cpu(cpu) {
>                work = per_cpu_ptr(&kvfree_work_struct, cpu);
>                INIT_WORK(&work->work, kvfree_work);
>                work->head = NULL;
>                work->ptail = &work->head;
>        }
>
>        return 0;
> }
> //pure_initcall(kvfree_work_struct_init);
>
> //--------------------
> // for testing
> static int test_init(void)
> {
>        int size;
>        void *ptr;
>
>        kvfree_work_struct_init();
>        for (size = 1; size < (1<<30); size <<= 1) {
>                ptr = kvmalloc(size);
>                if (is_vmalloc_addr(ptr)) {
>                        printk("%d\n", size);
>                        break;
>                }
>                kvfree(ptr, size);
>        }
>
>        return 0;
> }
> module_init(test_init);
>
> static void test_exit(void)
> {
>        int cpu;
>        struct kvfree_work_struct *work;
>
>        for_each_possible_cpu(cpu) {
>                work = per_cpu_ptr(&kvfree_work_struct, cpu);
>                cancel_work_sync(&work->work);
>        }
> }
> module_exit(test_exit);
>
> MODULE_LICENSE("GPL");
>



-- 
Regards，
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-06  0:30 [RFC] mm: generic adaptive large memory allocation APIs Changli Gao
  2010-05-06  0:37 ` Changli Gao
@ 2010-05-06  1:25 ` Changli Gao
  2010-05-06  3:12 ` Tetsuo Handa
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 16+ messages in thread
From: Changli Gao @ 2010-05-06  1:25 UTC (permalink / raw)
  To: akpm
  Cc: Eric Dumazet, Jiri Slaby, Changli Gao, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

On Thu, May 6, 2010 at 8:30 AM, Changli Gao <xiaosuo@gmail.com> wrote:
> kvmalloc() will try to allocate physically contiguous memory first, and try
> vmalloc to allocate virtually contiguous memory when the former allocation
> fails.
>
> kvfree() is used to free the memory allocated by kvmalloc(). It can't be used
> in atomic context. If the callers are in atomic contex, they can use
> kvfree_inatomic() instead.
>
> There is much duplicate code to do such things in kernel, so I generate the
> above APIs.
>
> Thank Eric Dumazet for the "kv" prefix. :)
>
> #include <linux/kernel.h>
> #include <linux/module.h>
> #include <linux/mm.h>
> #include <linux/init.h>
> #include <linux/slab.h>
> #include <linux/vmalloc.h>
> #include <linux/interrupt.h>
>
> void *kvmalloc(size_t size)
> {
>        void *ptr;
>
>        if (size < PAGE_SIZE)
>                return kmalloc(PAGE_SIZE, GFP_KERNEL);

typo mistake, should be kmalloc(size, GFP_KERNEL), thank Tetsuo Handa
<penguin-kernel@i-love.sakura.ne.jp>.



-- 
Regards，
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-06  0:30 [RFC] mm: generic adaptive large memory allocation APIs Changli Gao
  2010-05-06  0:37 ` Changli Gao
  2010-05-06  1:25 ` Changli Gao
@ 2010-05-06  3:12 ` Tetsuo Handa
  2010-05-06  3:22   ` Changli Gao
  2010-05-06 15:35 ` Jamie Lokier
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 16+ messages in thread
From: Tetsuo Handa @ 2010-05-06  3:12 UTC (permalink / raw)
  To: xiaosuo
  Cc: eric.dumazet, jslaby, viro, paulmck, adobriyan, mingo, peterz,
	linux-fsdevel, linux-kernel, avi, akpm

Changli Gao wrote:
> struct kvfree_work_struct {
> 	struct work_struct	work;
> 	void			*head;
> 	void			**ptail;
> };

I wonder why "struct kvfree_work_struct" is needed.
According to http://kernel.ubuntu.com/git?p=jj/ubuntu-lucid.git;a=blobdiff;f=security/apparmor/match.c;h=d2cd55419acfcae85cb748c8f837a4384a3a0d29;hp=afc2dd2260edffcf88521ae86458ad03aa8ea12c;hb=f5eba4b0a01cc671affa429ba1512b6de7caeb5b;hpb=abdff9ddaf2644d0f9962490f73e030806ba90d3 ,

static void kvfree_work(struct work_struct *work)
{
	vfree(work);
}

void kvfree_inatomic(void *ptr, size_t size)
{
	if (size < PAGE_SIZE) {
		kfree(ptr);
	} else if (is_vmalloc_addr(ptr)) {
		/*
		 * We can embed "struct work_struct" inside *ptr
		 * because size >= PAGE_SIZE.
		 */
		struct work_struct *work = ptr;
		BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
		INIT_WORK(&work, kvfree_work);
		schedule_work(&work);
	} else {
		free_pages_exact(ptr, size);
	}
}

should do what you want. (Though, I didn't test it.)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-06  3:12 ` Tetsuo Handa
@ 2010-05-06  3:22   ` Changli Gao
  0 siblings, 0 replies; 16+ messages in thread
From: Changli Gao @ 2010-05-06  3:22 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: eric.dumazet, jslaby, viro, paulmck, adobriyan, mingo, peterz,
	linux-fsdevel, linux-kernel, avi, akpm

2010/5/6 Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>:
> Changli Gao wrote:
>> struct kvfree_work_struct {
>>       struct work_struct      work;
>>       void                    *head;
>>       void                    **ptail;
>> };
>
> I wonder why "struct kvfree_work_struct" is needed.
> According to http://kernel.ubuntu.com/git?p=jj/ubuntu-lucid.git;a=blobdiff;f=security/apparmor/match.c;h=d2cd55419acfcae85cb748c8f837a4384a3a0d29;hp=afc2dd2260edffcf88521ae86458ad03aa8ea12c;hb=f5eba4b0a01cc671affa429ba1512b6de7caeb5b;hpb=abdff9ddaf2644d0f9962490f73e030806ba90d3 ,
>
> static void kvfree_work(struct work_struct *work)
> {
>        vfree(work);
> }
>
> void kvfree_inatomic(void *ptr, size_t size)
> {
>        if (size < PAGE_SIZE) {
>                kfree(ptr);
>        } else if (is_vmalloc_addr(ptr)) {
>                /*
>                 * We can embed "struct work_struct" inside *ptr
>                 * because size >= PAGE_SIZE.
>                 */
>                struct work_struct *work = ptr;
>                BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
>                INIT_WORK(&work, kvfree_work);
>                schedule_work(&work);

&work should be work. It is a much better idea. thanks very much.

>        } else {
>                free_pages_exact(ptr, size);
>        }
> }
>
> should do what you want. (Though, I didn't test it.)
>

-- 
Regards，
Changli Gao(xiaosuo@gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-06  0:30 [RFC] mm: generic adaptive large memory allocation APIs Changli Gao
                   ` (2 preceding siblings ...)
  2010-05-06  3:12 ` Tetsuo Handa
@ 2010-05-06 15:35 ` Jamie Lokier
  2010-05-07  4:32 ` Changli Gao
  2010-05-13  4:45 ` KOSAKI Motohiro
  5 siblings, 0 replies; 16+ messages in thread
From: Jamie Lokier @ 2010-05-06 15:35 UTC (permalink / raw)
  To: Changli Gao
  Cc: akpm, Eric Dumazet, Jiri Slaby, Alexander Viro, Paul E. McKenney,
	Alexey Dobriyan, Ingo Molnar, Peter Zijlstra, linux-fsdevel,
	linux-kernel, Avi Kivity, Tetsuo Handa

Changli Gao wrote:
> kvmalloc() will try to allocate physically contiguous memory first, and try 
> vmalloc to allocate virtually contiguous memory when the former allocation
> fails.

Note that converting users from vmalloc() to kvmalloc() may increase
fragmentation problems for other parts of the kernel, because it will
tend to use up more of the available large blocks.  Especially users
who allocate large blocks and often.  That's worth a mention
somewhere.

On the other hand, this API could make it easier to convert some kmalloc()
calls to kvmalloc(), reducing fragmentation problems. :-)

Since the caller is indicating they don't mind which happens, then
anti-fragmentation heuristics (such as checking watermarks) could be
added to kvmalloc() at some future time, if needed.

-- Jamie

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-06  0:30 [RFC] mm: generic adaptive large memory allocation APIs Changli Gao
                   ` (3 preceding siblings ...)
  2010-05-06 15:35 ` Jamie Lokier
@ 2010-05-07  4:32 ` Changli Gao
  2010-05-07 12:42   ` Tetsuo Handa
  2010-05-13  4:45 ` KOSAKI Motohiro
  5 siblings, 1 reply; 16+ messages in thread
From: Changli Gao @ 2010-05-07  4:32 UTC (permalink / raw)
  To: akpm
  Cc: Eric Dumazet, Jiri Slaby, Changli Gao, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

On Thu, May 6, 2010 at 8:30 AM, Changli Gao <xiaosuo@gmail.com> wrote:
> void kvfree(void *ptr, size_t size)
> {
>        if (size < PAGE_SIZE)
>                kfree(ptr);
>        else if (is_vmalloc_addr(ptr))
>                vfree(ptr);
>        else
>                free_pages_exact(ptr, size);
> }

I found a way to eliminate the size argument of kvfree() and
kvfree_inatomic(). We can check where the page_head is slab or
compound, as pages owned by slab  subsystem always have Slab flag set
or are compound.

                page = virt_to_head_page(ptr);
                if (PageSlab(page) || PageCompound(page))
                        kfree(ptr);
                else
                        free_pages_exact(ptr, page->private);

And we can save the size argument in page->private after
alloc_pages_eact() returns.

        ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
        if (ptr != NULL) {
                virt_to_head_page(ptr)->private = size;
                return ptr;
        }

here is the test code:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>

#define kmalloc(size, gfp) ({ \
        void *ptr = kmalloc(size, gfp); \
        if (ptr != NULL) \
                printk("kmalloc: %p(%u)\n", ptr, size); \
        ptr; })

#define alloc_pages_exact(size, gfp) ({ \
        void *ptr = alloc_pages_exact(size, gfp); \
        if (ptr != NULL) \
                printk("alloc_pages_exact: %p(%u)\n", ptr, size); \
        ptr; })

#define vmalloc(size) ({ \
        void *ptr = vmalloc(size); \
        if (ptr != NULL) \
                printk("vmalloc: %p(%u)\n", ptr, size); \
        ptr; })

#define kfree(ptr) do { printk("kfree: %p\n", ptr); kfree(ptr); } while (0)
#define vfree(ptr) do { printk("vfree: %p\n", ptr); vfree(ptr); } while (0)
#define free_pages_exact(ptr, size) do { \
                printk("free_pages_exact: %p(%u)\n", ptr, size); \
                free_pages_exact(ptr, size); \
        } while (0)

void *kvmalloc(size_t size)
{
        void *ptr;

        if (size < PAGE_SIZE)
                return kmalloc(size, GFP_KERNEL);
        ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
        if (ptr != NULL) {
                virt_to_head_page(ptr)->private = size;
                return ptr;
        }

        return vmalloc(size);
}
EXPORT_SYMBOL(kvmalloc);

static void kvfree_work(struct work_struct *work)
{
        vfree(work);
}

static void __kvfree(void *ptr, bool inatomic)
{
        if (unlikely(ZERO_OR_NULL_PTR(ptr)))
                return;
        if (is_vmalloc_addr(ptr)) {
                if (inatomic) {
                        struct work_struct *work;

                        work = ptr;
                        BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
                        INIT_WORK(work, kvfree_work);
                        schedule_work(work);
                } else {
                        vfree(ptr);
                }
        } else {
                struct page *page;

                page = virt_to_head_page(ptr);
                if (PageSlab(page) || PageCompound(page))
                        kfree(ptr);
                else
                        free_pages_exact(ptr, page->private);
        }
}

void kvfree(void *ptr)
{
        __kvfree(ptr, false);
}
EXPORT_SYMBOL(kvfree);

void kvfree_inatomic(void *ptr)
{
        __kvfree(ptr, true);
}
EXPORT_SYMBOL(kvfree_inatomic);

//--------------------
// for testing
static int test_init(void)
{
        int size;
        void *ptr;

        for (size = 1; size < (1<<30); size <<= 1) {
                ptr = kvmalloc(size);
                if (ptr == NULL)
                        return -1;
                if (is_vmalloc_addr(ptr)) {
                        kvfree(ptr);
                        break;
                }
                kvfree(ptr);
        }
        ptr = kvmalloc(size);
        if (ptr == NULL)
                return -1;
        kvfree_inatomic(ptr);

        return 0;
}
module_init(test_init);

static void test_exit(void)
{
}
module_exit(test_exit);

MODULE_LICENSE("GPL");

-- 
Regards，
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-07  4:32 ` Changli Gao
@ 2010-05-07 12:42   ` Tetsuo Handa
  2010-05-07 12:52     ` Peter Zijlstra
  0 siblings, 1 reply; 16+ messages in thread
From: Tetsuo Handa @ 2010-05-07 12:42 UTC (permalink / raw)
  To: xiaosuo, akpm
  Cc: eric.dumazet, jslaby, viro, paulmck, adobriyan, mingo, peterz,
	linux-fsdevel, linux-kernel, avi

Changli Gao wrote:
> static void __kvfree(void *ptr, bool inatomic)

inatomic might be confusing because what vfree() checks is
BUG_ON(in_interrupt()) rather than BUG_ON(in_atomic()).

> {
>         if (unlikely(ZERO_OR_NULL_PTR(ptr)))
>                 return;
>         if (is_vmalloc_addr(ptr)) {
>                 if (inatomic) {

By the way, is in_interrupt() a heavy operation?

  register unsigned long current_stack_pointer asm("esp") __used;
  static inline struct thread_info *current_thread_info(void)
  {
  	return (struct thread_info *)
  		(current_stack_pointer & ~(THREAD_SIZE - 1));
  }
  #define preempt_count() (current_thread_info()->preempt_count)
  #define irq_count()     (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
  #define in_interrupt()  (irq_count())

If we can agree on changing from (inatomic) to (in_interrupt()),
we can merge kvfree() and kvfree_inatomic().

>                         struct work_struct *work;
> 
>                         work = ptr;
>                         BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
>                         INIT_WORK(work, kvfree_work);
>                         schedule_work(work);
>                 } else {
>                         vfree(ptr);
>                 }
>         } else {
>                 struct page *page;
> 
>                 page = virt_to_head_page(ptr);
>                 if (PageSlab(page) || PageCompound(page))
>                         kfree(ptr);
>                 else
>                         free_pages_exact(ptr, page->private);
>         }
> }

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-07 12:42   ` Tetsuo Handa
@ 2010-05-07 12:52     ` Peter Zijlstra
  0 siblings, 0 replies; 16+ messages in thread
From: Peter Zijlstra @ 2010-05-07 12:52 UTC (permalink / raw)
  To: Tetsuo Handa
  Cc: xiaosuo, akpm, eric.dumazet, jslaby, viro, paulmck, adobriyan,
	mingo, linux-fsdevel, linux-kernel, avi

On Fri, 2010-05-07 at 21:42 +0900, Tetsuo Handa wrote:
> Changli Gao wrote:
> > static void __kvfree(void *ptr, bool inatomic)
> 
> inatomic might be confusing because what vfree() checks is
> BUG_ON(in_interrupt()) rather than BUG_ON(in_atomic()).
> 
> > {
> >         if (unlikely(ZERO_OR_NULL_PTR(ptr)))
> >                 return;
> >         if (is_vmalloc_addr(ptr)) {
> >                 if (inatomic) {
> 
> By the way, is in_interrupt() a heavy operation?
> 
>   register unsigned long current_stack_pointer asm("esp") __used;
>   static inline struct thread_info *current_thread_info(void)
>   {
>   	return (struct thread_info *)
>   		(current_stack_pointer & ~(THREAD_SIZE - 1));
>   }
>   #define preempt_count() (current_thread_info()->preempt_count)
>   #define irq_count()     (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
>   #define in_interrupt()  (irq_count())
> 
> If we can agree on changing from (inatomic) to (in_interrupt()),
> we can merge kvfree() and kvfree_inatomic().

I really dislike all of that, just don't allow usage from interrupt
context.




^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-06  0:30 [RFC] mm: generic adaptive large memory allocation APIs Changli Gao
                   ` (4 preceding siblings ...)
  2010-05-07  4:32 ` Changli Gao
@ 2010-05-13  4:45 ` KOSAKI Motohiro
  2010-05-13  8:43   ` Jiri Slaby
  5 siblings, 1 reply; 16+ messages in thread
From: KOSAKI Motohiro @ 2010-05-13  4:45 UTC (permalink / raw)
  To: Changli Gao
  Cc: kosaki.motohiro, akpm, Eric Dumazet, Jiri Slaby, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

Hi

> void *kvmalloc(size_t size)
> {
> 	void *ptr;
> 
> 	if (size < PAGE_SIZE)
> 		return kmalloc(PAGE_SIZE, GFP_KERNEL);
> 	ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);

low order GFP_KERNEL allocation never fail. then, this doesn't works
as you expected.

> 	if (ptr != NULL)
> 		return ptr;
> 
> 	return vmalloc(size);

On x86, vmalloc area is only 128MB address space. it is very rare 
resource than physical ram. vmalloc fallback is not good idea.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-13  4:45 ` KOSAKI Motohiro
@ 2010-05-13  8:43   ` Jiri Slaby
  2010-05-13  9:05     ` KOSAKI Motohiro
  0 siblings, 1 reply; 16+ messages in thread
From: Jiri Slaby @ 2010-05-13  8:43 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Changli Gao, akpm, Eric Dumazet, Alexander Viro, Paul E. McKenney,
	Alexey Dobriyan, Ingo Molnar, Peter Zijlstra, linux-fsdevel,
	linux-kernel, Avi Kivity, Tetsuo Handa

On 05/13/2010 06:45 AM, KOSAKI Motohiro wrote:
> Hi
> 
>> void *kvmalloc(size_t size)
>> {
>> 	void *ptr;
>>
>> 	if (size < PAGE_SIZE)
>> 		return kmalloc(PAGE_SIZE, GFP_KERNEL);
>> 	ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
> 
> low order GFP_KERNEL allocation never fail. then, this doesn't works
> as you expected.

Hi, I suppose you mean the kmalloc allocation -- so kmalloc should fail
iff alloc_pages_exact (unless somebody frees a heap of memory indeed)?

>> 	if (ptr != NULL)
>> 		return ptr;
>>
>> 	return vmalloc(size);
> 
> On x86, vmalloc area is only 128MB address space. it is very rare 
> resource than physical ram. vmalloc fallback is not good idea.

These functions are a replacement for explicit
if (!(x = kmalloc()))
   x = vmalloc();
...
if (is_vmalloc(x))
  vfree(x);
else
  kfree(x);
in the code (like fdtable does this).

The 128M limit on x86_32 for vmalloc is configurable so if drivers in
sum need more on some specific hardware, it can be increased on the
command line (I had to do this on one machine in the past).

Anyway as this is a replacement for explicit tests, it shouldn't change
the behaviour in any way. Obviously when a user doesn't need virtually
contiguous space, he shouldn't use this interface at all.

thanks,
-- 
js
suse labs

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-13  8:43   ` Jiri Slaby
@ 2010-05-13  9:05     ` KOSAKI Motohiro
  2010-05-13  9:19       ` Jiri Slaby
  0 siblings, 1 reply; 16+ messages in thread
From: KOSAKI Motohiro @ 2010-05-13  9:05 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: kosaki.motohiro, Changli Gao, akpm, Eric Dumazet, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

Hi

> > Hi
> > 
> >> void *kvmalloc(size_t size)
> >> {
> >> 	void *ptr;
> >>
> >> 	if (size < PAGE_SIZE)
> >> 		return kmalloc(PAGE_SIZE, GFP_KERNEL);
> >> 	ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
> > 
> > low order GFP_KERNEL allocation never fail. then, this doesn't works
> > as you expected.
> 
> Hi, I suppose you mean the kmalloc allocation -- so kmalloc should fail
> iff alloc_pages_exact (unless somebody frees a heap of memory indeed)?

I mean, if size of alloc_pages_exact() argument is less than 8 pages,
alloc_pages_exact() never fail. see __alloc_pages_slowpath().

> 
> >> 	if (ptr != NULL)
> >> 		return ptr;
> >>
> >> 	return vmalloc(size);
> > 
> > On x86, vmalloc area is only 128MB address space. it is very rare 
> > resource than physical ram. vmalloc fallback is not good idea.
> 
> These functions are a replacement for explicit
> if (!(x = kmalloc()))
>    x = vmalloc();
> ...
> if (is_vmalloc(x))
>   vfree(x);
> else
>   kfree(x);
> in the code (like fdtable does this).
> 
> The 128M limit on x86_32 for vmalloc is configurable so if drivers in
> sum need more on some specific hardware, it can be increased on the
> command line (I had to do this on one machine in the past).

Right, but 99% end user don't do this. I don't think this is effective advise.


> Anyway as this is a replacement for explicit tests, it shouldn't change
> the behaviour in any way. Obviously when a user doesn't need virtually
> contiguous space, he shouldn't use this interface at all.

Why can't we make fdtable virtually contiguous free?
Anyway, alloc_fdmem() also don't works as author expected.




^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-13  9:05     ` KOSAKI Motohiro
@ 2010-05-13  9:19       ` Jiri Slaby
  2010-05-13  9:40         ` KOSAKI Motohiro
  0 siblings, 1 reply; 16+ messages in thread
From: Jiri Slaby @ 2010-05-13  9:19 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Changli Gao, akpm, Eric Dumazet, Alexander Viro, Paul E. McKenney,
	Alexey Dobriyan, Ingo Molnar, Peter Zijlstra, linux-fsdevel,
	linux-kernel, Avi Kivity, Tetsuo Handa

On 05/13/2010 11:05 AM, KOSAKI Motohiro wrote:
>>>> void *kvmalloc(size_t size)
>>>> {
>>>> 	void *ptr;
>>>>
>>>> 	if (size < PAGE_SIZE)
>>>> 		return kmalloc(PAGE_SIZE, GFP_KERNEL);
>>>> 	ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
>>>
>>> low order GFP_KERNEL allocation never fail. then, this doesn't works
>>> as you expected.
>>
>> Hi, I suppose you mean the kmalloc allocation -- so kmalloc should fail
>> iff alloc_pages_exact (unless somebody frees a heap of memory indeed)?
> 
> I mean, if size of alloc_pages_exact() argument is less than 8 pages,
> alloc_pages_exact() never fail. see __alloc_pages_slowpath().

Sorry, I don't see what's the problem with that. I can see only that
alloc_pages_exact is superfluous there as kmalloc "won't fail" earlier.

>>>> 	if (ptr != NULL)
>>>> 		return ptr;
>>>>
>>>> 	return vmalloc(size);
>>>
>>> On x86, vmalloc area is only 128MB address space. it is very rare 
>>> resource than physical ram. vmalloc fallback is not good idea.
>>
>> These functions are a replacement for explicit
>> if (!(x = kmalloc()))
>>    x = vmalloc();
>> ...
>> if (is_vmalloc(x))
>>   vfree(x);
>> else
>>   kfree(x);
>> in the code (like fdtable does this).
>>
>> The 128M limit on x86_32 for vmalloc is configurable so if drivers in
>> sum need more on some specific hardware, it can be increased on the
>> command line (I had to do this on one machine in the past).
> 
> Right, but 99% end user don't do this. I don't think this is effective advise.

Indeed. I didn't mean that as the users should change that. They should
only if there is some weird hardware with weird drivers.

>> Anyway as this is a replacement for explicit tests, it shouldn't change
>> the behaviour in any way. Obviously when a user doesn't need virtually
>> contiguous space, he shouldn't use this interface at all.
> 
> Why can't we make fdtable virtually contiguous free?

This is possible, but the question is why to make the code more complex?

> Anyway, alloc_fdmem() also don't works as author expected.

Pardon my ignorance, why? (There are more similar users:
init_section_page_cgroup, sys_add_key, ext4_fill_flex_info and many others.)

-- 
js
suse labs

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-13  9:19       ` Jiri Slaby
@ 2010-05-13  9:40         ` KOSAKI Motohiro
  2010-05-13 10:16           ` Jiri Slaby
  0 siblings, 1 reply; 16+ messages in thread
From: KOSAKI Motohiro @ 2010-05-13  9:40 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: kosaki.motohiro, Changli Gao, akpm, Eric Dumazet, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

> On 05/13/2010 11:05 AM, KOSAKI Motohiro wrote:
> >>>> void *kvmalloc(size_t size)
> >>>> {
> >>>> 	void *ptr;
> >>>>
> >>>> 	if (size < PAGE_SIZE)
> >>>> 		return kmalloc(PAGE_SIZE, GFP_KERNEL);
> >>>> 	ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
> >>>
> >>> low order GFP_KERNEL allocation never fail. then, this doesn't works
> >>> as you expected.
> >>
> >> Hi, I suppose you mean the kmalloc allocation -- so kmalloc should fail
> >> iff alloc_pages_exact (unless somebody frees a heap of memory indeed)?
> > 
> > I mean, if size of alloc_pages_exact() argument is less than 8 pages,
> > alloc_pages_exact() never fail. see __alloc_pages_slowpath().
> 
> Sorry, I don't see what's the problem with that. I can see only that
> alloc_pages_exact is superfluous there as kmalloc "won't fail" earlier.

I don't talk about kmalloc. it's ok to never fail.  but low order alloc_pages_exact() never fail too.
Is this ok? Why?


> >>>> 	if (ptr != NULL)
> >>>> 		return ptr;
> >>>>
> >>>> 	return vmalloc(size);
> >>>
> >>> On x86, vmalloc area is only 128MB address space. it is very rare 
> >>> resource than physical ram. vmalloc fallback is not good idea.
> >>
> >> These functions are a replacement for explicit
> >> if (!(x = kmalloc()))
> >>    x = vmalloc();
> >> ...
> >> if (is_vmalloc(x))
> >>   vfree(x);
> >> else
> >>   kfree(x);
> >> in the code (like fdtable does this).
> >>
> >> The 128M limit on x86_32 for vmalloc is configurable so if drivers in
> >> sum need more on some specific hardware, it can be increased on the
> >> command line (I had to do this on one machine in the past).
> > 
> > Right, but 99% end user don't do this. I don't think this is effective advise.
> 
> Indeed. I didn't mean that as the users should change that. They should
> only if there is some weird hardware with weird drivers.
> 
> >> Anyway as this is a replacement for explicit tests, it shouldn't change
> >> the behaviour in any way. Obviously when a user doesn't need virtually
> >> contiguous space, he shouldn't use this interface at all.
> > 
> > Why can't we make fdtable virtually contiguous free?
> 
> This is possible, but the question is why to make the code more complex?

because  it's broken. Or Am I missing something?


> > Anyway, alloc_fdmem() also don't works as author expected.
> 
> Pardon my ignorance, why? (There are more similar users:
> init_section_page_cgroup, sys_add_key, ext4_fill_flex_info and many others.)

I think init_section_page_cgroup is ok. it's called at boot time. we don't enter forever page reclaim.

but other case, I don't know the reason. I guess they also have specific assumption.
I only said, generically it isn't right.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-13  9:40         ` KOSAKI Motohiro
@ 2010-05-13 10:16           ` Jiri Slaby
  2010-05-13 10:43             ` KOSAKI Motohiro
  0 siblings, 1 reply; 16+ messages in thread
From: Jiri Slaby @ 2010-05-13 10:16 UTC (permalink / raw)
  To: KOSAKI Motohiro
  Cc: Changli Gao, akpm, Eric Dumazet, Alexander Viro, Paul E. McKenney,
	Alexey Dobriyan, Ingo Molnar, Peter Zijlstra, linux-fsdevel,
	linux-kernel, Avi Kivity, Tetsuo Handa

On 05/13/2010 11:40 AM, KOSAKI Motohiro wrote:
>>>> Anyway as this is a replacement for explicit tests, it shouldn't change
>>>> the behaviour in any way. Obviously when a user doesn't need virtually
>>>> contiguous space, he shouldn't use this interface at all.
>>>
>>> Why can't we make fdtable virtually contiguous free?
>>
>> This is possible, but the question is why to make the code more complex?
> 
> because  it's broken.

Well, could you explain what exactly is broken about
x = kmalloc(size, GFP_KERNEL);
if (!x)
  x = vmalloc(size);
? Is is that kmalloc doesn't return until is has the memory to return
when asking for order(size) <= COSTLY_ORDER? I think this is expected.

thanks,
-- 
js
suse labs

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] mm: generic adaptive large memory allocation APIs
  2010-05-13 10:16           ` Jiri Slaby
@ 2010-05-13 10:43             ` KOSAKI Motohiro
  0 siblings, 0 replies; 16+ messages in thread
From: KOSAKI Motohiro @ 2010-05-13 10:43 UTC (permalink / raw)
  To: Jiri Slaby
  Cc: kosaki.motohiro, Changli Gao, akpm, Eric Dumazet, Alexander Viro,
	Paul E. McKenney, Alexey Dobriyan, Ingo Molnar, Peter Zijlstra,
	linux-fsdevel, linux-kernel, Avi Kivity, Tetsuo Handa

> On 05/13/2010 11:40 AM, KOSAKI Motohiro wrote:
> >>>> Anyway as this is a replacement for explicit tests, it shouldn't change
> >>>> the behaviour in any way. Obviously when a user doesn't need virtually
> >>>> contiguous space, he shouldn't use this interface at all.
> >>>
> >>> Why can't we make fdtable virtually contiguous free?
> >>
> >> This is possible, but the question is why to make the code more complex?
> > 
> > because  it's broken.
> 
> Well, could you explain what exactly is broken about
> x = kmalloc(size, GFP_KERNEL);
> if (!x)
>   x = vmalloc(size);
> ? Is is that kmalloc doesn't return until is has the memory to return
> when asking for order(size) <= COSTLY_ORDER? I think this is expected.

Well, but fdtable doesn't really need contenious memory. no?
To make API mean we recommend to use it. but I don't hope to spread this 
wrong habit.  Instead, to kill it seems better.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2010-05-13 10:43 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-06  0:30 [RFC] mm: generic adaptive large memory allocation APIs Changli Gao
2010-05-06  0:37 ` Changli Gao
2010-05-06  1:25 ` Changli Gao
2010-05-06  3:12 ` Tetsuo Handa
2010-05-06  3:22   ` Changli Gao
2010-05-06 15:35 ` Jamie Lokier
2010-05-07  4:32 ` Changli Gao
2010-05-07 12:42   ` Tetsuo Handa
2010-05-07 12:52     ` Peter Zijlstra
2010-05-13  4:45 ` KOSAKI Motohiro
2010-05-13  8:43   ` Jiri Slaby
2010-05-13  9:05     ` KOSAKI Motohiro
2010-05-13  9:19       ` Jiri Slaby
2010-05-13  9:40         ` KOSAKI Motohiro
2010-05-13 10:16           ` Jiri Slaby
2010-05-13 10:43             ` KOSAKI Motohiro

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).