linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Alexey Korolev <akorolex@gmail.com>
To: Mel Gorman <mel@csn.ul.ie>
Cc: Alexey Korolev <akorolev@infradead.org>, linux-mm@kvack.org
Subject: Re: HugeTLB mapping for drivers (sample driver)
Date: Wed, 15 Jul 2009 12:08:26 +1200	[thread overview]
Message-ID: <202cde0e0907141708g51294247i7a201c34e97f5b66@mail.gmail.com> (raw)
In-Reply-To: <20090714102735.GD28569@csn.ul.ie>

Mel,

Thank you for review.
I'm about to renovate this sample driver in order to handle set of
different scenarios.
Please tell me if you have additional error scenarios. I'll try to
handle them as well.
After that there will be more or less clear picture about should we
involve hugetlbfs or not.

On Tue, Jul 14, 2009 at 10:27 PM, Mel Gorman<mel@csn.ul.ie> wrote:
> On Tue, Jul 14, 2009 at 03:07:47AM +0100, Alexey Korolev wrote:
>> Hi,
>>
>> This is a sample driver which provides huge page mapping to user space.
>> It might be useful for understanding purposes.
>>
>> Here we defined file operations for device driver.
>>
>> We must call htlbfs get_unmapped_area and hugetlbfs_file_mmap functions to
>>  done some HTLB mapping preparations. (If proposed approach is more
>> or less Ok, it will be more accurate to avoid hugetlbfs calls at all - and
>> substitute them with htlb functions).
>> Allocated page get assiciated with mapping via add_to_page_cache call in
>> file->open.
>>
>
> I ran out of time to review this properly, but glancing through I would be
> concerned with what happens on fork() and COW. At a short read, it would
> appear that pages get allocated from alloc_buddy_huge_page() instead of your
> normal function altering the counters for hstate_nores.
>
>> ---
>> diff -Naurp empty/hpage_map.c hpage_map/hpage_map.c
>> --- empty/hpage_map.c 1970-01-01 12:00:00.000000000 +1200
>> +++ hpage_map/hpage_map.c     2009-07-13 18:40:28.000000000 +1200
>> @@ -0,0 +1,137 @@
>> +#include <linux/module.h>
>> +#include <linux/mm.h>
>> +#include <linux/file.h>
>> +#include <linux/pagemap.h>
>> +#include <linux/hugetlb.h>
>> +#include <linux/pagevec.h>
>> +#include <linux/miscdevice.h>
>> +
>> +static void make_file_empty(struct file *file)
>> +{
>> +    struct address_space *mapping = file->f_mapping;
>> +    struct pagevec pvec;
>> +    pgoff_t next = 0;
>> +    int i;
>> +
>> +    pagevec_init(&pvec, 0);
>> +    while (1) {
>> +     if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
>> +         if (!next)
>> +             break;
>> +         next = 0;
>> +         continue;
>> +     }
>> +
>> +     for (i = 0; i < pagevec_count(&pvec); ++i) {
>> +         struct page *page = pvec.pages[i];
>> +
>> +         lock_page(page);
>> +         if (page->index > next)
>> +             next = page->index;
>> +         ++next;
>> +         remove_from_page_cache(page);
>> +         unlock_page(page);
>> +         hugetlb_free_pages(page);
>> +     }
>> +    }
>> +    BUG_ON(mapping->nrpages);
>> +}
>> +
>> +
>> +static int hpage_map_mmap(struct file *file, struct vm_area_struct
>> *vma)
>> +{
>> +     unsigned long idx;
>> +     struct address_space *mapping;
>> +     int ret = VM_FAULT_SIGBUS;
>> +
>> +     idx = vma->vm_pgoff >> huge_page_order(h);
>> +     mapping = file->f_mapping;
>> +     ret = hugetlbfs_file_mmap(file, vma);
>> +
>> +     return ret;
>> +}
>> +
>> +
>> +static unsigned long hpage_map_get_unmapped_area(struct file *file,
>> +     unsigned long addr, unsigned long len, unsigned long pgoff,
>> +     unsigned long flags)
>> +{
>> +     return hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
>> +}
>> +
>> +static int hpage_map_open(struct inode * inode, struct file * file)
>> +{
>> +    struct page *page;
>> +    int num_hpages = 10, cnt = 0;
>
> What happens if the mmap() call is more than 10 pages? What if the process
> fork()s, the mapping is MAP_PRIVATE and the child is long lived causing
> a COW fault on the parent process when it next writes the mapping and the
> subsequent allocation fails?
>
> Again, I'm worried that by avoiding hugetlbfs, your drivers end up
> trying to solve all the same problems.
>
>> +    int ret = 0;
>> +
>> +    /* Announce  hugetlb file mapping */
>> +    mapping_set_hugetlb(file->f_mapping);
>> +
>> +    for (cnt = 0; cnt < num_hpages; cnt++ ) {
>> +     page = hugetlb_alloc_pages_node(0,GFP_KERNEL);
>> +     if (IS_ERR(page)) {
>> +         ret = -PTR_ERR(page);
>> +         goto out_err;
>> +     }
>> +     ret = add_to_page_cache(page, file->f_mapping, cnt, GFP_KERNEL);
>> +     if (ret) {
>> +         hugetlb_free_pages(page);
>> +         goto out_err;
>> +     }
>> +     SetPageUptodate(page);
>> +     unlock_page(page);
>> +    }
>> +    return 0;
>> +out_err:
>> +    printk(KERN_ERR"%s : Error %d \n",__func__, ret);
>> +    make_file_empty(file);
>> +    return ret;
>> +}
>> +
>> +
>> +static int hpage_map_release(struct inode * inode, struct file * file)
>> +{
>> +    make_file_empty(file);
>> +    return 0;
>> +}
>> +/*
>> + * The file operations for /dev/hpage_map
>> + */
>> +static const struct file_operations hpage_map_fops = {
>> +     .owner          = THIS_MODULE,
>> +     .mmap           = hpage_map_mmap,
>> +     .open           = hpage_map_open,
>> +     .release        = hpage_map_release,
>> +     .get_unmapped_area      = hpage_map_get_unmapped_area,
>> +};
>> +
>> +static struct miscdevice hpage_map_dev = {
>> +     MISC_DYNAMIC_MINOR,
>> +     "hpage_map",
>> +     &hpage_map_fops
>> +};
>> +
>> +static int __init
>> +hpage_map_init(void)
>> +{
>> +     /* Create the device in the /sys/class/misc directory. */
>> +     if (misc_register(&hpage_map_dev))
>> +             return -EIO;
>> +     return 0;
>> +}
>> +
>> +module_init(hpage_map_init);
>> +
>> +static void __exit
>> +hpage_map_exit(void)
>> +{
>> +     misc_deregister(&hpage_map_dev);
>> +}
>> +
>> +module_exit(hpage_map_exit);
>> +
>> +MODULE_LICENSE("GPL");
>> +MODULE_AUTHOR("Alexey Korolev");
>> +MODULE_DESCRIPTION("Example of driver with hugetlb mapping");
>> +MODULE_VERSION("1.0");
>> diff -Naurp empty/Makefile hpage_map/Makefile
>> --- empty/Makefile    1970-01-01 12:00:00.000000000 +1200
>> +++ hpage_map/Makefile        2009-07-13 18:31:27.000000000 +1200
>> @@ -0,0 +1,7 @@
>> +obj-m := hpage_map.o
>> +
>> +KDIR  := /lib/modules/$(shell uname -r)/build
>> +PWD   := $(shell pwd)
>> +
>> +default:
>> +     $(MAKE) -C $(KDIR) M=$(PWD) modules
>>
>
> --
> Mel Gorman
> Part-time Phd Student                          Linux Technology Center
> University of Limerick                         IBM Dublin Software Lab
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2009-07-14 23:33 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-14  2:07 HugeTLB mapping for drivers (sample driver) Alexey Korolev
2009-07-14 10:27 ` Mel Gorman
2009-07-15  0:08   ` Alexey Korolev [this message]
2009-07-19 13:39     ` Alexey Korolev
2009-07-20  8:11       ` Mel Gorman
2009-07-21  9:32         ` Alexey Korolev
2009-07-21  9:40           ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=202cde0e0907141708g51294247i7a201c34e97f5b66@mail.gmail.com \
    --to=akorolex@gmail.com \
    --cc=akorolev@infradead.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).