All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric B Munson <ebmunson@us.ibm.com>
To: Mel Gorman <mel@csn.ul.ie>
Cc: linux-mm@kvack.org, npiggin@suse.de, apw@shadowen.org,
	andi@firstfloor.org, hugh@veritas.com,
	avid@gibson.dropbear.id.au, kenneth.w.chen@intel.com,
	wli@holomorphy.com, linux-kernel@vger.kernel.org,
	akpm@linux-foundation.org, starlight@binnacle.cx
Subject: Re: [PATCH] Determine if mapping is MAP_SHARED using VM_MAYSHARE and not VM_SHARED in hugetlbfs
Date: Wed, 20 May 2009 11:12:24 +0100	[thread overview]
Message-ID: <20090520101224.GA6420@us.ibm.com> (raw)
In-Reply-To: <20090519083619.GD19146@csn.ul.ie>

[-- Attachment #1: Type: text/plain, Size: 6548 bytes --]

On Tue, 19 May 2009, Mel Gorman wrote:

> hugetlbfs reserves huge pages and accounts for them differently depending
> on whether the mapping was mapped MAP_SHARED or MAP_PRIVATE. However, the
> check made against VMA->vm_flags is sometimes VM_SHARED and not VM_MAYSHARE.
> For file-backed mappings, such as hugetlbfs, VM_SHARED is set only if the
> mapping is MAP_SHARED *and* it is read-write. For example, if a shared
> memory mapping was created read-write with shmget() for populating of data
> and mapped SHM_RDONLY by other processes, then hugetlbfs gets the accounting
> wrong and reservations leak.
> 
> This patch alters mm/hugetlb.c and replaces VM_SHARED with VM_MAYSHARE when
> the intent of the code was to check whether the VMA was mapped MAP_SHARED
> or MAP_PRIVATE.
> 
> The patch needs wider review as there are places where we really mean
> VM_SHARED and not VM_MAYSHARE. I believe I got all the right places, but a
> second opinion is needed. When/if this patch passes review, it'll be needed
> for 2.6.30 and -stable as it partially addresses the problem reported in
> http://bugzilla.kernel.org/show_bug.cgi?id=13302 and
> http://bugzilla.kernel.org/show_bug.cgi?id=12134.
> 
> Signed-off-by: Mel Gorman <mel@csn.ul.ie>

The libhugetlbfs test suite has a test that triggers this bug reliably.  I have
run this test both with and without this patch on x86_64 and ppc64.  The bug
triggers every time without the patch and does not with the patch applied.

Tested-by: Eric B Munson <ebmunson@us.ibm.com>

> --- 
>  mm/hugetlb.c |   26 +++++++++++++-------------
>  1 file changed, 13 insertions(+), 13 deletions(-)
> 
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index 28c655b..e83ad2c 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -316,7 +316,7 @@ static void resv_map_release(struct kref *ref)
>  static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
>  {
>  	VM_BUG_ON(!is_vm_hugetlb_page(vma));
> -	if (!(vma->vm_flags & VM_SHARED))
> +	if (!(vma->vm_flags & VM_MAYSHARE))
>  		return (struct resv_map *)(get_vma_private_data(vma) &
>  							~HPAGE_RESV_MASK);
>  	return NULL;
> @@ -325,7 +325,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma)
>  static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
>  {
>  	VM_BUG_ON(!is_vm_hugetlb_page(vma));
> -	VM_BUG_ON(vma->vm_flags & VM_SHARED);
> +	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
> 
>  	set_vma_private_data(vma, (get_vma_private_data(vma) &
>  				HPAGE_RESV_MASK) | (unsigned long)map);
> @@ -334,7 +334,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map)
>  static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags)
>  {
>  	VM_BUG_ON(!is_vm_hugetlb_page(vma));
> -	VM_BUG_ON(vma->vm_flags & VM_SHARED);
> +	VM_BUG_ON(vma->vm_flags & VM_MAYSHARE);
> 
>  	set_vma_private_data(vma, get_vma_private_data(vma) | flags);
>  }
> @@ -353,7 +353,7 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
>  	if (vma->vm_flags & VM_NORESERVE)
>  		return;
> 
> -	if (vma->vm_flags & VM_SHARED) {
> +	if (vma->vm_flags & VM_MAYSHARE) {
>  		/* Shared mappings always use reserves */
>  		h->resv_huge_pages--;
>  	} else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
> @@ -369,14 +369,14 @@ static void decrement_hugepage_resv_vma(struct hstate *h,
>  void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
>  {
>  	VM_BUG_ON(!is_vm_hugetlb_page(vma));
> -	if (!(vma->vm_flags & VM_SHARED))
> +	if (!(vma->vm_flags & VM_MAYSHARE))
>  		vma->vm_private_data = (void *)0;
>  }
> 
>  /* Returns true if the VMA has associated reserve pages */
>  static int vma_has_reserves(struct vm_area_struct *vma)
>  {
> -	if (vma->vm_flags & VM_SHARED)
> +	if (vma->vm_flags & VM_MAYSHARE)
>  		return 1;
>  	if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
>  		return 1;
> @@ -924,7 +924,7 @@ static long vma_needs_reservation(struct hstate *h,
>  	struct address_space *mapping = vma->vm_file->f_mapping;
>  	struct inode *inode = mapping->host;
> 
> -	if (vma->vm_flags & VM_SHARED) {
> +	if (vma->vm_flags & VM_MAYSHARE) {
>  		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
>  		return region_chg(&inode->i_mapping->private_list,
>  							idx, idx + 1);
> @@ -949,7 +949,7 @@ static void vma_commit_reservation(struct hstate *h,
>  	struct address_space *mapping = vma->vm_file->f_mapping;
>  	struct inode *inode = mapping->host;
> 
> -	if (vma->vm_flags & VM_SHARED) {
> +	if (vma->vm_flags & VM_MAYSHARE) {
>  		pgoff_t idx = vma_hugecache_offset(h, vma, addr);
>  		region_add(&inode->i_mapping->private_list, idx, idx + 1);
> 
> @@ -1893,7 +1893,7 @@ retry_avoidcopy:
>  	 * at the time of fork() could consume its reserves on COW instead
>  	 * of the full address range.
>  	 */
> -	if (!(vma->vm_flags & VM_SHARED) &&
> +	if (!(vma->vm_flags & VM_MAYSHARE) &&
>  			is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
>  			old_page != pagecache_page)
>  		outside_reserve = 1;
> @@ -2000,7 +2000,7 @@ retry:
>  		clear_huge_page(page, address, huge_page_size(h));
>  		__SetPageUptodate(page);
> 
> -		if (vma->vm_flags & VM_SHARED) {
> +		if (vma->vm_flags & VM_MAYSHARE) {
>  			int err;
>  			struct inode *inode = mapping->host;
> 
> @@ -2104,7 +2104,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
>  			goto out_mutex;
>  		}
> 
> -		if (!(vma->vm_flags & VM_SHARED))
> +		if (!(vma->vm_flags & VM_MAYSHARE))
>  			pagecache_page = hugetlbfs_pagecache_page(h,
>  								vma, address);
>  	}
> @@ -2289,7 +2289,7 @@ int hugetlb_reserve_pages(struct inode *inode,
>  	 * to reserve the full area even if read-only as mprotect() may be
>  	 * called to make the mapping read-write. Assume !vma is a shm mapping
>  	 */
> -	if (!vma || vma->vm_flags & VM_SHARED)
> +	if (!vma || vma->vm_flags & VM_MAYSHARE)
>  		chg = region_chg(&inode->i_mapping->private_list, from, to);
>  	else {
>  		struct resv_map *resv_map = resv_map_alloc();
> @@ -2330,7 +2330,7 @@ int hugetlb_reserve_pages(struct inode *inode,
>  	 * consumed reservations are stored in the map. Hence, nothing
>  	 * else has to be done for private mappings here
>  	 */
> -	if (!vma || vma->vm_flags & VM_SHARED)
> +	if (!vma || vma->vm_flags & VM_MAYSHARE)
>  		region_add(&inode->i_mapping->private_list, from, to);
>  	return 0;
>  }
> 

-- 
Eric B Munson
IBM Linux Technology Center
ebmunson@us.ibm.com


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

  reply	other threads:[~2009-05-20 10:12 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-19  8:36 [PATCH] Determine if mapping is MAP_SHARED using VM_MAYSHARE and not VM_SHARED in hugetlbfs Mel Gorman
2009-05-19  8:36 ` Mel Gorman
2009-05-20 10:12 ` Eric B Munson [this message]
2009-05-25 21:09 ` Hugh Dickins
2009-05-25 21:09   ` Hugh Dickins
2009-05-26 10:12   ` Mel Gorman
2009-05-26 10:12     ` Mel Gorman
2009-05-26 12:54     ` Eric B Munson
2009-05-26 20:51     ` Hugh Dickins
2009-05-26 20:51       ` Hugh Dickins
2009-05-27  0:48       ` Mel Gorman
2009-05-27  0:48         ` Mel Gorman
2009-05-27  3:17         ` KOSAKI Motohiro
2009-05-27  3:17           ` KOSAKI Motohiro
2009-05-27  9:56           ` Mel Gorman
2009-05-27  9:56             ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090520101224.GA6420@us.ibm.com \
    --to=ebmunson@us.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=apw@shadowen.org \
    --cc=avid@gibson.dropbear.id.au \
    --cc=hugh@veritas.com \
    --cc=kenneth.w.chen@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mel@csn.ul.ie \
    --cc=npiggin@suse.de \
    --cc=starlight@binnacle.cx \
    --cc=wli@holomorphy.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.