All of lore.kernel.org
 help / color / mirror / Atom feed
From: Minchan Kim <minchan.kim@gmail.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	cl@linux-foundation.org,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"mingo@elte.hu" <mingo@elte.hu>
Subject: Re: [RFC 3/4] lockless vma caching
Date: Sat, 19 Dec 2009 12:43:31 +0900	[thread overview]
Message-ID: <4B2C4BE3.3030104@gmail.com> (raw)
In-Reply-To: <20091218094513.490f27b4.kamezawa.hiroyu@jp.fujitsu.com>



KAMEZAWA Hiroyuki wrote:
> For accessing vma in lockless style, some modification for vma lookup is
> required. Now, rb-tree is used and it doesn't allow read while modification.
> 
> This is a trial to caching vma rather than diving into rb-tree. The last
> fault vma is cached to pgd's page->cached_vma field. And, add reference count
> and waitqueue to vma.
> 
> The accessor will have to do
> 
> 	vma = lookup_vma_cache(mm, address);
> 	if (vma) {
> 		if (mm_check_version(mm) && /* no write lock at this point ? */
> 		    (vma->vm_start <= address) && (vma->vm_end > address))
> 			goto found_vma; /* start speculative job */
> 		else
> 			vma_release_cache(vma);
> 		vma = NULL;
> 	}
> 	vma = find_vma();
> found_vma:
> 	....do some jobs....
> 	vma_release_cache(vma);
> 
> Maybe some more consideration for invalidation point is necessary.
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> ---
>  include/linux/mm.h       |   20 +++++++++
>  include/linux/mm_types.h |    5 ++
>  mm/memory.c              |   14 ++++++
>  mm/mmap.c                |  102 +++++++++++++++++++++++++++++++++++++++++++++--
>  mm/page_alloc.c          |    1 
>  5 files changed, 138 insertions(+), 4 deletions(-)
> 
> Index: mmotm-mm-accessor/include/linux/mm.h
> ===================================================================
> --- mmotm-mm-accessor.orig/include/linux/mm.h
> +++ mmotm-mm-accessor/include/linux/mm.h
> @@ -763,6 +763,26 @@ unsigned long unmap_vmas(struct mmu_gath
>  		unsigned long end_addr, unsigned long *nr_accounted,
>  		struct zap_details *);
>  
> +struct vm_area_struct *lookup_vma_cache(struct mm_struct *mm,
> +		unsigned long address);
> +void invalidate_vma_cache(struct mm_struct *mm,
> +		struct vm_area_struct *vma);
> +void wait_vmas_cache_range(struct vm_area_struct *vma, unsigned long end);
> +
> +static inline void vma_hold(struct vm_area_struct *vma)
Nitpick:
How about static inline void vma_cache_[get/put] naming?

> +{
> +	atomic_inc(&vma->cache_access);
> +}
> +
> +void __vma_release(struct vm_area_struct *vma);
> +static inline void vma_release(struct vm_area_struct *vma)
> +{
> +	if (atomic_dec_and_test(&vma->cache_access)) {
> +		if (waitqueue_active(&vma->cache_wait))
> +			__vma_release(vma);
> +	}
> +}
> +
>  /**
>   * mm_walk - callbacks for walk_page_range
>   * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
> Index: mmotm-mm-accessor/include/linux/mm_types.h
> ===================================================================
> --- mmotm-mm-accessor.orig/include/linux/mm_types.h
> +++ mmotm-mm-accessor/include/linux/mm_types.h
> @@ -12,6 +12,7 @@
>  #include <linux/completion.h>
>  #include <linux/cpumask.h>
>  #include <linux/page-debug-flags.h>
> +#include <linux/wait.h>
>  #include <asm/page.h>
>  #include <asm/mmu.h>
>  
> @@ -77,6 +78,7 @@ struct page {
>  	union {
>  		pgoff_t index;		/* Our offset within mapping. */
>  		void *freelist;		/* SLUB: freelist req. slab lock */
> +		void *cache;

Let's add annotation "/* vm_area_struct cache when the page is used as page table */".


>  	};
>  	struct list_head lru;		/* Pageout list, eg. active_list
>  					 * protected by zone->lru_lock !
> @@ -180,6 +182,9 @@ struct vm_area_struct {
>  	void * vm_private_data;		/* was vm_pte (shared mem) */
>  	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
>  
> +	atomic_t cache_access;
> +	wait_queue_head_t cache_wait;
> +
>  #ifndef CONFIG_MMU
>  	struct vm_region *vm_region;	/* NOMMU mapping region */
>  #endif
> Index: mmotm-mm-accessor/mm/memory.c
> ===================================================================
> --- mmotm-mm-accessor.orig/mm/memory.c
> +++ mmotm-mm-accessor/mm/memory.c
> @@ -145,6 +145,14 @@ void pmd_clear_bad(pmd_t *pmd)
>  	pmd_clear(pmd);
>  }
>  

Let's put the note here. "The caller needs to hold the pte lock"

> +static void update_vma_cache(pmd_t *pmd, struct vm_area_struct *vma)
> +{
> +	struct page *page;
> +	/* ptelock is held */
> +	page = pmd_page(*pmd);
> +	page->cache = vma;
> +}
> +
>  /*
>   * Note: this doesn't free the actual pages themselves. That
>   * has been handled earlier when unmapping all the memory regions.
> @@ -2118,6 +2126,7 @@ reuse:
>  		if (ptep_set_access_flags(vma, address, page_table, entry,1))
>  			update_mmu_cache(vma, address, entry);
>  		ret |= VM_FAULT_WRITE;
> +		update_vma_cache(pmd, vma);
>  		goto unlock;
>  	}
>  
..
<snip>
..

> Index: mmotm-mm-accessor/mm/page_alloc.c
> ===================================================================
> --- mmotm-mm-accessor.orig/mm/page_alloc.c
> +++ mmotm-mm-accessor/mm/page_alloc.c
> @@ -698,6 +698,7 @@ static int prep_new_page(struct page *pa
>  
>  	set_page_private(page, 0);
>  	set_page_refcounted(page);
> +	page->cache = NULL;

Is here is proper place to initialize page->cache?
It cause unnecessary overhead about not pmd page.

How about pmd_alloc?

>  
>  	arch_alloc_page(page, order);
>  	kernel_map_pages(page, 1 << order, 1);
> 

WARNING: multiple messages have this Message-ID (diff)
From: Minchan Kim <minchan.kim@gmail.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andi Kleen <andi@firstfloor.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	cl@linux-foundation.org,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	"mingo@elte.hu" <mingo@elte.hu>
Subject: Re: [RFC 3/4] lockless vma caching
Date: Sat, 19 Dec 2009 12:43:31 +0900	[thread overview]
Message-ID: <4B2C4BE3.3030104@gmail.com> (raw)
In-Reply-To: <20091218094513.490f27b4.kamezawa.hiroyu@jp.fujitsu.com>



KAMEZAWA Hiroyuki wrote:
> For accessing vma in lockless style, some modification for vma lookup is
> required. Now, rb-tree is used and it doesn't allow read while modification.
> 
> This is a trial to caching vma rather than diving into rb-tree. The last
> fault vma is cached to pgd's page->cached_vma field. And, add reference count
> and waitqueue to vma.
> 
> The accessor will have to do
> 
> 	vma = lookup_vma_cache(mm, address);
> 	if (vma) {
> 		if (mm_check_version(mm) && /* no write lock at this point ? */
> 		    (vma->vm_start <= address) && (vma->vm_end > address))
> 			goto found_vma; /* start speculative job */
> 		else
> 			vma_release_cache(vma);
> 		vma = NULL;
> 	}
> 	vma = find_vma();
> found_vma:
> 	....do some jobs....
> 	vma_release_cache(vma);
> 
> Maybe some more consideration for invalidation point is necessary.
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> ---
>  include/linux/mm.h       |   20 +++++++++
>  include/linux/mm_types.h |    5 ++
>  mm/memory.c              |   14 ++++++
>  mm/mmap.c                |  102 +++++++++++++++++++++++++++++++++++++++++++++--
>  mm/page_alloc.c          |    1 
>  5 files changed, 138 insertions(+), 4 deletions(-)
> 
> Index: mmotm-mm-accessor/include/linux/mm.h
> ===================================================================
> --- mmotm-mm-accessor.orig/include/linux/mm.h
> +++ mmotm-mm-accessor/include/linux/mm.h
> @@ -763,6 +763,26 @@ unsigned long unmap_vmas(struct mmu_gath
>  		unsigned long end_addr, unsigned long *nr_accounted,
>  		struct zap_details *);
>  
> +struct vm_area_struct *lookup_vma_cache(struct mm_struct *mm,
> +		unsigned long address);
> +void invalidate_vma_cache(struct mm_struct *mm,
> +		struct vm_area_struct *vma);
> +void wait_vmas_cache_range(struct vm_area_struct *vma, unsigned long end);
> +
> +static inline void vma_hold(struct vm_area_struct *vma)
Nitpick:
How about static inline void vma_cache_[get/put] naming?

> +{
> +	atomic_inc(&vma->cache_access);
> +}
> +
> +void __vma_release(struct vm_area_struct *vma);
> +static inline void vma_release(struct vm_area_struct *vma)
> +{
> +	if (atomic_dec_and_test(&vma->cache_access)) {
> +		if (waitqueue_active(&vma->cache_wait))
> +			__vma_release(vma);
> +	}
> +}
> +
>  /**
>   * mm_walk - callbacks for walk_page_range
>   * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
> Index: mmotm-mm-accessor/include/linux/mm_types.h
> ===================================================================
> --- mmotm-mm-accessor.orig/include/linux/mm_types.h
> +++ mmotm-mm-accessor/include/linux/mm_types.h
> @@ -12,6 +12,7 @@
>  #include <linux/completion.h>
>  #include <linux/cpumask.h>
>  #include <linux/page-debug-flags.h>
> +#include <linux/wait.h>
>  #include <asm/page.h>
>  #include <asm/mmu.h>
>  
> @@ -77,6 +78,7 @@ struct page {
>  	union {
>  		pgoff_t index;		/* Our offset within mapping. */
>  		void *freelist;		/* SLUB: freelist req. slab lock */
> +		void *cache;

Let's add annotation "/* vm_area_struct cache when the page is used as page table */".


>  	};
>  	struct list_head lru;		/* Pageout list, eg. active_list
>  					 * protected by zone->lru_lock !
> @@ -180,6 +182,9 @@ struct vm_area_struct {
>  	void * vm_private_data;		/* was vm_pte (shared mem) */
>  	unsigned long vm_truncate_count;/* truncate_count or restart_addr */
>  
> +	atomic_t cache_access;
> +	wait_queue_head_t cache_wait;
> +
>  #ifndef CONFIG_MMU
>  	struct vm_region *vm_region;	/* NOMMU mapping region */
>  #endif
> Index: mmotm-mm-accessor/mm/memory.c
> ===================================================================
> --- mmotm-mm-accessor.orig/mm/memory.c
> +++ mmotm-mm-accessor/mm/memory.c
> @@ -145,6 +145,14 @@ void pmd_clear_bad(pmd_t *pmd)
>  	pmd_clear(pmd);
>  }
>  

Let's put the note here. "The caller needs to hold the pte lock"

> +static void update_vma_cache(pmd_t *pmd, struct vm_area_struct *vma)
> +{
> +	struct page *page;
> +	/* ptelock is held */
> +	page = pmd_page(*pmd);
> +	page->cache = vma;
> +}
> +
>  /*
>   * Note: this doesn't free the actual pages themselves. That
>   * has been handled earlier when unmapping all the memory regions.
> @@ -2118,6 +2126,7 @@ reuse:
>  		if (ptep_set_access_flags(vma, address, page_table, entry,1))
>  			update_mmu_cache(vma, address, entry);
>  		ret |= VM_FAULT_WRITE;
> +		update_vma_cache(pmd, vma);
>  		goto unlock;
>  	}
>  
..
<snip>
..

> Index: mmotm-mm-accessor/mm/page_alloc.c
> ===================================================================
> --- mmotm-mm-accessor.orig/mm/page_alloc.c
> +++ mmotm-mm-accessor/mm/page_alloc.c
> @@ -698,6 +698,7 @@ static int prep_new_page(struct page *pa
>  
>  	set_page_private(page, 0);
>  	set_page_refcounted(page);
> +	page->cache = NULL;

Is here is proper place to initialize page->cache?
It cause unnecessary overhead about not pmd page.

How about pmd_alloc?

>  
>  	arch_alloc_page(page, order);
>  	kernel_map_pages(page, 1 << order, 1);
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2009-12-19  3:43 UTC|newest]

Thread overview: 125+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-16  3:00 [mm][RFC][PATCH 0/11] mm accessor updates KAMEZAWA Hiroyuki
2009-12-16  3:00 ` KAMEZAWA Hiroyuki
2009-12-16  3:01 ` [mm][RFC][PATCH 1/11] mm accessor for replacing mmap_sem KAMEZAWA Hiroyuki
2009-12-16  3:01   ` KAMEZAWA Hiroyuki
2009-12-16  3:02 ` [mm][RFC][PATCH 2/11] mm accessor for kernel core KAMEZAWA Hiroyuki
2009-12-16  3:02   ` KAMEZAWA Hiroyuki
2009-12-16  3:03 ` [mm][RFC][PATCH 3/11] mm accessor for fs KAMEZAWA Hiroyuki
2009-12-16  3:03   ` KAMEZAWA Hiroyuki
2009-12-16  3:04 ` [mm][RFC][PATCH 4/11] mm accessor for kvm KAMEZAWA Hiroyuki
2009-12-16  3:04   ` KAMEZAWA Hiroyuki
2009-12-16  3:05 ` [mm][RFC][PATCH 5/11] mm accessor for tomoyo KAMEZAWA Hiroyuki
2009-12-16  3:05   ` KAMEZAWA Hiroyuki
2009-12-16  3:06 ` [mm][RFC][PATCH 6/11] mm accessor for driver/gpu KAMEZAWA Hiroyuki
2009-12-16  3:06   ` KAMEZAWA Hiroyuki
2009-12-16  3:07 ` [mm][RFC][PATCH 7/11] mm accessor for inifiniband KAMEZAWA Hiroyuki
2009-12-16  3:07   ` KAMEZAWA Hiroyuki
2009-12-16  3:08 ` [mm][RFC][PATCH 8/11] mm accessor for video KAMEZAWA Hiroyuki
2009-12-16  3:08   ` KAMEZAWA Hiroyuki
2009-12-16  3:09 ` [mm][RFC][PATCH 9/11] mm accessor for sgi gru KAMEZAWA Hiroyuki
2009-12-16  3:09   ` KAMEZAWA Hiroyuki
2009-12-16  3:10 ` [mm][RFC][PATCH 10/11] mm accessor for misc drivers KAMEZAWA Hiroyuki
2009-12-16  3:10   ` KAMEZAWA Hiroyuki
2009-12-16  3:11 ` [mm][RFC][PATCH 11/11] mm accessor for x86 KAMEZAWA Hiroyuki
2009-12-16  3:11   ` KAMEZAWA Hiroyuki
2009-12-16 10:11 ` [mm][RFC][PATCH 0/11] mm accessor updates Andi Kleen
2009-12-16 10:11   ` Andi Kleen
2009-12-16 10:13   ` KAMEZAWA Hiroyuki
2009-12-16 10:13     ` KAMEZAWA Hiroyuki
2009-12-16 10:28     ` Andi Kleen
2009-12-16 10:28       ` Andi Kleen
2009-12-16 10:31       ` KAMEZAWA Hiroyuki
2009-12-16 10:31         ` KAMEZAWA Hiroyuki
2009-12-16 10:49         ` Andi Kleen
2009-12-16 10:49           ` Andi Kleen
2009-12-16 11:12           ` KAMEZAWA Hiroyuki
2009-12-16 11:12             ` KAMEZAWA Hiroyuki
2009-12-16 11:31             ` Andi Kleen
2009-12-16 11:31               ` Andi Kleen
2009-12-16 16:27               ` Christoph Lameter
2009-12-16 16:27                 ` Christoph Lameter
2009-12-16 23:01                 ` Peter Zijlstra
2009-12-16 23:01                   ` Peter Zijlstra
2009-12-17  4:11                   ` KOSAKI Motohiro
2009-12-17  4:11                     ` KOSAKI Motohiro
2009-12-17  8:41                   ` Andi Kleen
2009-12-17  8:41                     ` Andi Kleen
2009-12-16 22:57         ` Peter Zijlstra
2009-12-16 22:57           ` Peter Zijlstra
2009-12-17  8:40           ` Andi Kleen
2009-12-17  8:40             ` Andi Kleen
2009-12-17  8:45             ` Peter Zijlstra
2009-12-17  8:45               ` Peter Zijlstra
2009-12-17  8:54               ` Andi Kleen
2009-12-17  8:54                 ` Andi Kleen
2009-12-17 14:45                 ` Paul E. McKenney
2009-12-17 14:45                   ` Paul E. McKenney
2009-12-17 15:02                   ` Peter Zijlstra
2009-12-17 15:02                     ` Peter Zijlstra
2009-12-17 17:53                   ` Andi Kleen
2009-12-17 17:53                     ` Andi Kleen
2009-12-17 19:08                     ` Paul E. McKenney
2009-12-17 19:08                       ` Paul E. McKenney
2009-12-17 19:55                       ` Andi Kleen
2009-12-17 19:55                         ` Andi Kleen
2009-12-17 19:56                         ` Christoph Lameter
2009-12-17 19:56                           ` Christoph Lameter
2009-12-17 20:14                           ` Peter Zijlstra
2009-12-17 20:14                             ` Peter Zijlstra
2009-12-17 20:42                             ` Christoph Lameter
2009-12-17 20:42                               ` Christoph Lameter
2009-12-18  5:17                               ` Ingo Molnar
2009-12-18  5:17                                 ` Ingo Molnar
2009-12-18 17:00                                 ` Avi Kivity
2009-12-18 17:00                                   ` Avi Kivity
2009-12-18 17:12                                   ` Ingo Molnar
2009-12-18 17:12                                     ` Ingo Molnar
2009-12-18 18:12                                     ` Christoph Lameter
2009-12-18 18:12                                       ` Christoph Lameter
2009-12-18 18:43                                       ` Andi Kleen
2009-12-18 18:43                                         ` Andi Kleen
2009-12-18 18:45                                       ` Ingo Molnar
2009-12-18 18:45                                         ` Ingo Molnar
2009-12-18 23:18                                         ` KAMEZAWA Hiroyuki
2009-12-18 23:18                                           ` KAMEZAWA Hiroyuki
2009-12-17 19:33             ` Christoph Lameter
2009-12-17 19:33               ` Christoph Lameter
2009-12-17 20:07               ` Peter Zijlstra
2009-12-17 20:07                 ` Peter Zijlstra
2009-12-17 20:13                 ` Christoph Lameter
2009-12-17 20:13                   ` Christoph Lameter
2009-12-17 20:19                   ` Peter Zijlstra
2009-12-17 20:19                     ` Peter Zijlstra
2009-12-16 10:31       ` Minchan Kim
2009-12-16 10:31         ` Minchan Kim
2009-12-16 10:33         ` KAMEZAWA Hiroyuki
2009-12-16 10:33           ` KAMEZAWA Hiroyuki
2009-12-18  0:38           ` [RFC 0/4] speculative page fault (Was " KAMEZAWA Hiroyuki
2009-12-18  0:41             ` [RFC 1/4] uninline mm accessor KAMEZAWA Hiroyuki
2009-12-18  0:41               ` KAMEZAWA Hiroyuki
2009-12-18  0:43             ` [RFC 2/4] add mm event counter KAMEZAWA Hiroyuki
2009-12-18  0:43               ` KAMEZAWA Hiroyuki
2009-12-19  3:23               ` Minchan Kim
2009-12-19  3:23                 ` Minchan Kim
2009-12-19  6:37                 ` KAMEZAWA Hiroyuki
2009-12-19  6:37                   ` KAMEZAWA Hiroyuki
2009-12-18  0:45             ` [RFC 3/4] lockless vma caching KAMEZAWA Hiroyuki
2009-12-18  0:45               ` KAMEZAWA Hiroyuki
2009-12-19  3:43               ` Minchan Kim [this message]
2009-12-19  3:43                 ` Minchan Kim
2009-12-19  6:44                 ` KAMEZAWA Hiroyuki
2009-12-19  6:44                   ` KAMEZAWA Hiroyuki
2009-12-18  0:46             ` [RFC 4/4] speculative pag fault KAMEZAWA Hiroyuki
2009-12-18  0:46               ` KAMEZAWA Hiroyuki
2009-12-18  5:54               ` Minchan Kim
2009-12-18  5:54                 ` Minchan Kim
2009-12-18  6:06                 ` KAMEZAWA Hiroyuki
2009-12-18  6:06                   ` KAMEZAWA Hiroyuki
2009-12-18  6:33                   ` Minchan Kim
2009-12-18  6:33                     ` Minchan Kim
2009-12-19  3:55               ` Minchan Kim
2009-12-19  3:55                 ` Minchan Kim
2009-12-19  6:49                 ` KAMEZAWA Hiroyuki
2009-12-19  6:49                   ` KAMEZAWA Hiroyuki
2009-12-16 16:24   ` [mm][RFC][PATCH 0/11] mm accessor updates Christoph Lameter
2009-12-16 16:24     ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B2C4BE3.3030104@gmail.com \
    --to=minchan.kim@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=cl@linux-foundation.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.