From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756639AbcECVCR (ORCPT ); Tue, 3 May 2016 17:02:17 -0400
Received: from mga14.intel.com ([192.55.52.115]:5409 "EHLO mga14.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1756136AbcECVCP (ORCPT ); Tue, 3 May 2016 17:02:15 -0400
X-ExtLoop1: 1
X-IronPort-AV: E=Sophos;i="5.24,574,1455004800"; d="scan'208";a="798051265"
Message-ID: <1462309326.21143.10.camel@linux.intel.com>
Subject: [PATCH 3/7] mm: Add new functions to allocate swap slots in batches
From: Tim Chen 
To: Andrew Morton , Vladimir Davydov , Johannes Weiner , Michal Hocko , Minchan Kim , Hugh Dickins 
Cc: "Kirill A.Shutemov" , Andi Kleen , Aaron Lu , Huang Ying , linux-mm , linux-kernel@vger.kernel.org
Date: Tue, 03 May 2016 14:02:06 -0700
In-Reply-To: 
References: 
Content-Type: text/plain; charset="UTF-8"
X-Mailer: Evolution 3.18.5.2 (3.18.5.2-1.fc23)
Mime-Version: 1.0
Content-Transfer-Encoding: 8bit
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

Currently, swap slots have to be allocated one page at a time, causing contention on the swap_info lock that protects the swap partition for every page being swapped out.

This patch adds the new functions get_swap_pages and scan_swap_map_slots to request multiple swap slots at once. This reduces contention on the swap_info lock, since the lock only needs to be acquired once to obtain multiple slots. scan_swap_map_slots can also operate more efficiently, because swap slots often occur in clusters close to each other on a swap device and it is quicker to allocate them together. Multiple swap slots can likewise be freed in one shot with the new function swapcache_free_entries, which further reduces contention on the swap_info lock.
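To make the batching idea concrete outside of kernel context, here is a minimal, stand-alone C sketch of the pattern this patch applies: take the shared lock once per batch of up to SWAP_BATCH slots instead of once per slot. It is only a user-space analogue of get_swap_pages()/scan_swap_map_slots(), not kernel code; pool_get_slots() and the pool itself are invented for illustration.

/*
 * Stand-alone analogue of the batching in this patch: acquire the shared
 * lock once per batch of up to SWAP_BATCH slots instead of once per slot.
 * pool_get_slots() and the pool below are illustrative, not kernel APIs.
 */
#include <pthread.h>
#include <stdio.h>

#define SWAP_BATCH      64
#define POOL_SIZE       1024

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long next_free;         /* next unallocated slot in the pool */

/* Hand out up to n slots under a single lock acquisition; returns how many. */
static int pool_get_slots(int n, unsigned long slots[])
{
        int got = 0;

        if (n > SWAP_BATCH)
                n = SWAP_BATCH;

        pthread_mutex_lock(&pool_lock);
        while (got < n && next_free < POOL_SIZE)
                slots[got++] = next_free++;
        pthread_mutex_unlock(&pool_lock);

        return got;
}

int main(void)
{
        unsigned long slots[SWAP_BATCH];
        int got = pool_get_slots(16, slots);

        /* One lock round trip covered all 16 slots instead of 16 round trips. */
        if (got)
                printf("got %d slots: %lu..%lu\n", got, slots[0], slots[got - 1]);
        return 0;
}

The kernel-side counterpart below returns swp_entry_t values rather than raw offsets and caps each request at SWAP_BATCH (64), bounding how long the swap_info lock is held per trip.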
Signed-off-by: Tim Chen ---  include/linux/swap.h |  27 +++++--  mm/swap_state.c      |  23 +++---  mm/swapfile.c        | 215 +++++++++++++++++++++++++++++++++++++++++++++------  mm/vmscan.c          |   2 +-  4 files changed, 228 insertions(+), 39 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 2b83359..da6d994 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -23,6 +23,7 @@ struct bio;  #define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */  #define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */  #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ +#define SWAP_BATCH 64    #define SWAP_FLAGS_VALID (SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \    SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ @@ -370,7 +371,8 @@ extern struct address_space swapper_spaces[];  #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])  extern unsigned long total_swapcache_pages(void);  extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *, struct list_head *list); +extern int add_to_swap(struct page *, struct list_head *list, + swp_entry_t *entry);  extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);  extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);  extern void __delete_from_swap_cache(struct page *); @@ -403,6 +405,7 @@ static inline long get_nr_swap_pages(void)    extern void si_swapinfo(struct sysinfo *);  extern swp_entry_t get_swap_page(void); +extern int get_swap_pages(int n, swp_entry_t swp_entries[]);  extern swp_entry_t get_swap_page_of_type(int);  extern int add_swap_count_continuation(swp_entry_t, gfp_t);  extern void swap_shmem_alloc(swp_entry_t); @@ -410,6 +413,7 @@ extern int swap_duplicate(swp_entry_t);  extern int swapcache_prepare(swp_entry_t);  extern void swap_free(swp_entry_t);  extern void swapcache_free(swp_entry_t); +extern void swapcache_free_entries(swp_entry_t *entries, int n);  extern int free_swap_and_cache(swp_entry_t);  extern int swap_type_of(dev_t, sector_t, struct block_device **);  extern unsigned int count_swap_pages(int, int); @@ -429,7 +433,6 @@ struct backing_dev_info;  #define total_swap_pages 0L  #define total_swapcache_pages() 0UL  #define vm_swap_full() 0 -  #define si_swapinfo(val) \   do { (val)->freeswap = (val)->totalswap = 0; } while (0)  /* only sparc can not include linux/pagemap.h in this file @@ -451,6 +454,21 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)   return 0;  }   +static inline int add_to_swap(struct page *page, struct list_head *list, + swp_entry_t *entry) +{ + return 0; +} + +static inline int get_swap_pages(int n, swp_entry_t swp_entries[]) +{ + return 0; +} + +static inline void swapcache_free_entries(swp_entry_t *entries, int n) +{ +} +  static inline void swap_shmem_alloc(swp_entry_t swp)  {  } @@ -484,11 +502,6 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)   return NULL;  }   -static inline int add_to_swap(struct page *page, struct list_head *list) -{ - return 0; -} -  static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,   gfp_t gfp_mask)  { diff --git a/mm/swap_state.c b/mm/swap_state.c index 366ce35..bad02c1 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -154,30 +154,35 @@ void __delete_from_swap_cache(struct page *page)  /**   * add_to_swap - allocate swap space for a page   * @page: page we want to move to swap + * @entry: swap entry that we have pre-allocated   *   * Allocate swap space for 
the page and add the page to the   * swap cache.  Caller needs to hold the page lock.    */ -int add_to_swap(struct page *page, struct list_head *list) +int add_to_swap(struct page *page, struct list_head *list, swp_entry_t *entry)  { - swp_entry_t entry;   int err; + swp_entry_t ent;     VM_BUG_ON_PAGE(!PageLocked(page), page);   VM_BUG_ON_PAGE(!PageUptodate(page), page);   - entry = get_swap_page(); - if (!entry.val) + if (!entry) { + ent = get_swap_page(); + entry = &ent; + } + + if (entry && !entry->val)   return 0;   - if (mem_cgroup_try_charge_swap(page, entry)) { - swapcache_free(entry); + if (mem_cgroup_try_charge_swap(page, *entry)) { + swapcache_free(*entry);   return 0;   }     if (unlikely(PageTransHuge(page)))   if (unlikely(split_huge_page_to_list(page, list))) { - swapcache_free(entry); + swapcache_free(*entry);   return 0;   }   @@ -192,7 +197,7 @@ int add_to_swap(struct page *page, struct list_head *list)   /*    * Add it to the swap cache.    */ - err = add_to_swap_cache(page, entry, + err = add_to_swap_cache(page, *entry,   __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);     if (!err) { @@ -202,7 +207,7 @@ int add_to_swap(struct page *page, struct list_head *list)    * add_to_swap_cache() doesn't return -EEXIST, so we can safely    * clear SWAP_HAS_CACHE flag.    */ - swapcache_free(entry); + swapcache_free(*entry);   return 0;   }  } diff --git a/mm/swapfile.c b/mm/swapfile.c index 83874ec..2c294a6 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -437,7 +437,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,   * Try to get a swap entry from current cpu's swap entry pool (a cluster). This   * might involve allocating a new cluster for current CPU too.   */ -static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si, +static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,   unsigned long *offset, unsigned long *scan_base)  {   struct percpu_cluster *cluster; @@ -460,7 +460,7 @@ new_cluster:   *scan_base = *offset = si->cluster_next;   goto new_cluster;   } else - return; + return false;   }     found_free = false; @@ -485,15 +485,21 @@ new_cluster:   cluster->next = tmp + 1;   *offset = tmp;   *scan_base = tmp; + return found_free;  }   -static unsigned long scan_swap_map(struct swap_info_struct *si, -    unsigned char usage) +static int scan_swap_map_slots(struct swap_info_struct *si, +    unsigned char usage, int nr, +    unsigned long slots[])  {   unsigned long offset;   unsigned long scan_base;   unsigned long last_in_cluster = 0;   int latency_ration = LATENCY_LIMIT; + int n_ret = 0; + + if (nr > SWAP_BATCH) + nr = SWAP_BATCH;     /*    * We try to cluster swap pages by allocating them sequentially @@ -511,8 +517,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,     /* SSD algorithm */   if (si->cluster_info) { - scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); - goto checks; + if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) + goto checks; + else + goto done;   }     if (unlikely(!si->cluster_nr--)) { @@ -556,8 +564,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,    checks:   if (si->cluster_info) { - while (scan_swap_map_ssd_cluster_conflict(si, offset)) - scan_swap_map_try_ssd_cluster(si, &offset, &scan_base); + while (scan_swap_map_ssd_cluster_conflict(si, offset)) { + /* take a break if we already got some slots */ + if (n_ret) + goto done; + if (!scan_swap_map_try_ssd_cluster(si, &offset, + &scan_base)) + goto done; + }   }   if (!(si->flags & SWP_WRITEOK))   
goto no_page; @@ -578,8 +592,12 @@ checks:   goto scan; /* check next one */   }   - if (si->swap_map[offset]) - goto scan; + if (si->swap_map[offset]) { + if (!n_ret) + goto scan; + else + goto done; + }     if (offset == si->lowest_bit)   si->lowest_bit++; @@ -596,9 +614,42 @@ checks:   si->swap_map[offset] = usage;   inc_cluster_info_page(si, si->cluster_info, offset);   si->cluster_next = offset + 1; - si->flags -= SWP_SCANNING; + slots[n_ret] = offset; + ++n_ret;   - return offset; + /* got enough slots or reach max slots? */ + if ((n_ret == nr) || (offset >= si->highest_bit)) + goto done; + + /* search for next available slot */ + + /* time to take a break? */ + if (unlikely(--latency_ration < 0)) { + spin_unlock(&si->lock); + cond_resched(); + spin_lock(&si->lock); + latency_ration = LATENCY_LIMIT; + } + + /* try to get more slots in cluster */ + if (si->cluster_info) { + if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base)) + goto checks; + else + goto done; + } + /* non-ssd case */ + ++offset; + + /* non-ssd case, still more slots in cluster? */ + if (si->cluster_nr && !si->swap_map[offset]) { + --si->cluster_nr; + goto checks; + } + +done: + si->flags -= SWP_SCANNING; + return n_ret;    scan:   spin_unlock(&si->lock); @@ -636,17 +687,44 @@ scan:    no_page:   si->flags -= SWP_SCANNING; - return 0; + return n_ret;  }   -swp_entry_t get_swap_page(void) +static unsigned long scan_swap_map(struct swap_info_struct *si, +    unsigned char usage) +{ + unsigned long slots[1]; + int n_ret; + + n_ret = scan_swap_map_slots(si, usage, 1, slots); + + if (n_ret) + return slots[0]; + else + return 0; + +} + +int get_swap_pages(int n, swp_entry_t swp_entries[])  {   struct swap_info_struct *si, *next; - pgoff_t offset; + long avail_pgs, n_ret, n_goal;   - if (atomic_long_read(&nr_swap_pages) <= 0) + n_ret = 0; + avail_pgs = atomic_long_read(&nr_swap_pages); + if (avail_pgs <= 0)   goto noswap; - atomic_long_dec(&nr_swap_pages); + + n_goal = n; + swp_entries[0] = (swp_entry_t) {0}; + + if (n_goal > SWAP_BATCH) + n_goal = SWAP_BATCH; + + if (n_goal > avail_pgs) + n_goal = avail_pgs; + + atomic_long_sub(n_goal, &nr_swap_pages);     spin_lock(&swap_avail_lock);   @@ -674,10 +752,26 @@ start_over:   }     /* This is called for allocating swap entry for cache */ - offset = scan_swap_map(si, SWAP_HAS_CACHE); + while (n_ret < n_goal) { + unsigned long slots[SWAP_BATCH]; + int ret, i; + + ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, + n_goal-n_ret, slots); + if (!ret) + break; + + for (i = 0; i < ret; ++i) + swp_entries[n_ret+i] = swp_entry(si->type, + slots[i]); + + n_ret += ret; + } +   spin_unlock(&si->lock); - if (offset) - return swp_entry(si->type, offset); + if (n_ret == n_goal) + return n_ret; +   pr_debug("scan_swap_map of si %d failed to find offset\n",          si->type);   spin_lock(&swap_avail_lock); @@ -698,9 +792,23 @@ nextsi:     spin_unlock(&swap_avail_lock);   - atomic_long_inc(&nr_swap_pages); + if (n_ret < n_goal) + atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);  noswap: - return (swp_entry_t) {0}; + return n_ret; +} + +swp_entry_t get_swap_page(void) +{ + swp_entry_t swp_entries[1]; + long n_ret; + + n_ret = get_swap_pages(1, swp_entries); + + if (n_ret) + return swp_entries[0]; + else + return (swp_entry_t) {0};  }    /* The only caller of this function is now suspend routine */ @@ -761,6 +869,47 @@ out:   return NULL;  }   +static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry, + struct swap_info_struct *q) +{ + struct swap_info_struct *p; + unsigned 
long offset, type; + + if (!entry.val) + goto out; + type = swp_type(entry); + if (type >= nr_swapfiles) + goto bad_nofile; + p = swap_info[type]; + if (!(p->flags & SWP_USED)) + goto bad_device; + offset = swp_offset(entry); + if (offset >= p->max) + goto bad_offset; + if (!p->swap_map[offset]) + goto bad_free; + if (p != q) { + if (q != NULL) + spin_unlock(&q->lock); + spin_lock(&p->lock); + } + return p; + +bad_free: + pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val); + goto out; +bad_offset: + pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val); + goto out; +bad_device: + pr_err("swap_free: %s%08lx\n", Unused_file, entry.val); + goto out; +bad_nofile: + pr_err("swap_free: %s%08lx\n", Bad_file, entry.val); +out: + return NULL; +} +  static unsigned char swap_entry_free(struct swap_info_struct *p,        swp_entry_t entry, unsigned char usage)  { @@ -855,6 +1004,28 @@ void swapcache_free(swp_entry_t entry)   }  }   +void swapcache_free_entries(swp_entry_t *entries, int n) +{ + struct swap_info_struct *p, *prev; + int i; + + if (n <= 0) + return; + + prev = NULL; + p = NULL; + for (i = 0; i < n; ++i) { + p = swap_info_get_cont(entries[i], prev); + if (p) + swap_entry_free(p, entries[i], SWAP_HAS_CACHE); + else + break; + prev = p; + } + if (p) + spin_unlock(&p->lock); +} +  /*   * How many references to page are currently swapped out?   * This does not give an exact answer when swap count is continued, diff --git a/mm/vmscan.c b/mm/vmscan.c index 132ba02..e36d8a7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1114,7 +1114,7 @@ static unsigned long shrink_anon_page_list(struct list_head *page_list,   * Try to allocate it some swap space here.   */   - if (!add_to_swap(page, page_list)) { + if (!add_to_swap(page, page_list, NULL)) {   pg_finish(page, PG_ACTIVATE_LOCKED, swap_ret, &nr_reclaimed,   pgactivate, ret_pages, free_pages);   continue; --  2.5.5
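As a closing illustration of the batched free path: swapcache_free_entries() above walks the entry array and, via swap_info_get_cont(), only drops and re-takes a swap_info lock when the next entry belongs to a different device. The stand-alone sketch below shows that "switch locks only on owner change" pattern in user space; struct pool, struct item and free_items() are invented for the example and are not kernel APIs.

/*
 * User-space analogue of swapcache_free_entries()/swap_info_get_cont():
 * while freeing a batch, keep the current owner's lock as long as
 * consecutive items share that owner, switching locks only on a change.
 * All types and names here are illustrative, not kernel APIs.
 */
#include <pthread.h>
#include <stdio.h>

struct pool {
        pthread_mutex_t lock;
        long nr_free;
};

struct item {
        struct pool *owner;
};

static void free_items(struct item *items, int n)
{
        struct pool *p = NULL, *prev = NULL;
        int i;

        for (i = 0; i < n; i++) {
                p = items[i].owner;
                if (p != prev) {                /* owner changed: swap locks */
                        if (prev)
                                pthread_mutex_unlock(&prev->lock);
                        pthread_mutex_lock(&p->lock);
                }
                p->nr_free++;                   /* the actual "free" work */
                prev = p;
        }
        if (p)
                pthread_mutex_unlock(&p->lock);
}

int main(void)
{
        static struct pool a = { PTHREAD_MUTEX_INITIALIZER, 0 };
        static struct pool b = { PTHREAD_MUTEX_INITIALIZER, 0 };
        struct item batch[] = { {&a}, {&a}, {&b}, {&b}, {&a} };

        /* Five frees, but only three lock acquisitions (a -> b -> a). */
        free_items(batch, 5);
        printf("a.nr_free=%ld b.nr_free=%ld\n", a.nr_free, b.nr_free);
        return 0;
}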