[PATCH 3/7] mm: Add new functions to allocate swap slots in batches

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Tim Chen <tim.c.chen@linux.intel.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Vladimir Davydov <vdavydov@virtuozzo.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.cz>, Minchan Kim <minchan@kernel.org>,
	Hugh Dickins <hughd@google.com>
Cc: "Kirill A.Shutemov" <kirill.shutemov@linux.intel.com>,
	Andi Kleen <andi@firstfloor.org>, Aaron Lu <aaron.lu@intel.com>,
	Huang Ying <ying.huang@intel.com>, linux-mm <linux-mm@kvack.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 3/7] mm: Add new functions to allocate swap slots in batches
Date: Tue, 03 May 2016 14:02:06 -0700	[thread overview]
Message-ID: <1462309326.21143.10.camel@linux.intel.com> (raw)
In-Reply-To: <cover.1462306228.git.tim.c.chen@linux.intel.com>

Currently, the swap slots have to be allocated one page at a time,
causing contention on the swap_info lock protecting the swap partition
for every page being swapped.

This patch adds new functions get_swap_pages and scan_swap_map_slots to
request multiple swap slots at once. This will reduce the lock contention
on the swap_info lock as we only need to acquire the lock once to get
multiple slots.  Also scan_swap_map_slots can operate more efficiently
as swap slots often occur in clusters close to each other on a swap
device and it is quicker to allocate them together.

Multiple swap slots can also be freed in one shot with the new function
swapcache_free_entries, which further reduces contention on the swap_info
lock.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 include/linux/swap.h |  27 +++++--
 mm/swap_state.c      |  23 +++---
 mm/swapfile.c        | 215 +++++++++++++++++++++++++++++++++++++++++++++------
 mm/vmscan.c          |   2 +-
 4 files changed, 228 insertions(+), 39 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2b83359..da6d994 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -23,6 +23,7 @@ struct bio;
A #define SWAP_FLAG_DISCARD	0x10000 /* enable discard for swap */
A #define SWAP_FLAG_DISCARD_ONCE	0x20000 /* discard swap area at swapon-time */
A #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
+#define SWAP_BATCH 64
A 
A #define SWAP_FLAGS_VALID	(SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
A 				A SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
@@ -370,7 +371,8 @@ extern struct address_space swapper_spaces[];
A #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
A extern unsigned long total_swapcache_pages(void);
A extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *, struct list_head *list);
+extern int add_to_swap(struct page *, struct list_head *list,
+			swp_entry_t *entry);
A extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
A extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
A extern void __delete_from_swap_cache(struct page *);
@@ -403,6 +405,7 @@ static inline long get_nr_swap_pages(void)
A 
A extern void si_swapinfo(struct sysinfo *);
A extern swp_entry_t get_swap_page(void);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
A extern swp_entry_t get_swap_page_of_type(int);
A extern int add_swap_count_continuation(swp_entry_t, gfp_t);
A extern void swap_shmem_alloc(swp_entry_t);
@@ -410,6 +413,7 @@ extern int swap_duplicate(swp_entry_t);
A extern int swapcache_prepare(swp_entry_t);
A extern void swap_free(swp_entry_t);
A extern void swapcache_free(swp_entry_t);
+extern void swapcache_free_entries(swp_entry_t *entries, int n);
A extern int free_swap_and_cache(swp_entry_t);
A extern int swap_type_of(dev_t, sector_t, struct block_device **);
A extern unsigned int count_swap_pages(int, int);
@@ -429,7 +433,6 @@ struct backing_dev_info;
A #define total_swap_pages			0L
A #define total_swapcache_pages()			0UL
A #define vm_swap_full()				0
-
A #define si_swapinfo(val) \
A 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
A /* only sparc can not include linux/pagemap.h in this file
@@ -451,6 +454,21 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
A 	return 0;
A }
A 
+static inline int add_to_swap(struct page *page, struct list_head *list,
+				swp_entry_t *entry)
+{
+	return 0;
+}
+
+static inline int get_swap_pages(int n, swp_entry_t swp_entries[])
+{
+	return 0;
+}
+
+static inline void swapcache_free_entries(swp_entry_t *entries, int n)
+{
+}
+
A static inline void swap_shmem_alloc(swp_entry_t swp)
A {
A }
@@ -484,11 +502,6 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
A 	return NULL;
A }
A 
-static inline int add_to_swap(struct page *page, struct list_head *list)
-{
-	return 0;
-}
-
A static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
A 							gfp_t gfp_mask)
A {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 366ce35..bad02c1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -154,30 +154,35 @@ void __delete_from_swap_cache(struct page *page)
A /**
A  * add_to_swap - allocate swap space for a page
A  * @page: page we want to move to swap
+ * @entry: swap entry that we have pre-allocated
A  *
A  * Allocate swap space for the page and add the page to the
A  * swap cache.A A Caller needs to hold the page lock.A 
A  */
-int add_to_swap(struct page *page, struct list_head *list)
+int add_to_swap(struct page *page, struct list_head *list, swp_entry_t *entry)
A {
-	swp_entry_t entry;
A 	int err;
+	swp_entry_t ent;
A 
A 	VM_BUG_ON_PAGE(!PageLocked(page), page);
A 	VM_BUG_ON_PAGE(!PageUptodate(page), page);
A 
-	entry = get_swap_page();
-	if (!entry.val)
+	if (!entry) {
+		ent = get_swap_page();
+		entry = &ent;
+	}
+
+	if (entry && !entry->val)
A 		return 0;
A 
-	if (mem_cgroup_try_charge_swap(page, entry)) {
-		swapcache_free(entry);
+	if (mem_cgroup_try_charge_swap(page, *entry)) {
+		swapcache_free(*entry);
A 		return 0;
A 	}
A 
A 	if (unlikely(PageTransHuge(page)))
A 		if (unlikely(split_huge_page_to_list(page, list))) {
-			swapcache_free(entry);
+			swapcache_free(*entry);
A 			return 0;
A 		}
A 
@@ -192,7 +197,7 @@ int add_to_swap(struct page *page, struct list_head *list)
A 	/*
A 	A * Add it to the swap cache.
A 	A */
-	err = add_to_swap_cache(page, entry,
+	err = add_to_swap_cache(page, *entry,
A 			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
A 
A 	if (!err) {
@@ -202,7 +207,7 @@ int add_to_swap(struct page *page, struct list_head *list)
A 		A * add_to_swap_cache() doesn't return -EEXIST, so we can safely
A 		A * clear SWAP_HAS_CACHE flag.
A 		A */
-		swapcache_free(entry);
+		swapcache_free(*entry);
A 		return 0;
A 	}
A }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 83874ec..2c294a6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -437,7 +437,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
A  * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
A  * might involve allocating a new cluster for current CPU too.
A  */
-static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
+static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
A 	unsigned long *offset, unsigned long *scan_base)
A {
A 	struct percpu_cluster *cluster;
@@ -460,7 +460,7 @@ new_cluster:
A 			*scan_base = *offset = si->cluster_next;
A 			goto new_cluster;
A 		} else
-			return;
+			return false;
A 	}
A 
A 	found_free = false;
@@ -485,15 +485,21 @@ new_cluster:
A 	cluster->next = tmp + 1;
A 	*offset = tmp;
A 	*scan_base = tmp;
+	return found_free;
A }
A 
-static unsigned long scan_swap_map(struct swap_info_struct *si,
-				A A A unsigned char usage)
+static int scan_swap_map_slots(struct swap_info_struct *si,
+				A A A unsigned char usage, int nr,
+				A A A unsigned long slots[])
A {
A 	unsigned long offset;
A 	unsigned long scan_base;
A 	unsigned long last_in_cluster = 0;
A 	int latency_ration = LATENCY_LIMIT;
+	int n_ret = 0;
+
+	if (nr > SWAP_BATCH)
+		nr = SWAP_BATCH;
A 
A 	/*
A 	A * We try to cluster swap pages by allocating them sequentially
@@ -511,8 +517,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
A 
A 	/* SSD algorithm */
A 	if (si->cluster_info) {
-		scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
-		goto checks;
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto done;
A 	}
A 
A 	if (unlikely(!si->cluster_nr--)) {
@@ -556,8 +564,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
A 
A checks:
A 	if (si->cluster_info) {
-		while (scan_swap_map_ssd_cluster_conflict(si, offset))
-			scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+		while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
+		/* take a break if we already got some slots */
+			if (n_ret)
+				goto done;
+			if (!scan_swap_map_try_ssd_cluster(si, &offset,
+							&scan_base))
+				goto done;
+		}
A 	}
A 	if (!(si->flags & SWP_WRITEOK))
A 		goto no_page;
@@ -578,8 +592,12 @@ checks:
A 		goto scan; /* check next one */
A 	}
A 
-	if (si->swap_map[offset])
-		goto scan;
+	if (si->swap_map[offset]) {
+		if (!n_ret)
+			goto scan;
+		else
+			goto done;
+	}
A 
A 	if (offset == si->lowest_bit)
A 		si->lowest_bit++;
@@ -596,9 +614,42 @@ checks:
A 	si->swap_map[offset] = usage;
A 	inc_cluster_info_page(si, si->cluster_info, offset);
A 	si->cluster_next = offset + 1;
-	si->flags -= SWP_SCANNING;
+	slots[n_ret] = offset;
+	++n_ret;
A 
-	return offset;
+	/* got enough slots or reach max slots? */
+	if ((n_ret == nr) || (offset >= si->highest_bit))
+		goto done;
+
+	/* search for next available slot */
+
+	/* time to take a break? */
+	if (unlikely(--latency_ration < 0)) {
+		spin_unlock(&si->lock);
+		cond_resched();
+		spin_lock(&si->lock);
+		latency_ration = LATENCY_LIMIT;
+	}
+
+	/* try to get more slots in cluster */
+	if (si->cluster_info) {
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto done;
+	}
+	/* non-ssd case */
+	++offset;
+
+	/* non-ssd case, still more slots in cluster? */
+	if (si->cluster_nr && !si->swap_map[offset]) {
+		--si->cluster_nr;
+		goto checks;
+	}
+
+done:
+	si->flags -= SWP_SCANNING;
+	return n_ret;
A 
A scan:
A 	spin_unlock(&si->lock);
@@ -636,17 +687,44 @@ scan:
A 
A no_page:
A 	si->flags -= SWP_SCANNING;
-	return 0;
+	return n_ret;
A }
A 
-swp_entry_t get_swap_page(void)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+				A A A unsigned char usage)
+{
+	unsigned long slots[1];
+	int n_ret;
+
+	n_ret = scan_swap_map_slots(si, usage, 1, slots);
+
+	if (n_ret)
+		return slots[0];
+	else
+		return 0;
+
+}
+
+int get_swap_pages(int n, swp_entry_t swp_entries[])
A {
A 	struct swap_info_struct *si, *next;
-	pgoff_t offset;
+	long avail_pgs, n_ret, n_goal;
A 
-	if (atomic_long_read(&nr_swap_pages) <= 0)
+	n_ret = 0;
+	avail_pgs = atomic_long_read(&nr_swap_pages);
+	if (avail_pgs <= 0)
A 		goto noswap;
-	atomic_long_dec(&nr_swap_pages);
+
+	n_goal = n;
+	swp_entries[0] = (swp_entry_t) {0};
+
+	if (n_goal > SWAP_BATCH)
+		n_goal = SWAP_BATCH;
+
+	if (n_goal > avail_pgs)
+		n_goal = avail_pgs;
+
+	atomic_long_sub(n_goal, &nr_swap_pages);
A 
A 	spin_lock(&swap_avail_lock);
A 
@@ -674,10 +752,26 @@ start_over:
A 		}
A 
A 		/* This is called for allocating swap entry for cache */
-		offset = scan_swap_map(si, SWAP_HAS_CACHE);
+		while (n_ret < n_goal) {
+			unsigned long slots[SWAP_BATCH];
+			int ret, i;
+
+			ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+							n_goal-n_ret, slots);
+			if (!ret)
+				break;
+
+			for (i = 0; i < ret; ++i)
+				swp_entries[n_ret+i] = swp_entry(si->type,
+								slots[i]);
+
+			n_ret += ret;
+		}
+
A 		spin_unlock(&si->lock);
-		if (offset)
-			return swp_entry(si->type, offset);
+		if (n_ret == n_goal)
+			return n_ret;
+
A 		pr_debug("scan_swap_map of si %d failed to find offset\n",
A 		A A A A A A A si->type);
A 		spin_lock(&swap_avail_lock);
@@ -698,9 +792,23 @@ nextsi:
A 
A 	spin_unlock(&swap_avail_lock);
A 
-	atomic_long_inc(&nr_swap_pages);
+	if (n_ret < n_goal)
+		atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
A noswap:
-	return (swp_entry_t) {0};
+	return n_ret;
+}
+
+swp_entry_t get_swap_page(void)
+{
+	swp_entry_t swp_entries[1];
+	long n_ret;
+
+	n_ret = get_swap_pages(1, swp_entries);
+
+	if (n_ret)
+		return swp_entries[0];
+	else
+		return (swp_entry_t) {0};
A }
A 
A /* The only caller of this function is now suspend routine */
@@ -761,6 +869,47 @@ out:
A 	return NULL;
A }
A 
+static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
+					struct swap_info_struct *q)
+{
+	struct swap_info_struct *p;
+	unsigned long offset, type;
+
+	if (!entry.val)
+		goto out;
+	type = swp_type(entry);
+	if (type >= nr_swapfiles)
+		goto bad_nofile;
+	p = swap_info[type];
+	if (!(p->flags & SWP_USED))
+		goto bad_device;
+	offset = swp_offset(entry);
+	if (offset >= p->max)
+		goto bad_offset;
+	if (!p->swap_map[offset])
+		goto bad_free;
+	if (p != q) {
+		if (q != NULL)
+			spin_unlock(&q->lock);
+		spin_lock(&p->lock);
+	}
+	return p;
+
+bad_free:
+	pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
+	goto out;
+bad_offset:
+	pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
+	goto out;
+bad_device:
+	pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
+	goto out;
+bad_nofile:
+	pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
+out:
+	return NULL;
+}
+
A static unsigned char swap_entry_free(struct swap_info_struct *p,
A 				A A A A A swp_entry_t entry, unsigned char usage)
A {
@@ -855,6 +1004,28 @@ void swapcache_free(swp_entry_t entry)
A 	}
A }
A 
+void swapcache_free_entries(swp_entry_t *entries, int n)
+{
+	struct swap_info_struct *p, *prev;
+	int i;
+
+	if (n <= 0)
+		return;
+
+	prev = NULL;
+	p = NULL;
+	for (i = 0; i < n; ++i) {
+		p = swap_info_get_cont(entries[i], prev);
+		if (p)
+			swap_entry_free(p, entries[i], SWAP_HAS_CACHE);
+		else
+			break;
+		prev = p;
+	}
+	if (p)
+		spin_unlock(&p->lock);
+}
+
A /*
A  * How many references to page are currently swapped out?
A  * This does not give an exact answer when swap count is continued,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 132ba02..e36d8a7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1114,7 +1114,7 @@ static unsigned long shrink_anon_page_list(struct list_head *page_list,
A 		* Try to allocate it some swap space here.
A 		*/
A 
-		if (!add_to_swap(page, page_list)) {
+		if (!add_to_swap(page, page_list, NULL)) {
A 			pg_finish(page, PG_ACTIVATE_LOCKED, swap_ret, &nr_reclaimed,
A 					pgactivate, ret_pages, free_pages);
A 			continue;
-- 
2.5.5

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

WARNING: multiple messages have this Message-ID (diff)

From: Tim Chen <tim.c.chen@linux.intel.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Vladimir Davydov <vdavydov@virtuozzo.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.cz>, Minchan Kim <minchan@kernel.org>,
	Hugh Dickins <hughd@google.com>
Cc: "Kirill A.Shutemov" <kirill.shutemov@linux.intel.com>,
	Andi Kleen <andi@firstfloor.org>, Aaron Lu <aaron.lu@intel.com>,
	Huang Ying <ying.huang@intel.com>, linux-mm <linux-mm@kvack.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH 3/7] mm: Add new functions to allocate swap slots in batches
Date: Tue, 03 May 2016 14:02:06 -0700	[thread overview]
Message-ID: <1462309326.21143.10.camel@linux.intel.com> (raw)
In-Reply-To: <cover.1462306228.git.tim.c.chen@linux.intel.com>

Currently, the swap slots have to be allocated one page at a time,
causing contention on the swap_info lock protecting the swap partition
for every page being swapped.

This patch adds new functions get_swap_pages and scan_swap_map_slots to
request multiple swap slots at once. This will reduce the lock contention
on the swap_info lock as we only need to acquire the lock once to get
multiple slots.  Also scan_swap_map_slots can operate more efficiently
as swap slots often occur in clusters close to each other on a swap
device and it is quicker to allocate them together.

Multiple swap slots can also be freed in one shot with the new function
swapcache_free_entries, which further reduces contention on the swap_info
lock.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 include/linux/swap.h |  27 +++++--
 mm/swap_state.c      |  23 +++---
 mm/swapfile.c        | 215 +++++++++++++++++++++++++++++++++++++++++++++------
 mm/vmscan.c          |   2 +-
 4 files changed, 228 insertions(+), 39 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 2b83359..da6d994 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -23,6 +23,7 @@ struct bio;
 #define SWAP_FLAG_DISCARD	0x10000 /* enable discard for swap */
 #define SWAP_FLAG_DISCARD_ONCE	0x20000 /* discard swap area at swapon-time */
 #define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
+#define SWAP_BATCH 64
 
 #define SWAP_FLAGS_VALID	(SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
 				 SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
@@ -370,7 +371,8 @@ extern struct address_space swapper_spaces[];
 #define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
 extern unsigned long total_swapcache_pages(void);
 extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *, struct list_head *list);
+extern int add_to_swap(struct page *, struct list_head *list,
+			swp_entry_t *entry);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
 extern void __delete_from_swap_cache(struct page *);
@@ -403,6 +405,7 @@ static inline long get_nr_swap_pages(void)
 
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
+extern int get_swap_pages(int n, swp_entry_t swp_entries[]);
 extern swp_entry_t get_swap_page_of_type(int);
 extern int add_swap_count_continuation(swp_entry_t, gfp_t);
 extern void swap_shmem_alloc(swp_entry_t);
@@ -410,6 +413,7 @@ extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
 extern void swapcache_free(swp_entry_t);
+extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
@@ -429,7 +433,6 @@ struct backing_dev_info;
 #define total_swap_pages			0L
 #define total_swapcache_pages()			0UL
 #define vm_swap_full()				0
-
 #define si_swapinfo(val) \
 	do { (val)->freeswap = (val)->totalswap = 0; } while (0)
 /* only sparc can not include linux/pagemap.h in this file
@@ -451,6 +454,21 @@ static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
 	return 0;
 }
 
+static inline int add_to_swap(struct page *page, struct list_head *list,
+				swp_entry_t *entry)
+{
+	return 0;
+}
+
+static inline int get_swap_pages(int n, swp_entry_t swp_entries[])
+{
+	return 0;
+}
+
+static inline void swapcache_free_entries(swp_entry_t *entries, int n)
+{
+}
+
 static inline void swap_shmem_alloc(swp_entry_t swp)
 {
 }
@@ -484,11 +502,6 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp)
 	return NULL;
 }
 
-static inline int add_to_swap(struct page *page, struct list_head *list)
-{
-	return 0;
-}
-
 static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
 							gfp_t gfp_mask)
 {
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 366ce35..bad02c1 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -154,30 +154,35 @@ void __delete_from_swap_cache(struct page *page)
 /**
  * add_to_swap - allocate swap space for a page
  * @page: page we want to move to swap
+ * @entry: swap entry that we have pre-allocated
  *
  * Allocate swap space for the page and add the page to the
  * swap cache.  Caller needs to hold the page lock. 
  */
-int add_to_swap(struct page *page, struct list_head *list)
+int add_to_swap(struct page *page, struct list_head *list, swp_entry_t *entry)
 {
-	swp_entry_t entry;
 	int err;
+	swp_entry_t ent;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageUptodate(page), page);
 
-	entry = get_swap_page();
-	if (!entry.val)
+	if (!entry) {
+		ent = get_swap_page();
+		entry = &ent;
+	}
+
+	if (entry && !entry->val)
 		return 0;
 
-	if (mem_cgroup_try_charge_swap(page, entry)) {
-		swapcache_free(entry);
+	if (mem_cgroup_try_charge_swap(page, *entry)) {
+		swapcache_free(*entry);
 		return 0;
 	}
 
 	if (unlikely(PageTransHuge(page)))
 		if (unlikely(split_huge_page_to_list(page, list))) {
-			swapcache_free(entry);
+			swapcache_free(*entry);
 			return 0;
 		}
 
@@ -192,7 +197,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 	/*
 	 * Add it to the swap cache.
 	 */
-	err = add_to_swap_cache(page, entry,
+	err = add_to_swap_cache(page, *entry,
 			__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN);
 
 	if (!err) {
@@ -202,7 +207,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry);
+		swapcache_free(*entry);
 		return 0;
 	}
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 83874ec..2c294a6 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -437,7 +437,7 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
  * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
  * might involve allocating a new cluster for current CPU too.
  */
-static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
+static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
 	unsigned long *offset, unsigned long *scan_base)
 {
 	struct percpu_cluster *cluster;
@@ -460,7 +460,7 @@ new_cluster:
 			*scan_base = *offset = si->cluster_next;
 			goto new_cluster;
 		} else
-			return;
+			return false;
 	}
 
 	found_free = false;
@@ -485,15 +485,21 @@ new_cluster:
 	cluster->next = tmp + 1;
 	*offset = tmp;
 	*scan_base = tmp;
+	return found_free;
 }
 
-static unsigned long scan_swap_map(struct swap_info_struct *si,
-				   unsigned char usage)
+static int scan_swap_map_slots(struct swap_info_struct *si,
+				   unsigned char usage, int nr,
+				   unsigned long slots[])
 {
 	unsigned long offset;
 	unsigned long scan_base;
 	unsigned long last_in_cluster = 0;
 	int latency_ration = LATENCY_LIMIT;
+	int n_ret = 0;
+
+	if (nr > SWAP_BATCH)
+		nr = SWAP_BATCH;
 
 	/*
 	 * We try to cluster swap pages by allocating them sequentially
@@ -511,8 +517,10 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 	/* SSD algorithm */
 	if (si->cluster_info) {
-		scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
-		goto checks;
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto done;
 	}
 
 	if (unlikely(!si->cluster_nr--)) {
@@ -556,8 +564,14 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
 
 checks:
 	if (si->cluster_info) {
-		while (scan_swap_map_ssd_cluster_conflict(si, offset))
-			scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+		while (scan_swap_map_ssd_cluster_conflict(si, offset)) {
+		/* take a break if we already got some slots */
+			if (n_ret)
+				goto done;
+			if (!scan_swap_map_try_ssd_cluster(si, &offset,
+							&scan_base))
+				goto done;
+		}
 	}
 	if (!(si->flags & SWP_WRITEOK))
 		goto no_page;
@@ -578,8 +592,12 @@ checks:
 		goto scan; /* check next one */
 	}
 
-	if (si->swap_map[offset])
-		goto scan;
+	if (si->swap_map[offset]) {
+		if (!n_ret)
+			goto scan;
+		else
+			goto done;
+	}
 
 	if (offset == si->lowest_bit)
 		si->lowest_bit++;
@@ -596,9 +614,42 @@ checks:
 	si->swap_map[offset] = usage;
 	inc_cluster_info_page(si, si->cluster_info, offset);
 	si->cluster_next = offset + 1;
-	si->flags -= SWP_SCANNING;
+	slots[n_ret] = offset;
+	++n_ret;
 
-	return offset;
+	/* got enough slots or reach max slots? */
+	if ((n_ret == nr) || (offset >= si->highest_bit))
+		goto done;
+
+	/* search for next available slot */
+
+	/* time to take a break? */
+	if (unlikely(--latency_ration < 0)) {
+		spin_unlock(&si->lock);
+		cond_resched();
+		spin_lock(&si->lock);
+		latency_ration = LATENCY_LIMIT;
+	}
+
+	/* try to get more slots in cluster */
+	if (si->cluster_info) {
+		if (scan_swap_map_try_ssd_cluster(si, &offset, &scan_base))
+			goto checks;
+		else
+			goto done;
+	}
+	/* non-ssd case */
+	++offset;
+
+	/* non-ssd case, still more slots in cluster? */
+	if (si->cluster_nr && !si->swap_map[offset]) {
+		--si->cluster_nr;
+		goto checks;
+	}
+
+done:
+	si->flags -= SWP_SCANNING;
+	return n_ret;
 
 scan:
 	spin_unlock(&si->lock);
@@ -636,17 +687,44 @@ scan:
 
 no_page:
 	si->flags -= SWP_SCANNING;
-	return 0;
+	return n_ret;
 }
 
-swp_entry_t get_swap_page(void)
+static unsigned long scan_swap_map(struct swap_info_struct *si,
+				   unsigned char usage)
+{
+	unsigned long slots[1];
+	int n_ret;
+
+	n_ret = scan_swap_map_slots(si, usage, 1, slots);
+
+	if (n_ret)
+		return slots[0];
+	else
+		return 0;
+
+}
+
+int get_swap_pages(int n, swp_entry_t swp_entries[])
 {
 	struct swap_info_struct *si, *next;
-	pgoff_t offset;
+	long avail_pgs, n_ret, n_goal;
 
-	if (atomic_long_read(&nr_swap_pages) <= 0)
+	n_ret = 0;
+	avail_pgs = atomic_long_read(&nr_swap_pages);
+	if (avail_pgs <= 0)
 		goto noswap;
-	atomic_long_dec(&nr_swap_pages);
+
+	n_goal = n;
+	swp_entries[0] = (swp_entry_t) {0};
+
+	if (n_goal > SWAP_BATCH)
+		n_goal = SWAP_BATCH;
+
+	if (n_goal > avail_pgs)
+		n_goal = avail_pgs;
+
+	atomic_long_sub(n_goal, &nr_swap_pages);
 
 	spin_lock(&swap_avail_lock);
 
@@ -674,10 +752,26 @@ start_over:
 		}
 
 		/* This is called for allocating swap entry for cache */
-		offset = scan_swap_map(si, SWAP_HAS_CACHE);
+		while (n_ret < n_goal) {
+			unsigned long slots[SWAP_BATCH];
+			int ret, i;
+
+			ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
+							n_goal-n_ret, slots);
+			if (!ret)
+				break;
+
+			for (i = 0; i < ret; ++i)
+				swp_entries[n_ret+i] = swp_entry(si->type,
+								slots[i]);
+
+			n_ret += ret;
+		}
+
 		spin_unlock(&si->lock);
-		if (offset)
-			return swp_entry(si->type, offset);
+		if (n_ret == n_goal)
+			return n_ret;
+
 		pr_debug("scan_swap_map of si %d failed to find offset\n",
 		       si->type);
 		spin_lock(&swap_avail_lock);
@@ -698,9 +792,23 @@ nextsi:
 
 	spin_unlock(&swap_avail_lock);
 
-	atomic_long_inc(&nr_swap_pages);
+	if (n_ret < n_goal)
+		atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages);
 noswap:
-	return (swp_entry_t) {0};
+	return n_ret;
+}
+
+swp_entry_t get_swap_page(void)
+{
+	swp_entry_t swp_entries[1];
+	long n_ret;
+
+	n_ret = get_swap_pages(1, swp_entries);
+
+	if (n_ret)
+		return swp_entries[0];
+	else
+		return (swp_entry_t) {0};
 }
 
 /* The only caller of this function is now suspend routine */
@@ -761,6 +869,47 @@ out:
 	return NULL;
 }
 
+static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
+					struct swap_info_struct *q)
+{
+	struct swap_info_struct *p;
+	unsigned long offset, type;
+
+	if (!entry.val)
+		goto out;
+	type = swp_type(entry);
+	if (type >= nr_swapfiles)
+		goto bad_nofile;
+	p = swap_info[type];
+	if (!(p->flags & SWP_USED))
+		goto bad_device;
+	offset = swp_offset(entry);
+	if (offset >= p->max)
+		goto bad_offset;
+	if (!p->swap_map[offset])
+		goto bad_free;
+	if (p != q) {
+		if (q != NULL)
+			spin_unlock(&q->lock);
+		spin_lock(&p->lock);
+	}
+	return p;
+
+bad_free:
+	pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
+	goto out;
+bad_offset:
+	pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
+	goto out;
+bad_device:
+	pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
+	goto out;
+bad_nofile:
+	pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
+out:
+	return NULL;
+}
+
 static unsigned char swap_entry_free(struct swap_info_struct *p,
 				     swp_entry_t entry, unsigned char usage)
 {
@@ -855,6 +1004,28 @@ void swapcache_free(swp_entry_t entry)
 	}
 }
 
+void swapcache_free_entries(swp_entry_t *entries, int n)
+{
+	struct swap_info_struct *p, *prev;
+	int i;
+
+	if (n <= 0)
+		return;
+
+	prev = NULL;
+	p = NULL;
+	for (i = 0; i < n; ++i) {
+		p = swap_info_get_cont(entries[i], prev);
+		if (p)
+			swap_entry_free(p, entries[i], SWAP_HAS_CACHE);
+		else
+			break;
+		prev = p;
+	}
+	if (p)
+		spin_unlock(&p->lock);
+}
+
 /*
  * How many references to page are currently swapped out?
  * This does not give an exact answer when swap count is continued,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 132ba02..e36d8a7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1114,7 +1114,7 @@ static unsigned long shrink_anon_page_list(struct list_head *page_list,
 		* Try to allocate it some swap space here.
 		*/
 
-		if (!add_to_swap(page, page_list)) {
+		if (!add_to_swap(page, page_list, NULL)) {
 			pg_finish(page, PG_ACTIVATE_LOCKED, swap_ret, &nr_reclaimed,
 					pgactivate, ret_pages, free_pages);
 			continue;
-- 
2.5.5

next prev parent reply	other threads:[~2016-05-03 21:02 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <cover.1462306228.git.tim.c.chen@linux.intel.com>
2016-05-03 21:00 ` [PATCH 0/7] mm: Improve swap path scalability with batched operations Tim Chen
2016-05-03 21:00   ` Tim Chen
2016-05-04 12:45   ` Michal Hocko
2016-05-04 12:45     ` Michal Hocko
2016-05-04 17:13     ` Tim Chen
2016-05-04 17:13       ` Tim Chen
2016-05-04 19:49       ` Michal Hocko
2016-05-04 19:49         ` Michal Hocko
2016-05-04 21:05         ` Andi Kleen
2016-05-04 21:05           ` Andi Kleen
2016-05-04 21:25         ` Johannes Weiner
2016-05-04 21:25           ` Johannes Weiner
2016-05-05  0:08           ` Minchan Kim
2016-05-05  0:08             ` Minchan Kim
2016-05-05  7:49           ` Michal Hocko
2016-05-05  7:49             ` Michal Hocko
2016-05-05 15:56             ` Tim Chen
2016-05-05 15:56               ` Tim Chen
2016-05-03 21:01 ` [PATCH 1/7] mm: Cleanup - Reorganize the shrink_page_list code into smaller functions Tim Chen
2016-05-03 21:01   ` Tim Chen
2016-05-27 16:40   ` Tim Chen
2016-05-27 16:40     ` Tim Chen
2016-05-30  8:48     ` Michal Hocko
2016-05-30  8:48       ` Michal Hocko
2016-05-03 21:01 ` [PATCH 2/7] mm: Group the processing of anonymous pages to be swapped in shrink_page_list Tim Chen
2016-05-03 21:01   ` Tim Chen
2016-05-03 21:02 ` Tim Chen [this message]
2016-05-03 21:02   ` [PATCH 3/7] mm: Add new functions to allocate swap slots in batches Tim Chen
2016-05-03 21:02 ` [PATCH 4/7] mm: Shrink page list batch allocates swap slots for page swapping Tim Chen
2016-05-03 21:02   ` Tim Chen
2016-05-03 21:02 ` [PATCH 5/7] mm: Batch addtion of pages to swap cache Tim Chen
2016-05-03 21:02   ` Tim Chen
2016-05-03 21:03 ` [PATCH 6/7] mm: Cleanup - Reorganize code to group handling of page Tim Chen
2016-05-03 21:03   ` Tim Chen
2016-05-03 21:03 ` [PATCH 7/7] mm: Batch unmapping of pages that are in swap cache Tim Chen
2016-05-03 21:03   ` Tim Chen

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:2b83359 dfblob:da6d994 dfblob:366ce35 dfblob:bad02c1
dfblob:83874ec dfblob:2c294a6 dfblob:132ba02 dfblob:e36d8a7
dfblob:2b83359 dfblob:da6d994 dfblob:366ce35 dfblob:bad02c1
dfblob:83874ec dfblob:2c294a6 dfblob:132ba02 dfblob:e36d8a7 )
 OR (
bs:"[PATCH 3/7] mm: Add new functions to allocate swap slots in batches" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1462309326.21143.10.camel@linux.intel.com \
    --to=tim.c.chen@linux.intel.com \
    --cc=aaron.lu@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=minchan@kernel.org \
    --cc=vdavydov@virtuozzo.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.