All of lore.kernel.org
 help / color / mirror / Atom feed
diff for duplicates of <87y3un2vdp.fsf@yhuang-dev.intel.com>

diff --git a/a/1.txt b/N1/1.txt
index 4f1c270..375b674 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -115,3 +115,131 @@ for one swap user case too.
 
 Best Regards,
 Huang, Ying
+
+>From 7bd903c42749c448ef6acbbdee8dcbc1c5b498b9 Mon Sep 17 00:00:00 2001
+From: Huang Ying <ying.huang@intel.com>
+Date: Thu, 23 Feb 2017 13:05:20 +0800
+Subject: [PATCH -v5] mm, swap: Sort swap entries before free
+
+To reduce the lock contention of swap_info_struct->lock when freeing
+swap entry.  The freed swap entries will be collected in a per-CPU
+buffer firstly, and be really freed later in batch.  During the batch
+freeing, if the consecutive swap entries in the per-CPU buffer belongs
+to same swap device, the swap_info_struct->lock needs to be
+acquired/released only once, so that the lock contention could be
+reduced greatly.  But if there are multiple swap devices, it is
+possible that the lock may be unnecessarily released/acquired because
+the swap entries belong to the same swap device are non-consecutive in
+the per-CPU buffer.
+
+To solve the issue, the per-CPU buffer is sorted according to the swap
+device before freeing the swap entries.  Test shows that the time
+spent by swapcache_free_entries() could be reduced after the patch.
+
+With the patch, the memory (some swapped out) free time reduced
+13.6% (from 2.59s to 2.28s) in the vm-scalability swap-w-rand test
+case with 16 processes.  The test is done on a Xeon E5 v3 system.  The
+swap device used is a RAM simulated PMEM (persistent memory) device.
+To test swapping, the test case creates 16 processes, which allocate
+and write to the anonymous pages until the RAM and part of the swap
+device is used up, finally the memory (some swapped out) is freed
+before exit.
+
+Signed-off-by: Huang Ying <ying.huang@intel.com>
+Acked-by: Tim Chen <tim.c.chen@intel.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Shaohua Li <shli@kernel.org>
+Cc: Minchan Kim <minchan@kernel.org>
+Cc: Rik van Riel <riel@redhat.com>
+
+v5:
+
+- Use a smarter way to determine whether sort is necessary.
+
+v4:
+
+- Avoid unnecessary sort if all entries are from one swap device.
+
+v3:
+
+- Add some comments in code per Rik's suggestion.
+
+v2:
+
+- Avoid sort swap entries if there is only one swap device.
+---
+ mm/swapfile.c | 43 ++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 38 insertions(+), 5 deletions(-)
+
+diff --git a/mm/swapfile.c b/mm/swapfile.c
+index 71890061f653..10e75f9e8ac1 100644
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -37,6 +37,7 @@
+ #include <linux/swapfile.h>
+ #include <linux/export.h>
+ #include <linux/swap_slots.h>
++#include <linux/sort.h>
+ 
+ #include <asm/pgtable.h>
+ #include <asm/tlbflush.h>
+@@ -1065,20 +1066,52 @@ void swapcache_free(swp_entry_t entry)
+ 	}
+ }
+ 
++static int swp_entry_cmp(const void *ent1, const void *ent2)
++{
++	const swp_entry_t *e1 = ent1, *e2 = ent2;
++
++	return (int)(swp_type(*e1) - swp_type(*e2));
++}
++
+ void swapcache_free_entries(swp_entry_t *entries, int n)
+ {
+ 	struct swap_info_struct *p, *prev;
+-	int i;
++	int i, m;
++	swp_entry_t entry;
++	unsigned int prev_swp_type;
+ 
+ 	if (n <= 0)
+ 		return;
+ 
+ 	prev = NULL;
+ 	p = NULL;
+-	for (i = 0; i < n; ++i) {
+-		p = swap_info_get_cont(entries[i], prev);
+-		if (p)
+-			swap_entry_free(p, entries[i]);
++	m = 0;
++	prev_swp_type = swp_type(entries[0]);
++	for (i = 0; i < n; i++) {
++		entry = entries[i];
++		if (likely(swp_type(entry) == prev_swp_type)) {
++			p = swap_info_get_cont(entry, prev);
++			if (likely(p))
++				swap_entry_free(p, entry);
++			prev = p;
++		} else if (!m)
++			m = i;
++	}
++	if (p)
++		spin_unlock(&p->lock);
++	if (likely(!m))
++		return;
++
++	/* Sort swap entries by swap device, so each lock is only taken once. */
++	sort(entries + m, n - m, sizeof(entries[0]), swp_entry_cmp, NULL);
++	prev = NULL;
++	for (i = m; i < n; i++) {
++		entry = entries[i];
++		if (swp_type(entry) == prev_swp_type)
++			continue;
++		p = swap_info_get_cont(entry, prev);
++		if (likely(p))
++			swap_entry_free(p, entry);
+ 		prev = p;
+ 	}
+ 	if (p)
+-- 
+2.11.0
diff --git a/a/content_digest b/N1/content_digest
index 16c5982..1b37fde 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -9,11 +9,11 @@
  "Subject\0Re: [PATCH -mm -v3] mm, swap: Sort swap entries before free\0"
  "Date\0Wed, 26 Apr 2017 20:42:10 +0800\0"
  "To\0Minchan Kim <minchan@kernel.org>\0"
- "Cc\0Huang"
+ "Cc\0Huang\\"
   Ying <ying.huang@intel.com>
   Andrew Morton <akpm@linux-foundation.org>
-  linux-mm@kvack.org
-  linux-kernel@vger.kernel.org
+  <linux-mm@kvack.org>
+  <linux-kernel@vger.kernel.org>
   Hugh Dickins <hughd@google.com>
   Shaohua Li <shli@kernel.org>
  " Rik van Riel <riel@redhat.com>\0"
@@ -135,6 +135,134 @@
  "for one swap user case too.\n"
  "\n"
  "Best Regards,\n"
- Huang, Ying
+ "Huang, Ying\n"
+ "\n"
+ ">From 7bd903c42749c448ef6acbbdee8dcbc1c5b498b9 Mon Sep 17 00:00:00 2001\n"
+ "From: Huang Ying <ying.huang@intel.com>\n"
+ "Date: Thu, 23 Feb 2017 13:05:20 +0800\n"
+ "Subject: [PATCH -v5] mm, swap: Sort swap entries before free\n"
+ "\n"
+ "To reduce the lock contention of swap_info_struct->lock when freeing\n"
+ "swap entry.  The freed swap entries will be collected in a per-CPU\n"
+ "buffer firstly, and be really freed later in batch.  During the batch\n"
+ "freeing, if the consecutive swap entries in the per-CPU buffer belongs\n"
+ "to same swap device, the swap_info_struct->lock needs to be\n"
+ "acquired/released only once, so that the lock contention could be\n"
+ "reduced greatly.  But if there are multiple swap devices, it is\n"
+ "possible that the lock may be unnecessarily released/acquired because\n"
+ "the swap entries belong to the same swap device are non-consecutive in\n"
+ "the per-CPU buffer.\n"
+ "\n"
+ "To solve the issue, the per-CPU buffer is sorted according to the swap\n"
+ "device before freeing the swap entries.  Test shows that the time\n"
+ "spent by swapcache_free_entries() could be reduced after the patch.\n"
+ "\n"
+ "With the patch, the memory (some swapped out) free time reduced\n"
+ "13.6% (from 2.59s to 2.28s) in the vm-scalability swap-w-rand test\n"
+ "case with 16 processes.  The test is done on a Xeon E5 v3 system.  The\n"
+ "swap device used is a RAM simulated PMEM (persistent memory) device.\n"
+ "To test swapping, the test case creates 16 processes, which allocate\n"
+ "and write to the anonymous pages until the RAM and part of the swap\n"
+ "device is used up, finally the memory (some swapped out) is freed\n"
+ "before exit.\n"
+ "\n"
+ "Signed-off-by: Huang Ying <ying.huang@intel.com>\n"
+ "Acked-by: Tim Chen <tim.c.chen@intel.com>\n"
+ "Cc: Hugh Dickins <hughd@google.com>\n"
+ "Cc: Shaohua Li <shli@kernel.org>\n"
+ "Cc: Minchan Kim <minchan@kernel.org>\n"
+ "Cc: Rik van Riel <riel@redhat.com>\n"
+ "\n"
+ "v5:\n"
+ "\n"
+ "- Use a smarter way to determine whether sort is necessary.\n"
+ "\n"
+ "v4:\n"
+ "\n"
+ "- Avoid unnecessary sort if all entries are from one swap device.\n"
+ "\n"
+ "v3:\n"
+ "\n"
+ "- Add some comments in code per Rik's suggestion.\n"
+ "\n"
+ "v2:\n"
+ "\n"
+ "- Avoid sort swap entries if there is only one swap device.\n"
+ "---\n"
+ " mm/swapfile.c | 43 ++++++++++++++++++++++++++++++++++++++-----\n"
+ " 1 file changed, 38 insertions(+), 5 deletions(-)\n"
+ "\n"
+ "diff --git a/mm/swapfile.c b/mm/swapfile.c\n"
+ "index 71890061f653..10e75f9e8ac1 100644\n"
+ "--- a/mm/swapfile.c\n"
+ "+++ b/mm/swapfile.c\n"
+ "@@ -37,6 +37,7 @@\n"
+ " #include <linux/swapfile.h>\n"
+ " #include <linux/export.h>\n"
+ " #include <linux/swap_slots.h>\n"
+ "+#include <linux/sort.h>\n"
+ " \n"
+ " #include <asm/pgtable.h>\n"
+ " #include <asm/tlbflush.h>\n"
+ "@@ -1065,20 +1066,52 @@ void swapcache_free(swp_entry_t entry)\n"
+ " \t}\n"
+ " }\n"
+ " \n"
+ "+static int swp_entry_cmp(const void *ent1, const void *ent2)\n"
+ "+{\n"
+ "+\tconst swp_entry_t *e1 = ent1, *e2 = ent2;\n"
+ "+\n"
+ "+\treturn (int)(swp_type(*e1) - swp_type(*e2));\n"
+ "+}\n"
+ "+\n"
+ " void swapcache_free_entries(swp_entry_t *entries, int n)\n"
+ " {\n"
+ " \tstruct swap_info_struct *p, *prev;\n"
+ "-\tint i;\n"
+ "+\tint i, m;\n"
+ "+\tswp_entry_t entry;\n"
+ "+\tunsigned int prev_swp_type;\n"
+ " \n"
+ " \tif (n <= 0)\n"
+ " \t\treturn;\n"
+ " \n"
+ " \tprev = NULL;\n"
+ " \tp = NULL;\n"
+ "-\tfor (i = 0; i < n; ++i) {\n"
+ "-\t\tp = swap_info_get_cont(entries[i], prev);\n"
+ "-\t\tif (p)\n"
+ "-\t\t\tswap_entry_free(p, entries[i]);\n"
+ "+\tm = 0;\n"
+ "+\tprev_swp_type = swp_type(entries[0]);\n"
+ "+\tfor (i = 0; i < n; i++) {\n"
+ "+\t\tentry = entries[i];\n"
+ "+\t\tif (likely(swp_type(entry) == prev_swp_type)) {\n"
+ "+\t\t\tp = swap_info_get_cont(entry, prev);\n"
+ "+\t\t\tif (likely(p))\n"
+ "+\t\t\t\tswap_entry_free(p, entry);\n"
+ "+\t\t\tprev = p;\n"
+ "+\t\t} else if (!m)\n"
+ "+\t\t\tm = i;\n"
+ "+\t}\n"
+ "+\tif (p)\n"
+ "+\t\tspin_unlock(&p->lock);\n"
+ "+\tif (likely(!m))\n"
+ "+\t\treturn;\n"
+ "+\n"
+ "+\t/* Sort swap entries by swap device, so each lock is only taken once. */\n"
+ "+\tsort(entries + m, n - m, sizeof(entries[0]), swp_entry_cmp, NULL);\n"
+ "+\tprev = NULL;\n"
+ "+\tfor (i = m; i < n; i++) {\n"
+ "+\t\tentry = entries[i];\n"
+ "+\t\tif (swp_type(entry) == prev_swp_type)\n"
+ "+\t\t\tcontinue;\n"
+ "+\t\tp = swap_info_get_cont(entry, prev);\n"
+ "+\t\tif (likely(p))\n"
+ "+\t\t\tswap_entry_free(p, entry);\n"
+ " \t\tprev = p;\n"
+ " \t}\n"
+ " \tif (p)\n"
+ "-- \n"
+ 2.11.0
 
-6cbc6b9381685abdd072543ea715a6dc29e22a983988fd88176cb366446aae3b
+3acf579459c1512fca110f9b216ea10a63f9a0e19da122908393ebb0050b2c89

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.