From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
ian.jackson@eu.citrix.com, tim@xen.org, jbeulich@suse.com,
Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing
Date: Fri, 19 May 2017 11:50:38 -0400 [thread overview]
Message-ID: <1495209040-11101-7-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1495209040-11101-1-git-send-email-boris.ostrovsky@oracle.com>
Instead of scrubbing pages while holding heap lock we can mark
buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk it will signal the scrubber to abort scrub by setting
head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after
processing each page and stops its work as soon as it sees it.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v4:
* Drop unnecessary ACCESS_ONCE in scrub_free_pages, add smp_wmb()
in scrub_continue()
* Keep track of first_dirty in scrub_wait_state
xen/common/page_alloc.c | 100 ++++++++++++++++++++++++++++++++++++++++++++---
xen/include/asm-arm/mm.h | 4 ++
xen/include/asm-x86/mm.h | 4 ++
3 files changed, 103 insertions(+), 5 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6e505b1..b365305 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -694,6 +694,17 @@ static void page_list_add_scrub(struct page_info *pg, unsigned int node,
page_list_add(pg, &heap(node, zone, order));
}
+static void check_and_stop_scrub(struct page_info *head)
+{
+ if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+ {
+ head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+ spin_lock_kick();
+ while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+ cpu_relax();
+ }
+}
+
static struct page_info *get_free_buddy(unsigned int zone_lo,
unsigned int zone_hi,
unsigned int order, unsigned int memflags,
@@ -738,9 +749,15 @@ static struct page_info *get_free_buddy(unsigned int zone_lo,
{
if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
{
- if ( (order == 0) || use_unscrubbed ||
- pg->u.free.first_dirty == INVALID_DIRTY_IDX)
+ if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
return pg;
+
+ if ( (order == 0) || use_unscrubbed )
+ {
+ check_and_stop_scrub(pg);
+ return pg;
+ }
+
page_list_add_tail(pg, &heap(node, zone, j));
}
}
@@ -925,6 +942,7 @@ static int reserve_offlined_page(struct page_info *head)
cur_head = head;
+ check_and_stop_scrub(head);
/*
* We may break the buddy so let's mark the head as clean. Then, when
* merging chunks back into the heap, we will see whether the chunk has
@@ -1069,6 +1087,29 @@ static unsigned int node_to_scrub(bool get_node)
return closest;
}
+struct scrub_wait_state {
+ struct page_info *pg;
+ unsigned int first_dirty;
+ bool drop;
+};
+
+static void scrub_continue(void *data)
+{
+ struct scrub_wait_state *st = data;
+
+ if ( st->drop )
+ return;
+
+ if ( st->pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+ {
+ /* There is a waiter for this buddy. Release it. */
+ st->drop = true;
+ st->pg->u.free.first_dirty = st->first_dirty;
+ smp_wmb();
+ st->pg->u.free.scrub_state = 0;
+ }
+}
+
bool scrub_free_pages(void)
{
struct page_info *pg;
@@ -1090,24 +1131,51 @@ bool scrub_free_pages(void)
do {
while ( !page_list_empty(&heap(node, zone, order)) )
{
- unsigned int i;
+ unsigned int i, dirty_cnt;
+ struct scrub_wait_state st;
/* Unscrubbed pages are always at the end of the list. */
pg = page_list_last(&heap(node, zone, order));
if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
break;
+ ASSERT(!pg->u.free.scrub_state);
+ pg->u.free.scrub_state = PAGE_SCRUBBING;
+
+ spin_unlock(&heap_lock);
+
+ dirty_cnt = 0;
for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
{
cnt++;
if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
{
scrub_one_page(&pg[i]);
+ /*
+ * We can modify count_info without holding heap
+ * lock since we effectively locked this buddy by
+ * setting its scrub_state.
+ */
pg[i].count_info &= ~PGC_need_scrub;
- node_need_scrub[node]--;
+ dirty_cnt++;
cnt += 100; /* scrubbed pages add heavier weight. */
}
+ if ( pg->u.free.scrub_state & PAGE_SCRUB_ABORT )
+ {
+ /* Someone wants this chunk. Drop everything. */
+
+ pg->u.free.first_dirty = (i == (1U << order)) ?
+ INVALID_DIRTY_IDX : i + 1;
+ smp_wmb();
+ pg->u.free.scrub_state = 0;
+
+ spin_lock(&heap_lock);
+ node_need_scrub[node] -= dirty_cnt;
+ spin_unlock(&heap_lock);
+ goto out_nolock;
+ }
+
/*
* Scrub a few (8) pages before becoming eligible for
* preemtion. But also count non-scrubbing loop iteration
@@ -1121,6 +1189,17 @@ bool scrub_free_pages(void)
}
}
+ st.pg = pg;
+ st.first_dirty = (i == (1UL << order)) ?
+ INVALID_DIRTY_IDX : i + 1;
+ st.drop = false;
+ spin_lock_cb(&heap_lock, scrub_continue, &st);
+
+ node_need_scrub[node] -= dirty_cnt;
+
+ if ( st.drop )
+ goto out;
+
if ( i == (1U << order) )
{
page_list_del(pg, &heap(node, zone, order));
@@ -1128,7 +1207,9 @@ bool scrub_free_pages(void)
}
else
pg->u.free.first_dirty = i + 1;
-
+
+ pg->u.free.scrub_state = 0;
+
if ( preempt || (node_need_scrub[node] == 0) )
goto out;
}
@@ -1137,6 +1218,8 @@ bool scrub_free_pages(void)
out:
spin_unlock(&heap_lock);
+
+ out_nolock:
node_clear(node, node_scrubbing);
return softirq_pending(cpu) || (node_to_scrub(false) != NUMA_NO_NODE);
}
@@ -1175,6 +1258,8 @@ static void free_heap_pages(
if ( page_state_is(&pg[i], offlined) )
tainted = 1;
+ pg[i].u.free.scrub_state = 0;
+
/* If a page has no owner it will need no safety TLB flush. */
pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
if ( pg[i].u.free.need_tlbflush )
@@ -1218,6 +1303,8 @@ static void free_heap_pages(
(phys_to_nid(page_to_maddr(predecessor)) != node) )
break;
+ check_and_stop_scrub(predecessor);
+
page_list_del(predecessor, &heap(node, zone, order));
if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
@@ -1240,6 +1327,9 @@ static void free_heap_pages(
(PFN_ORDER(successor) != order) ||
(phys_to_nid(page_to_maddr(successor)) != node) )
break;
+
+ check_and_stop_scrub(successor);
+
page_list_del(successor, &heap(node, zone, order));
need_scrub |= (successor->u.free.first_dirty != INVALID_DIRTY_IDX);
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 38d4fba..148d60b 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -46,6 +46,10 @@ struct page_info
/* Index of the first *possibly* unscrubbed page in the buddy. */
#define INVALID_DIRTY_IDX -1U
unsigned int first_dirty;
+#define PAGE_SCRUBBING (1<<0)
+#define PAGE_SCRUB_ABORT (1<<1)
+ unsigned char scrub_state;
+
/* Do TLBs need flushing for safety before next page use? */
bool_t need_tlbflush;
} free;
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index e20f161..f46b242 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -90,6 +90,10 @@ struct page_info
/* Index of the first *possibly* unscrubbed page in the buddy. */
#define INVALID_DIRTY_IDX -1U
unsigned int first_dirty;
+#define PAGE_SCRUBBING (1<<0)
+#define PAGE_SCRUB_ABORT (1<<1)
+ unsigned char scrub_state;
+
/* Do TLBs need flushing for safety before next page use? */
bool_t need_tlbflush;
} free;
--
1.8.3.1
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
next prev parent reply other threads:[~2017-05-19 15:50 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-05-19 15:50 [PATCH v4 0/8] Memory scrubbing from idle loop Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 1/8] mm: Place unscrubbed pages at the end of pagelist Boris Ostrovsky
2017-06-09 14:50 ` Jan Beulich
2017-06-09 20:07 ` Boris Ostrovsky
2017-06-12 6:50 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 2/8] mm: Extract allocation loop from alloc_heap_pages() Boris Ostrovsky
2017-06-09 15:08 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 3/8] mm: Scrub pages in alloc_heap_pages() if needed Boris Ostrovsky
2017-06-09 15:22 ` Jan Beulich
2017-06-09 20:55 ` Boris Ostrovsky
2017-06-12 6:54 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 4/8] mm: Scrub memory from idle loop Boris Ostrovsky
2017-06-12 8:08 ` Jan Beulich
2017-06-12 17:01 ` Boris Ostrovsky
2017-06-12 21:28 ` Dario Faggioli
2017-06-13 8:19 ` Jan Beulich
2017-06-13 18:39 ` Boris Ostrovsky
2017-06-13 20:36 ` Dario Faggioli
2017-06-13 21:54 ` Boris Ostrovsky
2017-06-14 9:18 ` Jan Beulich
2017-06-13 8:12 ` Jan Beulich
2017-06-13 18:20 ` Boris Ostrovsky
2017-06-14 9:17 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 5/8] spinlock: Introduce spin_lock_cb() Boris Ostrovsky
2017-06-12 8:23 ` Jan Beulich
2017-05-19 15:50 ` Boris Ostrovsky [this message]
2017-06-12 8:30 ` [PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing Jan Beulich
2017-06-12 17:11 ` Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 7/8] mm: Print number of unscrubbed pages in 'H' debug handler Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 8/8] mm: Make sure pages are scrubbed Boris Ostrovsky
2017-06-12 8:43 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1495209040-11101-7-git-send-email-boris.ostrovsky@oracle.com \
--to=boris.ostrovsky@oracle.com \
--cc=George.Dunlap@eu.citrix.com \
--cc=andrew.cooper3@citrix.com \
--cc=ian.jackson@eu.citrix.com \
--cc=jbeulich@suse.com \
--cc=sstabellini@kernel.org \
--cc=tim@xen.org \
--cc=wei.liu2@citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).