From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
To: xen-devel@lists.xen.org
Cc: sstabellini@kernel.org, wei.liu2@citrix.com,
George.Dunlap@eu.citrix.com, andrew.cooper3@citrix.com,
ian.jackson@eu.citrix.com, tim@xen.org, jbeulich@suse.com,
Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: [PATCH v4 1/8] mm: Place unscrubbed pages at the end of pagelist
Date: Fri, 19 May 2017 11:50:33 -0400 [thread overview]
Message-ID: <1495209040-11101-2-git-send-email-boris.ostrovsky@oracle.com> (raw)
In-Reply-To: <1495209040-11101-1-git-send-email-boris.ostrovsky@oracle.com>
... so that it's easy to find pages that need to be scrubbed (those pages are
now marked with the _PGC_need_scrub bit).
We keep track of the first unscrubbed page in a page buddy using the
first_dirty field. For now it can take one of two values: 0 (the whole buddy
needs scrubbing) or INVALID_DIRTY_IDX (the buddy does not need to be scrubbed).
Subsequent patches will allow scrubbing to be interrupted, resulting in
first_dirty taking any value.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
Changes in v4:
* Instead of using a bool dirty_head in page_info use int first_dirty.
- Keep track of first_dirty in free_heap_pages()
* Alias PGC_need_scrub flag to PGC_allocated
xen/common/page_alloc.c | 175 ++++++++++++++++++++++++++++++++++++++---------
xen/include/asm-arm/mm.h | 10 +++
xen/include/asm-x86/mm.h | 10 +++
3 files changed, 163 insertions(+), 32 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 9e41fb4..c65d214 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -383,6 +383,8 @@ typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
#define heap(node, zone, order) ((*_heap[node])[zone][order])
+static unsigned long node_need_scrub[MAX_NUMNODES];
+
static unsigned long *avail[MAX_NUMNODES];
static long total_avail_pages;
@@ -678,6 +680,20 @@ static void check_low_mem_virq(void)
}
}
+/* Pages that need a scrub are added to tail, otherwise to head. */
+static void page_list_add_scrub(struct page_info *pg, unsigned int node,
+ unsigned int zone, unsigned int order,
+ unsigned int first_dirty)
+{
+ PFN_ORDER(pg) = order;
+ pg->u.free.first_dirty = first_dirty;
+
+ if ( first_dirty != INVALID_DIRTY_IDX )
+ page_list_add_tail(pg, &heap(node, zone, order));
+ else
+ page_list_add(pg, &heap(node, zone, order));
+}
+
/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
unsigned int zone_lo, unsigned int zone_hi,
@@ -689,7 +705,7 @@ static struct page_info *alloc_heap_pages(
unsigned long request = 1UL << order;
struct page_info *pg;
nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
- bool_t need_tlbflush = 0;
+ bool need_scrub, need_tlbflush = 0;
uint32_t tlbflush_timestamp = 0;
/* Make sure there are enough bits in memflags for nodeID. */
@@ -798,11 +814,18 @@ static struct page_info *alloc_heap_pages(
return NULL;
found:
+ need_scrub = (pg->u.free.first_dirty != INVALID_DIRTY_IDX);
+
/* We may have to halve the chunk a number of times. */
while ( j != order )
{
- PFN_ORDER(pg) = --j;
- page_list_add_tail(pg, &heap(node, zone, j));
+ /*
+ * Some of the sub-chunks may be clean but we will mark them
+ * as dirty (if need_scrub is set) to avoid traversing the
+ * list here.
+ */
+ page_list_add_scrub(pg, node, zone, --j,
+ need_scrub ? 0 : INVALID_DIRTY_IDX);
pg += 1 << j;
}
@@ -851,11 +874,20 @@ static int reserve_offlined_page(struct page_info *head)
int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
struct page_info *cur_head;
int cur_order;
+ bool need_scrub;
ASSERT(spin_is_locked(&heap_lock));
cur_head = head;
+ /*
+ * We may break the buddy so let's mark the head as clean. Then, when
+ * merging chunks back into the heap, we will see whether the chunk has
+ * unscrubbed pages and set its first_dirty properly.
+ */
+ need_scrub = (head->u.free.first_dirty != INVALID_DIRTY_IDX);
+ head->u.free.first_dirty = INVALID_DIRTY_IDX;
+
page_list_del(head, &heap(node, zone, head_order));
while ( cur_head < (head + (1 << head_order)) )
@@ -873,6 +905,8 @@ static int reserve_offlined_page(struct page_info *head)
while ( cur_order < head_order )
{
+ unsigned int first_dirty = INVALID_DIRTY_IDX;
+
next_order = cur_order + 1;
if ( (cur_head + (1 << next_order)) >= (head + ( 1 << head_order)) )
@@ -892,8 +926,20 @@ static int reserve_offlined_page(struct page_info *head)
{
merge:
/* We don't consider merging outside the head_order. */
- page_list_add_tail(cur_head, &heap(node, zone, cur_order));
- PFN_ORDER(cur_head) = cur_order;
+
+ /* See if any of the pages indeed need scrubbing. */
+ if ( need_scrub )
+ {
+ for ( i = 0; i < (1 << cur_order); i++ )
+ if ( test_bit(_PGC_need_scrub,
+ &cur_head[i].count_info) )
+ {
+ first_dirty = i;
+ break;
+ }
+ }
+ page_list_add_scrub(cur_head, node, zone,
+ cur_order, first_dirty);
cur_head += (1 << cur_order);
break;
}
@@ -919,9 +965,52 @@ static int reserve_offlined_page(struct page_info *head)
return count;
}
+static void scrub_free_pages(unsigned int node)
+{
+ struct page_info *pg;
+ unsigned int zone;
+
+ ASSERT(spin_is_locked(&heap_lock));
+
+ if ( !node_need_scrub[node] )
+ return;
+
+ for ( zone = 0; zone < NR_ZONES; zone++ )
+ {
+ unsigned int order = MAX_ORDER;
+ do {
+ while ( !page_list_empty(&heap(node, zone, order)) )
+ {
+ unsigned int i;
+
+ /* Unscrubbed pages are always at the end of the list. */
+ pg = page_list_last(&heap(node, zone, order));
+ if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX )
+ break;
+
+ for ( i = pg->u.free.first_dirty; i < (1U << order); i++)
+ {
+ if ( test_bit(_PGC_need_scrub, &pg[i].count_info) )
+ {
+ scrub_one_page(&pg[i]);
+ pg[i].count_info &= ~PGC_need_scrub;
+ node_need_scrub[node]--;
+ }
+ }
+
+ page_list_del(pg, &heap(node, zone, order));
+ page_list_add_scrub(pg, node, zone, order, INVALID_DIRTY_IDX);
+
+ if ( node_need_scrub[node] == 0 )
+ return;
+ }
+ } while ( order-- != 0 );
+ }
+}
+
/* Free 2^@order set of pages. */
static void free_heap_pages(
- struct page_info *pg, unsigned int order)
+ struct page_info *pg, unsigned int order, bool need_scrub)
{
unsigned long mask, mfn = page_to_mfn(pg);
unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
@@ -961,10 +1050,20 @@ static void free_heap_pages(
/* This page is not a guest frame any more. */
page_set_owner(&pg[i], NULL); /* set_gpfn_from_mfn snoops pg owner */
set_gpfn_from_mfn(mfn + i, INVALID_M2P_ENTRY);
+
+ if ( need_scrub )
+ pg[i].count_info |= PGC_need_scrub;
}
avail[node][zone] += 1 << order;
total_avail_pages += 1 << order;
+ if ( need_scrub )
+ {
+ node_need_scrub[node] += 1 << order;
+ pg->u.free.first_dirty = 0;
+ }
+ else
+ pg->u.free.first_dirty = INVALID_DIRTY_IDX;
if ( tmem_enabled() )
midsize_alloc_zone_pages = max(
@@ -977,35 +1076,54 @@ static void free_heap_pages(
if ( (page_to_mfn(pg) & mask) )
{
+ struct page_info *predecessor = pg - mask;
+
/* Merge with predecessor block? */
- if ( !mfn_valid(_mfn(page_to_mfn(pg-mask))) ||
- !page_state_is(pg-mask, free) ||
- (PFN_ORDER(pg-mask) != order) ||
- (phys_to_nid(page_to_maddr(pg-mask)) != node) )
+ if ( !mfn_valid(_mfn(page_to_mfn(predecessor))) ||
+ !page_state_is(predecessor, free) ||
+ (PFN_ORDER(predecessor) != order) ||
+ (phys_to_nid(page_to_maddr(predecessor)) != node) )
break;
- pg -= mask;
- page_list_del(pg, &heap(node, zone, order));
+
+ page_list_del(predecessor, &heap(node, zone, order));
+
+ if ( predecessor->u.free.first_dirty != INVALID_DIRTY_IDX )
+ need_scrub = true;
+ /* ... and keep predecessor's first_dirty. */
+ else if ( pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+ predecessor->u.free.first_dirty = (1U << order) +
+ pg->u.free.first_dirty;
+
+ pg->u.free.first_dirty = INVALID_DIRTY_IDX;
+ pg = predecessor;
}
else
{
+ struct page_info *successor = pg + mask;
+
/* Merge with successor block? */
- if ( !mfn_valid(_mfn(page_to_mfn(pg+mask))) ||
- !page_state_is(pg+mask, free) ||
- (PFN_ORDER(pg+mask) != order) ||
- (phys_to_nid(page_to_maddr(pg+mask)) != node) )
+ if ( !mfn_valid(_mfn(page_to_mfn(successor))) ||
+ !page_state_is(successor, free) ||
+ (PFN_ORDER(successor) != order) ||
+ (phys_to_nid(page_to_maddr(successor)) != node) )
break;
- page_list_del(pg + mask, &heap(node, zone, order));
+ page_list_del(successor, &heap(node, zone, order));
+
+ need_scrub |= (successor->u.free.first_dirty != INVALID_DIRTY_IDX);
+ successor->u.free.first_dirty = INVALID_DIRTY_IDX;
}
order++;
}
- PFN_ORDER(pg) = order;
- page_list_add_tail(pg, &heap(node, zone, order));
+ page_list_add_scrub(pg, node, zone, order, pg->u.free.first_dirty);
if ( tainted )
reserve_offlined_page(pg);
+ if ( need_scrub )
+ scrub_free_pages(node);
+
spin_unlock(&heap_lock);
}
@@ -1226,7 +1344,7 @@ unsigned int online_page(unsigned long mfn, uint32_t *status)
spin_unlock(&heap_lock);
if ( (y & PGC_state) == PGC_state_offlined )
- free_heap_pages(pg, 0);
+ free_heap_pages(pg, 0, false);
return ret;
}
@@ -1295,7 +1413,7 @@ static void init_heap_pages(
nr_pages -= n;
}
- free_heap_pages(pg+i, 0);
+ free_heap_pages(pg + i, 0, false);
}
}
@@ -1622,7 +1740,7 @@ void free_xenheap_pages(void *v, unsigned int order)
memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
- free_heap_pages(virt_to_page(v), order);
+ free_heap_pages(virt_to_page(v), order, false);
}
#else
@@ -1676,12 +1794,9 @@ void free_xenheap_pages(void *v, unsigned int order)
pg = virt_to_page(v);
for ( i = 0; i < (1u << order); i++ )
- {
- scrub_one_page(&pg[i]);
pg[i].count_info &= ~PGC_xen_heap;
- }
- free_heap_pages(pg, order);
+ free_heap_pages(pg, order, true);
}
#endif
@@ -1790,7 +1905,7 @@ struct page_info *alloc_domheap_pages(
if ( d && !(memflags & MEMF_no_owner) &&
assign_pages(d, pg, order, memflags) )
{
- free_heap_pages(pg, order);
+ free_heap_pages(pg, order, false);
return NULL;
}
@@ -1858,11 +1973,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
scrub = 1;
}
- if ( unlikely(scrub) )
- for ( i = 0; i < (1 << order); i++ )
- scrub_one_page(&pg[i]);
-
- free_heap_pages(pg, order);
+ free_heap_pages(pg, order, scrub);
}
if ( drop_dom_ref )
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index f6915ad..38d4fba 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -43,6 +43,9 @@ struct page_info
} inuse;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
+ /* Index of the first *possibly* unscrubbed page in the buddy. */
+#define INVALID_DIRTY_IDX -1U
+ unsigned int first_dirty;
/* Do TLBs need flushing for safety before next page use? */
bool_t need_tlbflush;
} free;
@@ -115,6 +118,13 @@ struct page_info
#define PGC_count_width PG_shift(9)
#define PGC_count_mask ((1UL<<PGC_count_width)-1)
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub _PGC_allocated
+#define PGC_need_scrub PGC_allocated
+
extern unsigned long xenheap_mfn_start, xenheap_mfn_end;
extern vaddr_t xenheap_virt_end;
#ifdef CONFIG_ARM_64
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index 119d7de..e20f161 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,9 @@ struct page_info
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
+ /* Index of the first *possibly* unscrubbed page in the buddy. */
+#define INVALID_DIRTY_IDX -1U
+ unsigned int first_dirty;
/* Do TLBs need flushing for safety before next page use? */
bool_t need_tlbflush;
} free;
@@ -233,6 +236,13 @@ struct page_info
#define PGC_count_width PG_shift(9)
#define PGC_count_mask ((1UL<<PGC_count_width)-1)
+/*
+ * Page needs to be scrubbed. Since this bit can only be set on a page that is
+ * free (i.e. in PGC_state_free) we can reuse PGC_allocated bit.
+ */
+#define _PGC_need_scrub _PGC_allocated
+#define PGC_need_scrub PGC_allocated
+
struct spage_info
{
unsigned long type_info;
--
1.8.3.1
_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel
next prev parent reply other threads:[~2017-05-19 15:50 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-05-19 15:50 [PATCH v4 0/8] Memory scrubbing from idle loop Boris Ostrovsky
2017-05-19 15:50 ` Boris Ostrovsky [this message]
2017-06-09 14:50 ` [PATCH v4 1/8] mm: Place unscrubbed pages at the end of pagelist Jan Beulich
2017-06-09 20:07 ` Boris Ostrovsky
2017-06-12 6:50 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 2/8] mm: Extract allocation loop from alloc_heap_pages() Boris Ostrovsky
2017-06-09 15:08 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 3/8] mm: Scrub pages in alloc_heap_pages() if needed Boris Ostrovsky
2017-06-09 15:22 ` Jan Beulich
2017-06-09 20:55 ` Boris Ostrovsky
2017-06-12 6:54 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 4/8] mm: Scrub memory from idle loop Boris Ostrovsky
2017-06-12 8:08 ` Jan Beulich
2017-06-12 17:01 ` Boris Ostrovsky
2017-06-12 21:28 ` Dario Faggioli
2017-06-13 8:19 ` Jan Beulich
2017-06-13 18:39 ` Boris Ostrovsky
2017-06-13 20:36 ` Dario Faggioli
2017-06-13 21:54 ` Boris Ostrovsky
2017-06-14 9:18 ` Jan Beulich
2017-06-13 8:12 ` Jan Beulich
2017-06-13 18:20 ` Boris Ostrovsky
2017-06-14 9:17 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 5/8] spinlock: Introduce spin_lock_cb() Boris Ostrovsky
2017-06-12 8:23 ` Jan Beulich
2017-05-19 15:50 ` [PATCH v4 6/8] mm: Keep heap accessible to others while scrubbing Boris Ostrovsky
2017-06-12 8:30 ` Jan Beulich
2017-06-12 17:11 ` Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 7/8] mm: Print number of unscrubbed pages in 'H' debug handler Boris Ostrovsky
2017-05-19 15:50 ` [PATCH v4 8/8] mm: Make sure pages are scrubbed Boris Ostrovsky
2017-06-12 8:43 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1495209040-11101-2-git-send-email-boris.ostrovsky@oracle.com \
--to=boris.ostrovsky@oracle.com \
--cc=George.Dunlap@eu.citrix.com \
--cc=andrew.cooper3@citrix.com \
--cc=ian.jackson@eu.citrix.com \
--cc=jbeulich@suse.com \
--cc=sstabellini@kernel.org \
--cc=tim@xen.org \
--cc=wei.liu2@citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).