* [PATCH RFC 1/8] mm: Separate free page chunk merging into its own routine
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
This is needed for subsequent changes to memory scrubbing.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/common/page_alloc.c | 82 ++++++++++++++++++++++++++++-------------------
1 files changed, 49 insertions(+), 33 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index bbd7bc6..352eba9 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -926,11 +926,58 @@ static int reserve_offlined_page(struct page_info *head)
return count;
}
+static bool_t can_merge(struct page_info *head, unsigned int node,
+ unsigned int order)
+{
+ if ( !mfn_valid(page_to_mfn(head)) ||
+ !page_state_is(head, free) ||
+ (PFN_ORDER(head) != order) ||
+ (phys_to_nid(page_to_maddr(head)) != node) )
+ return 0;
+
+ return 1;
+}
+
+static void merge_chunks(struct page_info *pg, unsigned int node,
+ unsigned int zone, unsigned int order)
+{
+ ASSERT(spin_is_locked(&heap_lock));
+
+ /* Merge chunks as far as possible. */
+ while ( order < MAX_ORDER )
+ {
+ unsigned long mask = 1UL << order;
+
+ if ( (page_to_mfn(pg) & mask) )
+ {
+ /* Merge with predecessor block? */
+ if ( !can_merge(pg - mask, node, order) )
+ break;
+
+ pg -= mask;
+ page_list_del(pg, &heap(node, zone, order));
+ }
+ else
+ {
+ /* Merge with successor block? */
+ if ( !can_merge(pg + mask, node, order) )
+ break;
+
+ page_list_del(pg + mask, &heap(node, zone, order));
+ }
+
+ order++;
+ }
+
+ PFN_ORDER(pg) = order;
+ page_list_add_tail(pg, &heap(node, zone, order));
+}
+
/* Free 2^@order set of pages. */
static void free_heap_pages(
struct page_info *pg, unsigned int order)
{
- unsigned long mask, mfn = page_to_mfn(pg);
+ unsigned long mfn = page_to_mfn(pg);
unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
unsigned int zone = page_to_zone(pg);
@@ -977,38 +1024,7 @@ static void free_heap_pages(
midsize_alloc_zone_pages = max(
midsize_alloc_zone_pages, total_avail_pages / MIDSIZE_ALLOC_FRAC);
- /* Merge chunks as far as possible. */
- while ( order < MAX_ORDER )
- {
- mask = 1UL << order;
-
- if ( (page_to_mfn(pg) & mask) )
- {
- /* Merge with predecessor block? */
- if ( !mfn_valid(page_to_mfn(pg-mask)) ||
- !page_state_is(pg-mask, free) ||
- (PFN_ORDER(pg-mask) != order) ||
- (phys_to_nid(page_to_maddr(pg-mask)) != node) )
- break;
- pg -= mask;
- page_list_del(pg, &heap(node, zone, order));
- }
- else
- {
- /* Merge with successor block? */
- if ( !mfn_valid(page_to_mfn(pg+mask)) ||
- !page_state_is(pg+mask, free) ||
- (PFN_ORDER(pg+mask) != order) ||
- (phys_to_nid(page_to_maddr(pg+mask)) != node) )
- break;
- page_list_del(pg + mask, &heap(node, zone, order));
- }
-
- order++;
- }
-
- PFN_ORDER(pg) = order;
- page_list_add_tail(pg, &heap(node, zone, order));
+ merge_chunks(pg, node, zone, order);
if ( tainted )
reserve_offlined_page(pg);
--
1.7.1
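As an aside, the direction chosen in each merge step follows from the buddy
invariant: a free chunk of order o starts at an MFN aligned to 2^o, so bit o
of the MFN says whether the buddy lies just below or just above the chunk.
A minimal standalone sketch of that arithmetic (plain C, not Xen code; the
mfn and order values are made-up inputs):

#include <stdio.h>

/* For a free chunk of 2^order pages starting at mfn, the buddy is the
 * neighbouring chunk that differs only in bit 'order' of the MFN. */
static unsigned long buddy_mfn(unsigned long mfn, unsigned int order)
{
    return mfn ^ (1UL << order);
}

int main(void)
{
    unsigned long mfn = 0x1200UL;   /* hypothetical 2^9-aligned chunk */
    unsigned int order = 9;

    /* Bit 'order' set => the buddy is the predecessor block. */
    if ( mfn & (1UL << order) )
        printf("merge with predecessor at mfn %#lx\n", buddy_mfn(mfn, order));
    else
        printf("merge with successor at mfn %#lx\n", buddy_mfn(mfn, order));
    return 0;
}

Either way the candidate is mfn ^ (1UL << order); can_merge() then checks
that it is valid, free, of the same order and on the same node before the
merge proceeds.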
* [PATCH RFC 2/8] mm: Place unscrubbed pages at the end of pagelist
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
...so that it's easy to find pages that need to be scrubbed (such pages are
now marked with the _PGC_need_scrub bit).
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/common/page_alloc.c | 97 +++++++++++++++++++++++++++++++++++----------
xen/include/asm-x86/mm.h | 4 ++
2 files changed, 79 insertions(+), 22 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 352eba9..653bb91 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -385,6 +385,8 @@ typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
#define heap(node, zone, order) ((*_heap[node])[zone][order])
+static unsigned node_need_scrub[MAX_NUMNODES];
+
static unsigned long *avail[MAX_NUMNODES];
static long total_avail_pages;
@@ -809,7 +811,7 @@ static struct page_info *alloc_heap_pages(
while ( j != order )
{
PFN_ORDER(pg) = --j;
- page_list_add_tail(pg, &heap(node, zone, j));
+ page_list_add(pg, &heap(node, zone, j));
pg += 1 << j;
}
@@ -829,6 +831,8 @@ static struct page_info *alloc_heap_pages(
BUG_ON(pg[i].count_info != PGC_state_free);
pg[i].count_info = PGC_state_inuse;
+ BUG_ON(test_bit(_PGC_need_scrub, &pg[i].count_info));
+
if ( !(memflags & MEMF_no_tlbflush) )
accumulate_tlbflush(&need_tlbflush, &pg[i],
&tlbflush_timestamp);
@@ -899,7 +903,10 @@ static int reserve_offlined_page(struct page_info *head)
{
merge:
/* We don't consider merging outside the head_order. */
- page_list_add_tail(cur_head, &heap(node, zone, cur_order));
+ if ( test_bit(_PGC_need_scrub, &cur_head->count_info) )
+ page_list_add_tail(cur_head, &heap(node, zone, cur_order));
+ else
+ page_list_add(cur_head, &heap(node, zone, cur_order));
PFN_ORDER(cur_head) = cur_order;
cur_head += (1 << cur_order);
break;
@@ -927,7 +934,7 @@ static int reserve_offlined_page(struct page_info *head)
}
static bool_t can_merge(struct page_info *head, unsigned int node,
- unsigned int order)
+ unsigned int order, bool_t need_scrub)
{
if ( !mfn_valid(page_to_mfn(head)) ||
!page_state_is(head, free) ||
@@ -935,11 +942,16 @@ static bool_t can_merge(struct page_info *head, unsigned int node,
(phys_to_nid(page_to_maddr(head)) != node) )
return 0;
+ if ( !!need_scrub ^
+ !!test_bit(_PGC_need_scrub, &head->count_info) )
+ return 0;
+
return 1;
}
static void merge_chunks(struct page_info *pg, unsigned int node,
- unsigned int zone, unsigned int order)
+ unsigned int zone, unsigned int order,
+ bool_t need_scrub)
{
ASSERT(spin_is_locked(&heap_lock));
@@ -951,7 +963,7 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
if ( (page_to_mfn(pg) & mask) )
{
/* Merge with predecessor block? */
- if ( !can_merge(pg - mask, node, order) )
+ if ( !can_merge(pg - mask, node, order, need_scrub) )
break;
pg -= mask;
@@ -960,7 +972,7 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
else
{
/* Merge with successor block? */
- if ( !can_merge(pg + mask, node, order) )
+ if ( !can_merge(pg + mask, node, order, need_scrub) )
break;
page_list_del(pg + mask, &heap(node, zone, order));
@@ -970,12 +982,49 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
}
PFN_ORDER(pg) = order;
- page_list_add_tail(pg, &heap(node, zone, order));
+ if ( need_scrub )
+ page_list_add_tail(pg, &heap(node, zone, order));
+ else
+ page_list_add(pg, &heap(node, zone, order));
+}
+
+static void scrub_free_pages(unsigned int node)
+{
+ struct page_info *pg;
+ unsigned int i, zone;
+ int order;
+
+ ASSERT(spin_is_locked(&heap_lock));
+
+ if ( !node_need_scrub[node] )
+ return;
+
+ for ( zone = 0; zone < NR_ZONES; zone++ )
+ {
+ for ( order = MAX_ORDER; order >= 0; order-- )
+ {
+ while ( !page_list_empty(&heap(node, zone, order)) )
+ {
+ /* Unscrubbed pages are always at the end of the list. */
+ pg = page_list_last(&heap(node, zone, order));
+ if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
+ break;
+
+ for ( i = 0; i < (1 << order); i++)
+ {
+ scrub_one_page(&pg[i]);
+ pg[i].count_info &= ~PGC_need_scrub;
+ }
+
+ node_need_scrub[node] -= (1 << order);
+ }
+ }
+ }
}
/* Free 2^@order set of pages. */
static void free_heap_pages(
- struct page_info *pg, unsigned int order)
+ struct page_info *pg, unsigned int order, bool_t need_scrub)
{
unsigned long mfn = page_to_mfn(pg);
unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
@@ -1024,11 +1073,22 @@ static void free_heap_pages(
midsize_alloc_zone_pages = max(
midsize_alloc_zone_pages, total_avail_pages / MIDSIZE_ALLOC_FRAC);
- merge_chunks(pg, node, zone, order);
+ if ( need_scrub && !tainted )
+ {
+ for ( i = 0; i < (1 << order); i++ )
+ pg[i].count_info |= PGC_need_scrub;
+
+ node_need_scrub[node] += (1 << order);
+ }
+
+ merge_chunks(pg, node, zone, order, need_scrub);
if ( tainted )
reserve_offlined_page(pg);
+ if ( need_scrub )
+ scrub_free_pages(node);
+
spin_unlock(&heap_lock);
}
@@ -1249,7 +1309,7 @@ unsigned int online_page(unsigned long mfn, uint32_t *status)
spin_unlock(&heap_lock);
if ( (y & PGC_state) == PGC_state_offlined )
- free_heap_pages(pg, 0);
+ free_heap_pages(pg, 0, 0);
return ret;
}
@@ -1318,7 +1378,7 @@ static void init_heap_pages(
nr_pages -= n;
}
- free_heap_pages(pg+i, 0);
+ free_heap_pages(pg + i, 0, 0);
}
}
@@ -1645,7 +1705,7 @@ void free_xenheap_pages(void *v, unsigned int order)
memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
- free_heap_pages(virt_to_page(v), order);
+ free_heap_pages(virt_to_page(v), order, 0);
}
#else
@@ -1699,12 +1759,9 @@ void free_xenheap_pages(void *v, unsigned int order)
pg = virt_to_page(v);
for ( i = 0; i < (1u << order); i++ )
- {
- scrub_one_page(&pg[i]);
pg[i].count_info &= ~PGC_xen_heap;
- }
- free_heap_pages(pg, order);
+ free_heap_pages(pg, order, 1);
}
#endif
@@ -1813,7 +1870,7 @@ struct page_info *alloc_domheap_pages(
if ( d && !(memflags & MEMF_no_owner) &&
assign_pages(d, pg, order, memflags) )
{
- free_heap_pages(pg, order);
+ free_heap_pages(pg, order, 0);
return NULL;
}
@@ -1881,11 +1938,7 @@ void free_domheap_pages(struct page_info *pg, unsigned int order)
scrub = 1;
}
- if ( unlikely(scrub) )
- for ( i = 0; i < (1 << order); i++ )
- scrub_one_page(&pg[i]);
-
- free_heap_pages(pg, order);
+ free_heap_pages(pg, order, scrub);
}
if ( drop_dom_ref )
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index a66d5b1..b11124f 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -233,6 +233,10 @@ struct page_info
#define PGC_count_width PG_shift(9)
#define PGC_count_mask ((1UL<<PGC_count_width)-1)
+/* Page needs to be scrubbed */
+#define _PGC_need_scrub PG_shift(10)
+#define PGC_need_scrub PG_mask(1, 10)
+
struct spage_info
{
unsigned long type_info;
--
1.7.1
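To see why the head/tail placement matters, here is a toy single-list model
of the discipline this patch introduces (plain C, not Xen code;
page_list_add() and page_list_add_tail() are modelled with a plain array).
Clean chunks go on at the head and dirty ones at the tail, so all dirty
chunks cluster at the end and scrub_free_pages() can walk backwards from the
tail, stopping at the first clean chunk it meets:

#include <stdbool.h>
#include <stdio.h>

struct chunk { int id; bool need_scrub; };

static struct chunk list[16];
static int n;

static void add_head(struct chunk c)          /* clean chunks */
{
    for ( int i = n; i > 0; i-- )
        list[i] = list[i - 1];
    list[0] = c;
    n++;
}

static void add_tail(struct chunk c)          /* dirty chunks */
{
    list[n++] = c;
}

int main(void)
{
    for ( int id = 0; id < 6; id++ )
    {
        struct chunk c = { id, (id % 2) != 0 };   /* odd ids are dirty */

        if ( c.need_scrub )
            add_tail(c);
        else
            add_head(c);
    }

    /* The scrubber walks from the tail, stopping at the first clean chunk. */
    for ( int i = n - 1; i >= 0 && list[i].need_scrub; i-- )
        printf("scrub chunk %d\n", list[i].id);
    return 0;
}

Running this scrubs chunks 5, 3 and 1, then stops as soon as a clean chunk
(id 0) turns up, without scanning the clean half of the list.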
* [PATCH RFC 3/8] mm: Scrub pages in alloc_heap_pages() if needed
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
When allocating pages in alloc_heap_pages(), first look for clean pages. If none
are found, retry the search, this time taking pages marked as unscrubbed, and
scrub them.
Note that we shouldn't find unscrubbed pages in alloc_heap_pages() yet. However,
this will become possible once we stop scrubbing from free_heap_pages() and
instead do it from the idle loop.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/common/page_alloc.c | 92 ++++++++++++++++++++++++++++++++++------------
1 files changed, 68 insertions(+), 24 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 653bb91..6dbe13c 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -693,34 +693,17 @@ static struct page_info *alloc_heap_pages(
unsigned int order, unsigned int memflags,
struct domain *d)
{
- unsigned int i, j, zone = 0, nodemask_retry = 0;
- nodeid_t first_node, node = MEMF_get_node(memflags), req_node = node;
+ unsigned int i, j, zone, nodemask_retry;
+ nodeid_t first_node, node, req_node;
unsigned long request = 1UL << order;
struct page_info *pg;
- nodemask_t nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
- bool_t need_tlbflush = 0;
+ nodemask_t nodemask;
+ bool_t need_tlbflush = 0, use_unscrubbed = 0;
uint32_t tlbflush_timestamp = 0;
/* Make sure there are enough bits in memflags for nodeID. */
BUILD_BUG_ON((_MEMF_bits - _MEMF_node) < (8 * sizeof(nodeid_t)));
- if ( node == NUMA_NO_NODE )
- {
- if ( d != NULL )
- {
- node = next_node(d->last_alloc_node, nodemask);
- if ( node >= MAX_NUMNODES )
- node = first_node(nodemask);
- }
- if ( node >= MAX_NUMNODES )
- node = cpu_to_node(smp_processor_id());
- }
- first_node = node;
-
- ASSERT(node < MAX_NUMNODES);
- ASSERT(zone_lo <= zone_hi);
- ASSERT(zone_hi < NR_ZONES);
-
if ( unlikely(order > MAX_ORDER) )
return NULL;
@@ -734,7 +717,10 @@ static struct page_info *alloc_heap_pages(
total_avail_pages + tmem_freeable_pages()) &&
((memflags & MEMF_no_refcount) ||
!d || d->outstanding_pages < request) )
- goto not_found;
+ {
+ spin_unlock(&heap_lock);
+ return NULL;
+ }
/*
* TMEM: When available memory is scarce due to tmem absorbing it, allow
@@ -747,6 +733,28 @@ static struct page_info *alloc_heap_pages(
tmem_freeable_pages() )
goto try_tmem;
+ again:
+
+ nodemask_retry = 0;
+ nodemask = (d != NULL ) ? d->node_affinity : node_online_map;
+ node = req_node = MEMF_get_node(memflags);
+ if ( node == NUMA_NO_NODE )
+ {
+ if ( d != NULL )
+ {
+ node = next_node(d->last_alloc_node, nodemask);
+ if ( node >= MAX_NUMNODES )
+ node = first_node(nodemask);
+ }
+ if ( node >= MAX_NUMNODES )
+ node = cpu_to_node(smp_processor_id());
+ }
+ first_node = node;
+
+ ASSERT(node < MAX_NUMNODES);
+ ASSERT(zone_lo <= zone_hi);
+ ASSERT(zone_hi < NR_ZONES);
+
/*
* Start with requested node, but exhaust all node memory in requested
* zone before failing, only calc new node value if we fail to find memory
@@ -762,8 +770,16 @@ static struct page_info *alloc_heap_pages(
/* Find smallest order which can satisfy the request. */
for ( j = order; j <= MAX_ORDER; j++ )
+ {
if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
- goto found;
+ {
+ if ( (order == 0) || use_unscrubbed ||
+ !test_bit(_PGC_need_scrub, &pg[0].count_info) )
+ goto found;
+
+ page_list_add_tail(pg, &heap(node, zone, j));
+ }
+ }
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
if ( (memflags & MEMF_exact_node) && req_node != NUMA_NO_NODE )
@@ -802,6 +818,16 @@ static struct page_info *alloc_heap_pages(
}
not_found:
+ /*
+ * If we couldn't find clean page let's search again and this time
+ * take unscrubbed pages if available.
+ */
+ if ( !use_unscrubbed )
+ {
+ use_unscrubbed = 1;
+ goto again;
+ }
+
/* No suitable memory blocks. Fail the request. */
spin_unlock(&heap_lock);
return NULL;
@@ -811,7 +837,10 @@ static struct page_info *alloc_heap_pages(
while ( j != order )
{
PFN_ORDER(pg) = --j;
- page_list_add(pg, &heap(node, zone, j));
+ if ( test_bit(_PGC_need_scrub, &pg[0].count_info) )
+ page_list_add_tail(pg, &heap(node, zone, j));
+ else
+ page_list_add(pg, &heap(node, zone, j));
pg += 1 << j;
}
@@ -825,6 +854,21 @@ static struct page_info *alloc_heap_pages(
if ( d != NULL )
d->last_alloc_node = node;
+ /*
+ * Whole chunk is either clean or dirty so we can
+ * test only the head.
+ */
+ if ( test_bit(_PGC_need_scrub, &pg[0].count_info) )
+ {
+ for ( i = 0; i < (1 << order); i++ )
+ {
+ scrub_one_page(&pg[i]);
+ pg[i].count_info &= ~PGC_need_scrub;
+ }
+ node_need_scrub[node] -= (1 << order);
+ }
+
for ( i = 0; i < (1 << order); i++ )
{
/* Reference count must continuously be zero for free pages. */
--
1.7.1
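The allocation path above reduces to a two-pass search: the first pass skips
chunks with _PGC_need_scrub set, and only if it comes up empty does the
'again:' retry accept dirty chunks and scrub them before handing them out.
A compressed sketch of that control flow (plain C, not Xen code; find_chunk()
is a hypothetical stand-in for the zone/node search loops):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct chunk { bool need_scrub; };

/* The pool only has a dirty chunk available, so the clean pass fails. */
static struct chunk pool = { .need_scrub = true };

static struct chunk *find_chunk(bool use_unscrubbed)
{
    if ( pool.need_scrub && !use_unscrubbed )
        return NULL;               /* first pass: skip dirty chunks */
    return &pool;
}

static struct chunk *alloc(void)
{
    bool use_unscrubbed = false;
    struct chunk *pg;

 again:
    pg = find_chunk(use_unscrubbed);
    if ( pg == NULL )
    {
        if ( !use_unscrubbed )     /* second pass accepts dirty chunks */
        {
            use_unscrubbed = true;
            goto again;
        }
        return NULL;
    }

    if ( pg->need_scrub )          /* scrub before handing it out */
        pg->need_scrub = false;    /* scrub_one_page() would run here */

    return pg;
}

int main(void)
{
    printf("allocation %s\n", alloc() ? "succeeded (chunk scrubbed)" : "failed");
    return 0;
}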
* [PATCH RFC 4/8] mm: Scrub memory from idle loop
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/arch/x86/domain.c | 3 ++-
xen/common/page_alloc.c | 29 ++++++++++++++++++++---------
xen/include/xen/mm.h | 1 +
3 files changed, 23 insertions(+), 10 deletions(-)
diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
index 7d3071e..ce1d97f 100644
--- a/xen/arch/x86/domain.c
+++ b/xen/arch/x86/domain.c
@@ -117,7 +117,8 @@ static void idle_loop(void)
{
if ( cpu_is_offline(smp_processor_id()) )
play_dead();
- (*pm_idle)();
+ if ( !scrub_free_pages(cpu_to_node(smp_processor_id())) )
+ (*pm_idle)();
do_tasklet();
do_softirq();
/*
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 6dbe13c..ac15406 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1032,16 +1032,22 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
page_list_add(pg, &heap(node, zone, order));
}
-static void scrub_free_pages(unsigned int node)
+bool_t scrub_free_pages(unsigned int node)
{
struct page_info *pg;
- unsigned int i, zone;
+ unsigned int i, zone, cpu;
int order;
-
- ASSERT(spin_is_locked(&heap_lock));
+ static unsigned long node_scrubbing;
if ( !node_need_scrub[node] )
- return;
+ return 0;
+
+ if ( test_and_set_bit(node, &node_scrubbing) )
+ return 0;
+
+ cpu = smp_processor_id();
+
+ spin_lock(&heap_lock);
for ( zone = 0; zone < NR_ZONES; zone++ )
{
@@ -1057,13 +1063,21 @@ static void scrub_free_pages(unsigned int node)
for ( i = 0; i < (1 << order); i++)
{
scrub_one_page(&pg[i]);
- pg[i].count_info &= ~PGC_need_scrub;
+ if ( softirq_pending(cpu) )
+ goto out;
}
node_need_scrub[node] -= (1 << order);
+ for ( i = 0; i < (1 << order); i++)
+ pg[i].count_info &= ~PGC_need_scrub;
}
}
}
+
+ out:
+ spin_unlock(&heap_lock);
+ clear_bit(node, &node_scrubbing);
+ return (node_need_scrub[node] != 0);
}
/* Free 2^@order set of pages. */
@@ -1130,9 +1144,6 @@ static void free_heap_pages(
if ( tainted )
reserve_offlined_page(pg);
- if ( need_scrub )
- scrub_free_pages(node);
-
spin_unlock(&heap_lock);
}
diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
index 88de3c1..048d4bf 100644
--- a/xen/include/xen/mm.h
+++ b/xen/include/xen/mm.h
@@ -138,6 +138,7 @@ void init_xenheap_pages(paddr_t ps, paddr_t pe);
void xenheap_max_mfn(unsigned long mfn);
void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
void free_xenheap_pages(void *v, unsigned int order);
+bool_t scrub_free_pages(unsigned int node);
#define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
#define free_xenheap_page(v) (free_xenheap_pages(v,0))
/* Map machine page range in Xen virtual address space. */
--
1.7.1
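The idle-loop hook reads as: scrub until either the node is clean or a
softirq arrives, and go to sleep only when no scrubbing work was left over.
A minimal model of one loop iteration (plain C, not Xen code;
softirq_pending_now and node_dirty are hypothetical stand-ins for
softirq_pending(cpu) and node_need_scrub[node]):

#include <stdbool.h>
#include <stdio.h>

static bool softirq_pending_now;     /* stand-in for softirq_pending(cpu) */
static unsigned int node_dirty = 5;  /* stand-in for node_need_scrub[node] */

static bool scrub_free_pages(void)
{
    while ( node_dirty )
    {
        node_dirty--;                /* scrub_one_page() stand-in */
        if ( softirq_pending_now )
            break;                   /* yield; leftover work reported below */
    }
    return node_dirty != 0;
}

static void idle_loop_iteration(void)
{
    /* Sleep only if scrubbing left nothing behind, as in the patch. */
    if ( !scrub_free_pages() )
        printf("pm_idle()\n");
    printf("do_softirq()\n");
}

int main(void)
{
    idle_loop_iteration();
    return 0;
}

Returning "work remains" whenever the scrub was preempted is what keeps the
CPU from entering a sleep state while dirty pages are still queued.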
* [PATCH RFC 5/8] mm: Do not discard already-scrubbed pages if softirqs are pending
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/common/page_alloc.c | 65 +++++++++++++++++++++++++++++++++++++++++-----
1 files changed, 58 insertions(+), 7 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index ac15406..3469185 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -1032,11 +1032,14 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
page_list_add(pg, &heap(node, zone, order));
}
+#define SCRUB_CHUNK_ORDER 8
bool_t scrub_free_pages(unsigned int node)
{
struct page_info *pg;
unsigned int i, zone, cpu;
int order;
+ unsigned int num_scrubbed, scrub_order, start, end;
+ bool_t preempt;
static unsigned node_scrubbing;
if ( !node_need_scrub[node] )
@@ -1046,6 +1049,7 @@ bool_t scrub_free_pages(unsigned int node)
return 0;
cpu = smp_processor_id();
+ preempt = 0;
spin_lock(&heap_lock);
@@ -1060,16 +1064,63 @@ bool_t scrub_free_pages(unsigned int node)
if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
break;
- for ( i = 0; i < (1 << order); i++)
+ page_list_del(pg, &heap(node, zone, order));
+
+ scrub_order = (order > SCRUB_CHUNK_ORDER) ? SCRUB_CHUNK_ORDER : order;
+ num_scrubbed = 0;
+ while ( num_scrubbed < (1 << order) )
{
- scrub_one_page(&pg[i]);
+ for ( i = 0; i < (1 << scrub_order); i++ )
+ scrub_one_page(&pg[num_scrubbed + i]);
+
+ num_scrubbed += (1 << scrub_order);
if ( softirq_pending(cpu) )
- goto out;
+ {
+ preempt = 1;
+ break;
+ }
+ }
+
+ start = 0;
+ end = num_scrubbed;
+
+ /* Merge clean pages */
+ while ( start < end )
+ {
+ /*
+ * Largest power-of-two chunk starting @start,
+ * not greater than @end
+ */
+ unsigned chunk_order = flsl(end - start) - 1;
+ struct page_info *ppg = &pg[start];
+
+ for ( i = 0; i < (1 << chunk_order); i++ )
+ ppg[i].count_info &= ~PGC_need_scrub;
+
+ node_need_scrub[node] -= (1 << chunk_order);
+
+ PFN_ORDER(ppg) = chunk_order;
+ merge_chunks(ppg, node, zone, chunk_order, 0);
+ start += (1 << chunk_order);
+ }
+
+ /* Merge unscrubbed pages */
+ while ( end < (1 << order) )
+ {
+ /*
+ * Largest power-of-two chunk starting @end, not crossing
+ * next power-of-two boundary
+ */
+ unsigned chunk_order = ffsl(end) - 1;
+ struct page_info *ppg = &pg[end];
+
+ PFN_ORDER(ppg) = chunk_order;
+ merge_chunks(ppg, node, zone, chunk_order, 1);
+ end += (1 << chunk_order);
}
-
- node_need_scrub[node] -= (1 << order);
- for ( i = 0; i < (1 << order); i++)
- pg[i].count_info &= ~PGC_need_scrub;
+
+ if ( preempt )
+ goto out;
}
}
}
--
1.7.1
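The two merge loops added here split a partially scrubbed buddy of 2^order
pages into valid power-of-two chunks: flsl(end - start) - 1 picks the largest
chunk that fits in the clean prefix, while ffsl(end) - 1 stops each dirty
chunk from crossing its own alignment boundary. A standalone sketch of just
that arithmetic (plain C; flsl_() and ffsl_() are local stand-ins for Xen's
flsl()/ffsl()):

#include <stdio.h>

/* Local stand-ins for Xen's flsl()/ffsl() (1-based bit positions). */
static unsigned int flsl_(unsigned long x)
{
    unsigned int r = 0;

    while ( x )
    {
        r++;
        x >>= 1;
    }
    return r;
}

static unsigned int ffsl_(unsigned long x)
{
    unsigned int r = 1;

    if ( !x )
        return 0;
    while ( !(x & 1) )
    {
        r++;
        x >>= 1;
    }
    return r;
}

int main(void)
{
    unsigned int order = 6, num_scrubbed = 13; /* hypothetical: 13 of 64 done */
    unsigned int start = 0, end = num_scrubbed;

    /* Clean prefix: largest power-of-two chunk starting at 'start'. */
    while ( start < end )
    {
        unsigned int chunk_order = flsl_(end - start) - 1;

        printf("clean chunk: pages [%u, %u)\n", start, start + (1u << chunk_order));
        start += 1u << chunk_order;
    }

    /* Dirty suffix: a chunk at 'end' must not cross its alignment boundary. */
    while ( end < (1u << order) )
    {
        unsigned int chunk_order = ffsl_(end) - 1;

        printf("dirty chunk: pages [%u, %u)\n", end, end + (1u << chunk_order));
        end += 1u << chunk_order;
    }
    return 0;
}

With order 6 and 13 pages scrubbed this prints clean chunks [0,8), [8,12),
[12,13) and dirty chunks [13,14), [14,16), [16,32), [32,64) - every piece a
correctly aligned power-of-two chunk that merge_chunks() can take.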
* [PATCH RFC 6/8] spinlock: Introduce _spin_lock_cond()
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
Because _spin_trylock() doesn't take a lock ticket, it may take a
long time until the lock is acquired.
Add _spin_lock_cond(), which waits for the lock while periodically
checking a condition that may cause the lock request to be dropped.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/common/spinlock.c | 25 +++++++++++++++++++++++++
xen/include/xen/spinlock.h | 3 +++
2 files changed, 28 insertions(+), 0 deletions(-)
diff --git a/xen/common/spinlock.c b/xen/common/spinlock.c
index 2a06406..4fd46e5 100644
--- a/xen/common/spinlock.c
+++ b/xen/common/spinlock.c
@@ -129,6 +129,31 @@ static always_inline u16 observe_head(spinlock_tickets_t *t)
return read_atomic(&t->head);
}
+int _spin_lock_cond(spinlock_t *lock, bool_t (*cond)(void *), void *data)
+{
+ spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
+ LOCK_PROFILE_VAR;
+
+ check_lock(&lock->debug);
+ tickets.head_tail = arch_fetch_and_add(&lock->tickets.head_tail,
+ tickets.head_tail);
+ while ( tickets.tail != observe_head(&lock->tickets) )
+ {
+ LOCK_PROFILE_BLOCK;
+ arch_lock_relax();
+ if ( cond && !cond(data) )
+ {
+ add_sized(&lock->tickets.head, 1);
+ arch_lock_signal();
+ return 0;
+ }
+ }
+ LOCK_PROFILE_GOT;
+ preempt_disable();
+ arch_lock_acquire_barrier();
+ return 1;
+}
+
void _spin_lock(spinlock_t *lock)
{
spinlock_tickets_t tickets = SPINLOCK_TICKET_INC;
diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h
index c1883bd..b8bf72a 100644
--- a/xen/include/xen/spinlock.h
+++ b/xen/include/xen/spinlock.h
@@ -153,6 +153,7 @@ typedef struct spinlock {
#define spin_lock_init(l) (*(l) = (spinlock_t)SPIN_LOCK_UNLOCKED)
void _spin_lock(spinlock_t *lock);
+int _spin_lock_cond(spinlock_t *lock, bool_t (*cond)(void *), void *data);
void _spin_lock_irq(spinlock_t *lock);
unsigned long _spin_lock_irqsave(spinlock_t *lock);
@@ -169,6 +170,8 @@ void _spin_lock_recursive(spinlock_t *lock);
void _spin_unlock_recursive(spinlock_t *lock);
#define spin_lock(l) _spin_lock(l)
+#define spin_lock_cond(l, c, d) _spin_lock_cond(l, c, d)
+#define spin_lock_kick() arch_lock_signal()
#define spin_lock_irq(l) _spin_lock_irq(l)
#define spin_lock_irqsave(l, f) \
({ \
--
1.7.1
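The difference from _spin_trylock() is that the caller still queues for a
ticket, but may withdraw from the queue: bumping head by one on abort
compensates for the tail increment its unused ticket caused, so later
waiters are not stranded. A toy user-space model of the idea in C11 atomics
(not the Xen implementation; it omits the profiling hooks, preemption
control and arch barriers, and keeps head/tail as two atomics rather than
one packed word):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy ticket lock. */
static atomic_uint head, tail;

/* Queue for a ticket, but withdraw if cond() fails while waiting. */
static bool lock_cond(bool (*cond)(void *), void *data)
{
    unsigned int ticket = atomic_fetch_add(&tail, 1);

    while ( atomic_load(&head) != ticket )
    {
        if ( cond && !cond(data) )
        {
            /*
             * Give the ticket back: bumping head compensates for our
             * tail increment, so later waiters still make progress.
             */
            atomic_fetch_add(&head, 1);
            return false;
        }
    }
    return true;   /* lock taken */
}

static void unlock(void)
{
    atomic_fetch_add(&head, 1);
}

static bool keep_waiting(void *data)
{
    return *(bool *)data;
}

int main(void)
{
    bool go_on = true;

    if ( lock_cond(keep_waiting, &go_on) )
    {
        printf("lock acquired\n");
        unlock();
    }
    return 0;
}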
* [PATCH RFC 7/8] mm: Keep pages available for allocation while scrubbing
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
Instead of scrubbing pages while holding the heap lock we can mark the
buddy's head as being scrubbed and drop the lock temporarily.
If someone (most likely alloc_heap_pages()) tries to access
this chunk it will signal the scrubber to abort the scrub by setting the
head's PAGE_SCRUB_ABORT bit. The scrubber checks this bit after
processing each page and stops its work as soon as it sees it.
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/common/page_alloc.c | 67 +++++++++++++++++++++++++++++++++++++++++----
xen/include/asm-x86/mm.h | 4 +++
2 files changed, 65 insertions(+), 6 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index 3469185..a39afd4 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -687,6 +687,18 @@ static void check_low_mem_virq(void)
}
}
+static void check_and_stop_scrub(struct page_info *head)
+{
+ if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+ {
+ head->u.free.scrub_state |= PAGE_SCRUB_ABORT;
+ smp_mb();
+ spin_lock_kick();
+ while ( ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT )
+ cpu_relax();
+ }
+}
+
/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
unsigned int zone_lo, unsigned int zone_hi,
@@ -773,10 +785,15 @@ static struct page_info *alloc_heap_pages(
{
if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
{
- if ( (order == 0) || use_unscrubbed ||
- !test_bit(_PGC_need_scrub, &pg[0].count_info) )
+ if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
goto found;
+ if ( (order == 0) || use_unscrubbed )
+ {
+ check_and_stop_scrub(pg);
+ goto found;
+ }
+
page_list_add_tail(pg, &heap(node, zone, j));
}
}
@@ -911,6 +928,8 @@ static int reserve_offlined_page(struct page_info *head)
cur_head = head;
+ check_and_stop_scrub(head);
+
page_list_del(head, &heap(node, zone, head_order));
while ( cur_head < (head + (1 << head_order)) )
@@ -990,6 +1009,9 @@ static bool_t can_merge(struct page_info *head, unsigned int node,
!!test_bit(_PGC_need_scrub, &head->count_info) )
return 0;
+ if ( head->u.free.scrub_state & PAGE_SCRUBBING )
+ return 0;
+
return 1;
}
@@ -1033,6 +1055,14 @@ static void merge_chunks(struct page_info *pg, unsigned int node,
}
#define SCRUB_CHUNK_ORDER 8
+
+static bool_t scrub_continue(void *data)
+{
+ struct page_info *head = (struct page_info *)data;
+
+ return !(ACCESS_ONCE(head->u.free.scrub_state) & PAGE_SCRUB_ABORT);
+}
+
bool_t scrub_free_pages(unsigned int node)
{
struct page_info *pg;
@@ -1064,14 +1094,25 @@ bool_t scrub_free_pages(unsigned int node)
if ( !test_bit(_PGC_need_scrub, &pg[0].count_info) )
break;
- page_list_del(pg, &heap(node, zone, order));
+ ASSERT(!pg[0].u.free.scrub_state);
+ pg[0].u.free.scrub_state = PAGE_SCRUBBING;
+
+ spin_unlock(&heap_lock);
scrub_order = (order > SCRUB_CHUNK_ORDER) ? SCRUB_CHUNK_ORDER : order;
num_scrubbed = 0;
- while ( num_scrubbed < (1 << order) )
+ while ( scrub_continue(&pg[0]) && num_scrubbed < (1 << order) )
{
for ( i = 0; i < (1 << scrub_order); i++ )
+ {
scrub_one_page(&pg[num_scrubbed + i]);
+ if ( !scrub_continue(&pg[0]) )
+ {
+ /* Someone wants this chunk. Drop everything. */
+ pg[0].u.free.scrub_state = 0;
+ goto out_nolock;
+ }
+ }
num_scrubbed += (1 << scrub_order);
if ( softirq_pending(cpu) )
@@ -1080,7 +1121,15 @@ bool_t scrub_free_pages(unsigned int node)
break;
}
}
-
+
+ if ( !spin_lock_cond(&heap_lock, scrub_continue, &pg[0]) )
+ {
+ pg[0].u.free.scrub_state = 0;
+ goto out_nolock;
+ }
+
+ page_list_del(pg, &heap(node, zone, order));
+
start = 0;
end = num_scrubbed;
@@ -1118,7 +1167,9 @@ bool_t scrub_free_pages(unsigned int node)
merge_chunks(ppg, node, zone, chunk_order, 1);
end += (1 << chunk_order);
}
-
+
+ pg[0].u.free.scrub_state = 0;
+
if ( preempt )
goto out;
}
@@ -1127,6 +1178,8 @@ bool_t scrub_free_pages(unsigned int node)
out:
spin_unlock(&heap_lock);
+
+ out_nolock:
clear_bit(node, &node_scrubbing);
return (node_need_scrub[node] != 0);
}
@@ -1165,6 +1218,8 @@ static void free_heap_pages(
if ( page_state_is(&pg[i], offlined) )
tainted = 1;
+ pg[i].u.free.scrub_state = 0;
+
/* If a page has no owner it will need no safety TLB flush. */
pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
if ( pg[i].u.free.need_tlbflush )
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index b11124f..dd84835 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -87,6 +87,10 @@ struct page_info
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
+#define PAGE_SCRUBBING (1<<1)
+#define PAGE_SCRUB_ABORT (1<<2)
+ unsigned char scrub_state;
+
/* Do TLBs need flushing for safety before next page use? */
bool_t need_tlbflush;
} free;
--
1.7.1
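The handshake boils down to: the scrubber owns the buddy while PAGE_SCRUBBING
is set, the allocator asks for it back by setting PAGE_SCRUB_ABORT, and the
scrubber polls that bit after every page. A single-threaded sketch of the
protocol (plain C, not Xen code; the abort bit is really set from another
CPU by check_and_stop_scrub(), simulated here at i == 1):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SCRUBBING   (1 << 1)
#define PAGE_SCRUB_ABORT (1 << 2)

static unsigned char scrub_state;   /* the head page's u.free.scrub_state */

static bool scrub_continue(void)
{
    return !(scrub_state & PAGE_SCRUB_ABORT);
}

/* Scrubber: claim the buddy, scrub page by page, honour abort requests. */
static void scrub_buddy(unsigned int pages)
{
    scrub_state = PAGE_SCRUBBING;

    for ( unsigned int i = 0; i < pages; i++ )
    {
        printf("scrub page %u\n", i);

        if ( i == 1 )                    /* simulated allocator interest */
            scrub_state |= PAGE_SCRUB_ABORT;

        if ( !scrub_continue() )
        {
            printf("abort: allocator wants this buddy\n");
            break;
        }
    }

    scrub_state = 0;   /* releases the waiter in check_and_stop_scrub() */
}

int main(void)
{
    scrub_buddy(8);
    return 0;
}

Clearing scrub_state at the end is what releases the allocator, which in the
patch busy-waits on exactly that field in check_and_stop_scrub().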
* [PATCH RFC 8/8] mm: Print number of unscrubbed pages in 'H' debug handler
From: Boris Ostrovsky @ 2017-02-27 0:22 UTC
To: xen-devel
Cc: sstabellini, wei.liu2, George.Dunlap, andrew.cooper3, ian.jackson,
jbeulich, tim, Boris Ostrovsky
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
xen/common/page_alloc.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
index a39afd4..0266667 100644
--- a/xen/common/page_alloc.c
+++ b/xen/common/page_alloc.c
@@ -2198,6 +2198,13 @@ static void dump_heap(unsigned char key)
printk("heap[node=%d][zone=%d] -> %lu pages\n",
i, j, avail[i][j]);
}
+
+ for ( i = 0; i < MAX_NUMNODES; i++ )
+ {
+ if ( !node_need_scrub[i] )
+ continue;
+ printk("Node %d has %u unscrubbed pages\n", i, node_need_scrub[i]);
+ }
}
static __init int register_heap_trigger(void)
--
1.7.1