* [PATCHv13 1/2] gnttab: use per-VCPU maptrack free lists
2015-06-15 14:47 [PATCHv13 0/2] gnttab: Improve scaleability David Vrabel
@ 2015-06-15 14:47 ` David Vrabel
2015-06-16 6:27 ` Jan Beulich
2015-06-15 14:47 ` [PATCHv13 2/2] gnttab: steal maptrack entries from other VCPUs David Vrabel
1 sibling, 1 reply; 6+ messages in thread
From: David Vrabel @ 2015-06-15 14:47 UTC (permalink / raw)
To: xen-devel
Cc: Keir Fraser, Ian Campbell, Tim Deegan, David Vrabel, Jan Beulich,
Malcolm Crossley
From: Malcolm Crossley <malcolm.crossley@citrix.com>
Performance analysis of aggregate network throughput with many VMs
shows that performance is significantly limited by contention on the
maptrack lock when obtaining/releasing maptrack handles from the free
list.
Instead of a single free list use a per-VCPU list. This avoids any
contention when obtaining a handle. Handles must be released back to
their original list and since this may occur on a different VCPU there
is some contention on the destination VCPU's free list tail pointer
(but this is much better than a per-domain lock).
Increase the default maximum number of maptrack frames by 4 times
because: a) struct grant_mapping is now 16 bytes (instead of 8); and
b) a guest may not evenly distribute all the grant map operations
across the VCPUs (meaning some VCPUs need more maptrack entries than
others).
Signed-off-by: Malcolm Crossley <malcolm.crossley@citrix.com>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
v13:
- When adding new frames, update v->maptrack_head last.
- Each VCPU no longer gets a reserved number of frames -- they're
allocated first-come, first-served.
- Use original gt->maptrack_limit (instead of gt->maptrack_pages).
v11:
- Allocate maptrack array with vzalloc() since it may be >1 page.
v10:
- Divide max_maptrack_frames evenly amongst the VCPUs.
- Increase default max_maptrack_frames to compensate.
---
xen/common/grant_table.c | 130 ++++++++++++++++++++++++++---------------
xen/include/xen/grant_table.h | 5 +-
xen/include/xen/sched.h | 4 ++
3 files changed, 90 insertions(+), 49 deletions(-)
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index e134f13..8dfd30b 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -37,6 +37,7 @@
#include <xen/iommu.h>
#include <xen/paging.h>
#include <xen/keyhandler.h>
+#include <xen/vmap.h>
#include <xsm/xsm.h>
#include <asm/flushtlb.h>
@@ -57,7 +58,7 @@ integer_param("gnttab_max_frames", max_grant_frames);
* New options allow to set max_maptrack_frames and
* map_grant_table_frames independently.
*/
-#define DEFAULT_MAX_MAPTRACK_FRAMES 256
+#define DEFAULT_MAX_MAPTRACK_FRAMES 1024
static unsigned int __read_mostly max_maptrack_frames;
integer_param("gnttab_max_maptrack_frames", max_maptrack_frames);
@@ -279,66 +280,103 @@ double_gt_unlock(struct grant_table *lgt, struct grant_table *rgt)
static inline int
__get_maptrack_handle(
- struct grant_table *t)
+ struct grant_table *t,
+ struct vcpu *v)
{
- unsigned int h;
- if ( unlikely((h = t->maptrack_head) == MAPTRACK_TAIL) )
+ unsigned int head, next;
+
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = v->maptrack_head;
+ if ( unlikely(head == MAPTRACK_TAIL) )
+ return -1;
+
+ /*
+ * Always keep one entry in the free list to make it easier to add
+ * free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
return -1;
- t->maptrack_head = maptrack_entry(t, h).ref;
- return h;
+
+ v->maptrack_head = next;
+
+ return head;
}
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
{
- spin_lock(&t->maptrack_lock);
- maptrack_entry(t, handle).ref = t->maptrack_head;
- t->maptrack_head = handle;
- spin_unlock(&t->maptrack_lock);
+ struct domain *d = current->domain;
+ struct vcpu *v;
+ unsigned int prev_tail, cur_tail;
+
+ /* 1. Set entry to be a tail. */
+ maptrack_entry(t, handle).ref = MAPTRACK_TAIL;
+
+ /* 2. Add entry to the tail of the list on the original VCPU. */
+ v = d->vcpu[maptrack_entry(t,handle).vcpu];
+
+ cur_tail = read_atomic(&v->maptrack_tail);
+ do {
+ prev_tail = cur_tail;
+ cur_tail = cmpxchg(&v->maptrack_tail, prev_tail, handle);
+ } while ( cur_tail != prev_tail );
+
+ /* 3. Update the old tail entry to point to the new entry. */
+ write_atomic(&maptrack_entry(t, prev_tail).ref, handle);
}
static inline int
get_maptrack_handle(
struct grant_table *lgt)
{
+ struct vcpu *v = current;
int i;
grant_handle_t handle;
struct grant_mapping *new_mt;
- unsigned int new_mt_limit, nr_frames;
+
+ handle = __get_maptrack_handle(lgt, v);
+ if ( likely(handle != -1) )
+ return handle;
spin_lock(&lgt->maptrack_lock);
- while ( unlikely((handle = __get_maptrack_handle(lgt)) == -1) )
+ if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
- nr_frames = nr_maptrack_frames(lgt);
- if ( nr_frames >= max_maptrack_frames )
- break;
-
- new_mt = alloc_xenheap_page();
- if ( !new_mt )
- break;
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
- clear_page(new_mt);
+ new_mt = alloc_xenheap_page();
+ if ( !new_mt )
+ {
+ spin_unlock(&lgt->maptrack_lock);
+ return -1;
+ }
+ clear_page(new_mt);
- new_mt_limit = lgt->maptrack_limit + MAPTRACK_PER_PAGE;
+ for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
+ {
+ new_mt[i - 1].ref = lgt->maptrack_limit + i;
+ new_mt[i - 1].vcpu = v->vcpu_id;
+ }
+ /* Set last entry vcpu and ref */
+ new_mt[i - 1].ref = v->maptrack_head;
+ new_mt[i - 1].vcpu = v->vcpu_id;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- new_mt[i - 1].ref = lgt->maptrack_limit + i;
- new_mt[i - 1].ref = lgt->maptrack_head;
- lgt->maptrack_head = lgt->maptrack_limit;
+ /* Set tail directly if this is the first page for this VCPU. */
+ if ( v->maptrack_tail == MAPTRACK_TAIL )
+ v->maptrack_tail = lgt->maptrack_limit + MAPTRACK_PER_PAGE - 1;
- lgt->maptrack[nr_frames] = new_mt;
- smp_wmb();
- lgt->maptrack_limit = new_mt_limit;
+ v->maptrack_head = lgt->maptrack_limit;
- gdprintk(XENLOG_INFO, "Increased maptrack size to %u frames\n",
- nr_frames + 1);
- }
+ lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
+ lgt->maptrack_limit += MAPTRACK_PER_PAGE;
spin_unlock(&lgt->maptrack_lock);
- return handle;
+ return __get_maptrack_handle(lgt, v);
}
/* Number of grant table entries. Caller must hold d's grant table lock. */
@@ -1427,6 +1465,7 @@ gnttab_setup_table(
struct gnttab_setup_table op;
struct domain *d;
struct grant_table *gt;
+ struct vcpu *v;
int i;
xen_pfn_t gmfn;
@@ -1468,6 +1507,17 @@ gnttab_setup_table(
gt = d->grant_table;
write_lock(&gt->lock);
+ /* Tracking of mapped foreign frames table */
+ gt->maptrack = vzalloc(max_maptrack_frames * sizeof(*gt->maptrack));
+ if ( gt->maptrack == NULL )
+ goto out3;
+ for_each_vcpu( d, v )
+ {
+ v->maptrack_head = MAPTRACK_TAIL;
+ v->maptrack_tail = MAPTRACK_TAIL;
+ }
+ gt->maptrack_limit = 0;
+
if ( gt->gt_version == 0 )
gt->gt_version = 1;
@@ -3061,18 +3111,6 @@ grant_table_create(
spin_lock_init(&t->active[i][j].lock);
}
- /* Tracking of mapped foreign frames table */
- if ( (t->maptrack = xzalloc_array(struct grant_mapping *,
- max_maptrack_frames)) == NULL )
- goto no_mem_2;
- if ( (t->maptrack[0] = alloc_xenheap_page()) == NULL )
- goto no_mem_3;
- clear_page(t->maptrack[0]);
- t->maptrack_limit = MAPTRACK_PER_PAGE;
- for ( i = 1; i < MAPTRACK_PER_PAGE; i++ )
- t->maptrack[0][i - 1].ref = i;
- t->maptrack[0][i - 1].ref = MAPTRACK_TAIL;
-
/* Shared grant table. */
if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL )
goto no_mem_3;
@@ -3103,8 +3141,6 @@ grant_table_create(
free_xenheap_page(t->shared_raw[i]);
xfree(t->shared_raw);
no_mem_3:
- free_xenheap_page(t->maptrack[0]);
- xfree(t->maptrack);
no_mem_2:
for ( i = 0;
i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ )
@@ -3239,7 +3275,7 @@ grant_table_destroy(
for ( i = 0; i < nr_maptrack_frames(t); i++ )
free_xenheap_page(t->maptrack[i]);
- xfree(t->maptrack);
+ vfree(t->maptrack);
for ( i = 0; i < nr_active_grant_frames(t); i++ )
free_xenheap_page(t->active[i]);
diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
index f22ebd0..0150f3c 100644
--- a/xen/include/xen/grant_table.h
+++ b/xen/include/xen/grant_table.h
@@ -60,6 +60,8 @@ struct grant_mapping {
u32 ref; /* grant ref */
u16 flags; /* 0-4: GNTMAP_* ; 5-15: unused */
domid_t domid; /* granting domain */
+ u32 vcpu; /* vcpu which created the grant mapping */
+ u32 pad; /* round size to a power of 2 */
};
/* Per-domain grant information. */
@@ -83,9 +85,8 @@ struct grant_table {
grant_status_t **status;
/* Active grant table. */
struct active_grant_entry **active;
- /* Mapping tracking table. */
+ /* Mapping tracking table per vcpu. */
struct grant_mapping **maptrack;
- unsigned int maptrack_head;
unsigned int maptrack_limit;
/* Lock protecting the maptrack page list, head, and limit */
spinlock_t maptrack_lock;
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 80c6f62..246c5d7 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -219,6 +219,10 @@ struct vcpu
/* VCPU paused by system controller. */
int controller_pause_count;
+ /* Maptrack */
+ unsigned int maptrack_head;
+ unsigned int maptrack_tail;
+
/* IRQ-safe virq_lock protects against delivering VIRQ to stale evtchn. */
evtchn_port_t virq_to_evtchn[NR_VIRQS];
spinlock_t virq_lock;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCHv13 2/2] gnttab: steal maptrack entries from other VCPUs
2015-06-15 14:47 [PATCHv13 0/2] gnttab: Improve scaleability David Vrabel
2015-06-15 14:47 ` [PATCHv13 1/2] gnttab: use per-VCPU maptrack free lists David Vrabel
@ 2015-06-15 14:47 ` David Vrabel
2015-06-16 6:36 ` Jan Beulich
1 sibling, 1 reply; 6+ messages in thread
From: David Vrabel @ 2015-06-15 14:47 UTC (permalink / raw)
To: xen-devel
Cc: Keir Fraser, Tim Deegan, David Vrabel, Jan Beulich, Ian Campbell
If a guest is not evenly grant mapping across its VCPUs one of the
VCPUs may run out of free maptrack entries even though other VCPUs
have many free.
If this happens, "steal" free entries from other VCPUs. We want to
steal entries such that:
a) We avoid ping-ponging stolen entries between VCPUs.
b) The number of free entries owned by each VCPU tends (over time) to
the number it uses.
So when stealing, we select a VCPU at random (reducing the ping-ponging
in (a)) and we transfer the stolen entries to the thief VCPU (aiming for (b)).
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
v13:
- Steal an extra entry for the sentinel if a VCPU's free list is
uninitialized.
v12:
- New.
---
xen/common/grant_table.c | 91 ++++++++++++++++++++++++++++++++++++++--------
1 file changed, 76 insertions(+), 15 deletions(-)
diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
index 8dfd30b..d1c271e 100644
--- a/xen/common/grant_table.c
+++ b/xen/common/grant_table.c
@@ -283,26 +283,66 @@ __get_maptrack_handle(
struct grant_table *t,
struct vcpu *v)
{
- unsigned int head, next;
+ unsigned int head, next, prev_head;
- /* No maptrack pages allocated for this VCPU yet? */
- head = v->maptrack_head;
- if ( unlikely(head == MAPTRACK_TAIL) )
- return -1;
+ do {
+ /* No maptrack pages allocated for this VCPU yet? */
+ head = read_atomic(&v->maptrack_head);
+ if ( unlikely(head == MAPTRACK_TAIL) )
+ return -1;
- /*
- * Always keep one entry in the free list to make it easier to add
- * free entries to the tail.
- */
- next = read_atomic(&maptrack_entry(t, head).ref);
- if ( unlikely(next == MAPTRACK_TAIL) )
- return -1;
+ /*
+ * Always keep one entry in the free list to make it easier to
+ * add free entries to the tail.
+ */
+ next = read_atomic(&maptrack_entry(t, head).ref);
+ if ( unlikely(next == MAPTRACK_TAIL) )
+ return -1;
- v->maptrack_head = next;
+ prev_head = head;
+ head = cmpxchg(&v->maptrack_head, prev_head, next);
+ } while ( head != prev_head );
return head;
}
+/*
+ * Try to "steal" a free maptrack entry from another VCPU.
+ *
+ * A stolen entry is transferred to the thief, so the number of
+ * entries for each VCPU should tend to the usage pattern.
+ *
+ * To avoid having to atomically count the number of free entries on
+ * each VCPU and to avoid two VCPU repeatedly stealing entries from
+ * each other, the initial victim VCPU is selected randomly.
+ */
+static int steal_maptrack_handle(struct grant_table *t, struct vcpu *v)
+{
+ struct domain *d = v->domain;
+ unsigned int first, i;
+
+ /* Find an initial victim. */
+ first = i = NOW() % d->max_vcpus;
+
+ do {
+ unsigned int handle;
+
+ handle = __get_maptrack_handle(t, d->vcpu[i]);
+ if ( handle >= 0 )
+ {
+ maptrack_entry(t, handle).vcpu = v->vcpu_id;
+ return handle;
+ }
+
+ i++;
+ if ( i == d->max_vcpus )
+ i = 0;
+ } while ( i != first );
+
+ /* No free handles on any VCPU. */
+ return -1;
+}
+
static inline void
put_maptrack_handle(
struct grant_table *t, int handle)
@@ -342,10 +382,31 @@ get_maptrack_handle(
spin_lock(&lgt->maptrack_lock);
+ /*
+ * If we've run out of frames, try stealing an entry from another
+ * VCPU (in case the guest isn't mapping across its VCPUs evenly).
+ */
if ( nr_maptrack_frames(lgt) >= max_maptrack_frames )
{
+ /*
+ * Can drop the lock since no other VCPU can be adding a new
+ * frame once they've run out.
+ */
spin_unlock(&lgt->maptrack_lock);
- return -1;
+
+ /*
+ * Uninitialized free list? Steal an extra entry for the tail
+ * sentinel.
+ */
+ if ( v->maptrack_tail == MAPTRACK_TAIL )
+ {
+ handle = steal_maptrack_handle(lgt, v);
+ if ( handle == -1 )
+ return -1;
+ v->maptrack_tail = handle;
+ write_atomic(&v->maptrack_head, handle);
+ }
+ return steal_maptrack_handle(lgt, v);
}
new_mt = alloc_xenheap_page();
@@ -369,7 +430,7 @@ get_maptrack_handle(
if ( v->maptrack_tail == MAPTRACK_TAIL )
v->maptrack_tail = lgt->maptrack_limit + MAPTRACK_PER_PAGE - 1;
- v->maptrack_head = lgt->maptrack_limit;
+ write_atomic(&v->maptrack_head, lgt->maptrack_limit);
lgt->maptrack[nr_maptrack_frames(lgt)] = new_mt;
lgt->maptrack_limit += MAPTRACK_PER_PAGE;
--
1.7.10.4
^ permalink raw reply related [flat|nested] 6+ messages in thread