* Re: [PATCH v2 2/2] ioreq-server: Support scatter page forwarding
2014-08-22 19:18 ` [PATCH v2 2/2] ioreq-server: Support scatter page forwarding Wei Ye
@ 2014-08-22 10:35 ` Andrew Cooper
2014-08-26 8:40 ` Ye, Wei
2014-08-26 11:35 ` Paul Durrant
2014-08-26 13:26 ` Jan Beulich
2 siblings, 1 reply; 14+ messages in thread
From: Andrew Cooper @ 2014-08-22 10:35 UTC (permalink / raw)
To: Wei Ye, xen-devel
Cc: kevin.tian, keir, ian.campbell, stefano.stabellini, tim,
ian.jackson, donald.d.dugger, Paul.Durrant, zhiyuan.lv, JBeulich,
yang.z.zhang
On 22/08/14 20:18, Wei Ye wrote:
> Extend the interface to support add/remove scatter page list to be
> forwarded by a dedicated ioreq-server instance. Check and select
> a right ioreq-server instance for forwarding the write operation
> for a write protected page.
>
> Signed-off-by: Wei Ye <wei.ye@intel.com>
> ---
> tools/libxc/xc_domain.c | 32 ++++++
> tools/libxc/xenctrl.h | 18 ++++
> xen/arch/x86/hvm/hvm.c | 209 ++++++++++++++++++++++++++++++++++++++
> xen/include/asm-x86/hvm/domain.h | 9 ++
> xen/include/public/hvm/hvm_op.h | 12 +++
> 5 files changed, 280 insertions(+)
>
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 37ed141..36e4e59 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1485,6 +1485,38 @@ int xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch, domid_t domid,
> return rc;
> }
>
> +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch, domid_t domid,
> + ioservid_t id, uint16_t set,
> + uint16_t nr_pages, uint64_t *gpfn)
> +{
> + DECLARE_HYPERCALL;
> + DECLARE_HYPERCALL_BUFFER(xen_hvm_map_pages_to_ioreq_server_t, arg);
> + int pg, rc = -1;
> +
> + if ( arg == NULL )
> + return -1;
You must set errno before exiting -1.
> +
> + if ( nr_pages > XEN_IOREQSVR_MAX_MAP_BATCH )
> + goto out;
> +
> + hypercall.op = __HYPERVISOR_hvm_op;
> + hypercall.arg[0] = HVMOP_map_pages_to_ioreq_server;
> + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +
> + arg->domid = domid;
> + arg->id = id;
> + arg->set = set;
> + arg->nr_pages = nr_pages;
> + for ( pg = 0; pg < nr_pages; pg++ )
> + arg->page_list[pg] = gpfn[pg];
memcpy()
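i.e. something like (untested):
    memcpy(arg->page_list, gpfn, nr_pages * sizeof(*gpfn));
which replaces the open-coded copy loop and drops the separate 'pg' counter.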
> +
> + rc = do_xen_hypercall(xch, &hypercall);
> +
> +out:
> + xc_hypercall_buffer_free(xch, arg);
> + return rc;
> +}
> +
> int xc_hvm_destroy_ioreq_server(xc_interface *xch,
> domid_t domid,
> ioservid_t id)
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index b55d857..84e8465 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1943,6 +1943,24 @@ int xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch,
> uint8_t function);
>
> /**
> + * This function registers/deregisters a set of pages to be write protected.
> + *
> + * @parm xch a handle to an open hypervisor interface.
> + * @parm domid the domain id to be serviced
> + * @parm id the IOREQ Server id.
> + * @parm set whether registers or deregisters: 1 for register, 0 for deregister
> + * @parm nr_pages the count of pages
> + * @parm pgfn the guest page frame number list
> + * @return 0 on success, -1 on failure.
> + */
> +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
> + domid_t domid,
> + ioservid_t id,
> + uint16_t set,
> + uint16_t nr_pages,
> + uint64_t *gpfn);
Alignment of parameters.
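i.e. indent the continuation lines so the parameters line up under the first one:
    int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
                                         domid_t domid,
                                         ioservid_t id,
                                         uint16_t set,
                                         uint16_t nr_pages,
                                         uint64_t *gpfn);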
> +
> +/**
> * This function destroys an IOREQ Server.
> *
> * @parm xch a handle to an open hypervisor interface.
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 4984149..bbbacc3 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -68,6 +68,12 @@
> #include <public/mem_event.h>
> #include <xen/rangeset.h>
This hash chain code belongs in its own patch. Please split it out, to
allow the hypercall interface changes to be in a smaller patch.
>
> +#define PGLIST_HASH_SIZE_SHIFT 8
> +#define PGLIST_HASH_SIZE (1 << PGLIST_HASH_SIZE_SHIFT)
> +#define pglist_hash(x) ((x) % PGLIST_HASH_SIZE)
> +#define PGLIST_INVALID_GPFN 0
Use INVALID_MFN. GFN 0 is perfectly legitimate as the target of scatter
forwarding.
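i.e. something like the below (with the consequence that the hash slots then need explicit initialisation, since an all-zero table no longer means "empty"):
    #define PGLIST_INVALID_GPFN INVALID_MFN
    /* at table allocation time: */
    for ( i = 0; i < PGLIST_HASH_SIZE; i++ )
        pglist_ht[i].gpfn = PGLIST_INVALID_GPFN;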
> +#define PGLIST_HASH_ENTRY_SIZE sizeof(struct pglist_hash_table)
> +
> bool_t __read_mostly hvm_enabled;
>
> unsigned int opt_hvm_debug_level __read_mostly;
> @@ -744,6 +750,98 @@ static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s)
> spin_unlock(&s->lock);
> }
>
> +static struct pglist_hash_table *hvm_ioreq_server_lookup_pglist_hash_table(
> + struct pglist_hash_table *pglist_ht, uint64_t gpfn)
> +{
> + unsigned int index = pglist_hash(gpfn);
> + struct pglist_hash_table *entry;
> +
> + for ( entry = &pglist_ht[index]; entry != NULL; entry = entry->next )
> + if ( entry->gpfn != PGLIST_INVALID_GPFN && entry->gpfn == gpfn )
> + break;
> +
> + return entry;
> +}
> +
> +static void hvm_ioreq_server_free_hash_chain(struct pglist_hash_table *chain)
> +{
> + struct pglist_hash_table *p = chain;
> + struct pglist_hash_table *n;
> +
> + while ( p )
> + {
> + n = p->next;
> + xfree(p);
> + p = n;
> + }
> +}
> +
> +static void hvm_ioreq_server_free_pglist_hash(struct pglist_hash_table *pglist_ht)
> +{
> + unsigned int i;
> +
> + for ( i = 0; i < PGLIST_HASH_SIZE; i++ )
> + if ( pglist_ht[i].next != NULL )
> + hvm_ioreq_server_free_hash_chain(pglist_ht[i].next);
> +}
> +
> +static int hvm_ioreq_server_pglist_hash_add(struct pglist_hash_table *pglist_ht,
> + uint64_t gpfn)
> +{
> + unsigned int index = pglist_hash(gpfn);
> + struct pglist_hash_table *ne;
> +
> + if ( hvm_ioreq_server_lookup_pglist_hash_table(pglist_ht, gpfn) != NULL )
> + return -EINVAL;
> +
> + if ( pglist_ht[index].gpfn == PGLIST_INVALID_GPFN )
> + pglist_ht[index].gpfn = gpfn;
> + else
> + {
> + ne = xmalloc(struct pglist_hash_table);
> + if ( !ne )
> + return -ENOMEM;
> + ne->next = pglist_ht[index].next;
> + ne->gpfn = gpfn;
> + pglist_ht[index].next = ne;
> + }
> +
> + return 0;
> +}
> +
> +static int hvm_ioreq_server_pglist_hash_rem(struct pglist_hash_table *pglist_ht,
> + uint64_t gpfn)
> +{
> + unsigned int index = pglist_hash(gpfn);
> + struct pglist_hash_table *next, *prev;
> +
> + if ( pglist_ht[index].gpfn == gpfn )
> + pglist_ht[index].gpfn = PGLIST_INVALID_GPFN;
> + else
> + {
> + prev = &pglist_ht[index];
> + while ( 1 )
> + {
> + next = prev->next;
> + if ( !next )
> + {
> + printk(XENLOG_G_WARNING "hvm_ioreq_server_pglist_hash_rem hash_table %p remove"
> + " %lx not found\n", pglist_ht, gpfn);
> + return -EINVAL;
> + }
> + if ( next->gpfn == gpfn )
> + {
> + prev->next = next->next;
> + xfree(next);
> + break;
> + }
> + prev = next;
> + }
> + }
> +
> + return 0;
> +}
> +
> static int hvm_ioreq_server_map_pages(struct hvm_ioreq_server *s,
> bool_t is_default, bool_t handle_bufioreq)
> {
> @@ -948,7 +1046,14 @@ static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
> spin_lock_init(&s->lock);
> INIT_LIST_HEAD(&s->ioreq_vcpu_list);
> spin_lock_init(&s->bufioreq_lock);
> + spin_lock_init(&s->pglist_hash_lock);
>
> + rc = -ENOMEM;
> + s->pglist_hash_base = xmalloc_array(struct pglist_hash_table, PGLIST_HASH_SIZE);
> + if ( s->pglist_hash_base == NULL )
> + goto fail1;
> + memset(s->pglist_hash_base, 0, PGLIST_HASH_ENTRY_SIZE * PGLIST_HASH_SIZE);
xzalloc_array()
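i.e.:
    s->pglist_hash_base = xzalloc_array(struct pglist_hash_table,
                                        PGLIST_HASH_SIZE);
    if ( s->pglist_hash_base == NULL )
        goto fail1;
which allocates zeroed memory and makes the explicit memset() (and the PGLIST_HASH_ENTRY_SIZE define) unnecessary.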
> +
> rc = hvm_ioreq_server_alloc_rangesets(s, is_default);
> if ( rc )
> goto fail1;
> @@ -1087,6 +1192,9 @@ static int hvm_destroy_ioreq_server(struct domain *d, ioservid_t id)
>
> hvm_ioreq_server_deinit(s, 0);
>
> + hvm_ioreq_server_free_pglist_hash(s->pglist_hash_base);
> + xfree(s->pglist_hash_base);
> +
> domain_unpause(d);
>
> xfree(s);
> @@ -1279,6 +1387,63 @@ static int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
> return rc;
> }
>
> +static int hvm_map_pages_to_ioreq_server(struct domain *d, ioservid_t id,
> + uint16_t set, uint16_t nr_pages,
> + uint64_t *gpfn)
> +{
> + struct hvm_ioreq_server *s;
> + int rc;
> + unsigned int i;
> + p2m_type_t ot, nt;
> +
> + if ( set )
> + {
> + ot = p2m_ram_rw;
> + nt = p2m_mmio_write_dm;
> + }
> + else
> + {
> + ot = p2m_mmio_write_dm;
> + nt = p2m_ram_rw;
> + }
> +
> + spin_lock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> + rc = -ENOENT;
> + list_for_each_entry ( s,
> + &d->arch.hvm_domain.ioreq_server.list,
> + list_entry )
> + {
> + if ( s != d->arch.hvm_domain.default_ioreq_server &&
> + s->id == id )
> + {
> + spin_lock(&s->pglist_hash_lock);
> +
> + for ( i = 0; i < nr_pages; i++ )
> + {
> + rc = p2m_change_type_one(d, gpfn[i], ot, nt);
> + if ( !rc )
> + {
> + if ( set )
> + rc = hvm_ioreq_server_pglist_hash_add(s->pglist_hash_base, gpfn[i]);
> + else
> + rc = hvm_ioreq_server_pglist_hash_rem(s->pglist_hash_base, gpfn[i]);
> + }
> +
> + if ( rc )
> + break;
> + }
> +
> + spin_unlock(&s->pglist_hash_lock);
> + break;
> + }
> + }
> +
> + spin_unlock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> + return rc;
> +}
> +
> static int hvm_all_ioreq_servers_add_vcpu(struct domain *d, struct vcpu *v)
> {
> struct hvm_ioreq_server *s;
> @@ -2349,6 +2514,8 @@ static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
> switch ( type )
> {
> unsigned long end;
> + uint64_t gpfn;
> + struct pglist_hash_table *he;
>
> case IOREQ_TYPE_PIO:
> end = addr + p->size - 1;
> @@ -2361,6 +2528,14 @@ static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
> if ( rangeset_contains_range(r, addr, end) )
> return s;
>
> + gpfn = addr >> PAGE_SHIFT;
> + spin_lock(&s->pglist_hash_lock);
> + he = hvm_ioreq_server_lookup_pglist_hash_table(s->pglist_hash_base, gpfn);
> + spin_unlock(&s->pglist_hash_lock);
> +
> + if ( he )
> + return s;
> +
> break;
> case IOREQ_TYPE_PCI_CONFIG:
> if ( rangeset_contains_singleton(r, addr >> 32) )
> @@ -5309,6 +5484,35 @@ static int hvmop_set_ioreq_server_state(
> return rc;
> }
>
> +static int hvmop_map_pages_to_ioreq_server(
> + XEN_GUEST_HANDLE_PARAM(xen_hvm_map_pages_to_ioreq_server_t) uop)
> +{
> + xen_hvm_map_pages_to_ioreq_server_t op;
> + struct domain *d;
> + int rc;
> +
> + if ( copy_from_guest(&op, uop, 1) )
> + return -EFAULT;
> +
> + rc = rcu_lock_remote_domain_by_id(op.domid, &d);
> + if ( rc != 0 )
> + return rc;
> +
> + rc = -EINVAL;
> + if ( !is_hvm_domain(d) )
> + goto out;
> +
> + rc = xsm_hvm_ioreq_server(XSM_DM_PRIV, d, HVMOP_map_pages_to_ioreq_server);
> + if ( rc != 0 )
> + goto out;
> +
> + rc = hvm_map_pages_to_ioreq_server(d, op.id, op.set, op.nr_pages, op.page_list);
> +
> +out:
> + rcu_unlock_domain(d);
> + return rc;
> +}
> +
> static int hvmop_destroy_ioreq_server(
> XEN_GUEST_HANDLE_PARAM(xen_hvm_destroy_ioreq_server_t) uop)
> {
> @@ -5374,6 +5578,11 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
> guest_handle_cast(arg, xen_hvm_set_ioreq_server_state_t));
> break;
>
> + case HVMOP_map_pages_to_ioreq_server:
> + rc = hvmop_map_pages_to_ioreq_server(
> + guest_handle_cast(arg, xen_hvm_map_pages_to_ioreq_server_t));
> + break;
> +
> case HVMOP_destroy_ioreq_server:
> rc = hvmop_destroy_ioreq_server(
> guest_handle_cast(arg, xen_hvm_destroy_ioreq_server_t));
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
> index 291a2e0..c28fbbe 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -48,6 +48,11 @@ struct hvm_ioreq_vcpu {
> evtchn_port_t ioreq_evtchn;
> };
>
> +struct pglist_hash_table {
> + struct pglist_hash_table *next;
> + uint64_t gpfn;
> +};
> +
> #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
> #define MAX_NR_IO_RANGES 256
>
> @@ -70,6 +75,10 @@ struct hvm_ioreq_server {
> evtchn_port_t bufioreq_evtchn;
> struct rangeset *range[NR_IO_RANGE_TYPES];
> bool_t enabled;
> +
> + /* scatter page list need write protection */
> + struct pglist_hash_table *pglist_hash_base;
> + spinlock_t pglist_hash_lock;
> };
>
> struct hvm_domain {
> diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
> index eeb0a60..f7c989a 100644
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -367,6 +367,18 @@ struct xen_hvm_set_ioreq_server_state {
> typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t;
> DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
>
> +#define HVMOP_map_pages_to_ioreq_server 23
> +#define XEN_IOREQSVR_MAX_MAP_BATCH 128
> +struct xen_hvm_map_pages_to_ioreq_server {
> + domid_t domid; /* IN - domain to be serviced */
> + ioservid_t id; /* IN - server id */
> + uint16_t set; /* IN - 1: map pages, 0: unmap pages */
> + uint16_t nr_pages; /* IN - page count */
> + uint64_t page_list[XEN_IOREQSVR_MAX_MAP_BATCH]; /* IN - gpfn list */
No arbitrary limits like this in the API please. Use a GUEST_HANDLE_64
instead.
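Something along these lines, perhaps (sketch only, exact handle type up for debate):
    struct xen_hvm_map_pages_to_ioreq_server {
        domid_t domid;       /* IN - domain to be serviced */
        ioservid_t id;       /* IN - server id */
        uint16_t set;        /* IN - 1: map pages, 0: unmap pages */
        uint16_t nr_pages;   /* IN - page count */
        XEN_GUEST_HANDLE_64(uint64) page_list; /* IN - gpfn list */
    };
The hypervisor can then copy the gpfns from the handle in suitably sized chunks, and the toolstack is no longer restricted to a fixed number of pages per call.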
~Andrew
> +};
> +typedef struct xen_hvm_map_pages_to_ioreq_server xen_hvm_map_pages_to_ioreq_server_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_pages_to_ioreq_server_t);
> +
> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>
> #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
* Re: [PATCH v2 2/2] ioreq-server: Support scatter page forwarding
2014-08-22 10:35 ` Andrew Cooper
@ 2014-08-26 8:40 ` Ye, Wei
2014-08-26 9:37 ` Andrew Cooper
0 siblings, 1 reply; 14+ messages in thread
From: Ye, Wei @ 2014-08-26 8:40 UTC (permalink / raw)
To: Andrew Cooper, xen-devel@lists.xen.org
Cc: Tian, Kevin, keir@xen.org, ian.campbell@citrix.com,
stefano.stabellini@eu.citrix.com, tim@xen.org,
ian.jackson@eu.citrix.com, Dugger, Donald D,
Paul.Durrant@citrix.com, Lv, Zhiyuan, JBeulich@suse.com,
Zhang, Yang Z
> -----Original Message-----
> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
> Sent: Friday, August 22, 2014 6:35 PM
> To: Ye, Wei; xen-devel@lists.xen.org
> Cc: Tian, Kevin; keir@xen.org; ian.campbell@citrix.com;
> stefano.stabellini@eu.citrix.com; tim@xen.org; ian.jackson@eu.citrix.com;
> Dugger, Donald D; Paul.Durrant@citrix.com; Lv, Zhiyuan; JBeulich@suse.com;
> Zhang, Yang Z
> Subject: Re: [Xen-devel] [PATCH v2 2/2] ioreq-server: Support scatter page
> forwarding
>
> On 22/08/14 20:18, Wei Ye wrote:
> > Extend the interface to support add/remove scatter page list to be
> > forwarded by a dedicated ioreq-server instance. Check and select a
> > right ioreq-server instance for forwarding the write operation for a
> > write protected page.
> >
> > Signed-off-by: Wei Ye <wei.ye@intel.com>
> > ---
> > tools/libxc/xc_domain.c | 32 ++++++
> > tools/libxc/xenctrl.h | 18 ++++
> > xen/arch/x86/hvm/hvm.c | 209
> ++++++++++++++++++++++++++++++++++++++
> > xen/include/asm-x86/hvm/domain.h | 9 ++
> > xen/include/public/hvm/hvm_op.h | 12 +++
> > 5 files changed, 280 insertions(+)
> >
> > diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index
> > 37ed141..36e4e59 100644
> > --- a/tools/libxc/xc_domain.c
> > +++ b/tools/libxc/xc_domain.c
> > @@ -1485,6 +1485,38 @@ int
> xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch, domid_t
> domid,
> > return rc;
> > }
> >
> > +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch, domid_t
> domid,
> > + ioservid_t id, uint16_t set,
> > + uint16_t nr_pages, uint64_t
> > +*gpfn) {
> > + DECLARE_HYPERCALL;
> > +
> DECLARE_HYPERCALL_BUFFER(xen_hvm_map_pages_to_ioreq_server_t,
> arg);
> > + int pg, rc = -1;
> > +
> > + if ( arg == NULL )
> > + return -1;
>
> You must set errno before exiting -1.
>
I'm not sure what kind of errno should be set. I just followed similar existing functions like
"xc_hvm_map_io_range_to_ioreq_server..."; they also don't set errno before exiting. What's
your suggestion for the errno?
> > +
> > + if ( nr_pages > XEN_IOREQSVR_MAX_MAP_BATCH )
> > + goto out;
> > +
> > + hypercall.op = __HYPERVISOR_hvm_op;
> > + hypercall.arg[0] = HVMOP_map_pages_to_ioreq_server;
> > + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> > +
> > + arg->domid = domid;
> > + arg->id = id;
> > + arg->set = set;
> > + arg->nr_pages = nr_pages;
> > + for ( pg = 0; pg < nr_pages; pg++ )
> > + arg->page_list[pg] = gpfn[pg];
>
> memcpy()
>
Ok.
> > +
> > + rc = do_xen_hypercall(xch, &hypercall);
> > +
> > +out:
> > + xc_hypercall_buffer_free(xch, arg);
> > + return rc;
> > +}
> > +
> > int xc_hvm_destroy_ioreq_server(xc_interface *xch,
> > domid_t domid,
> > ioservid_t id) diff --git
> > a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h index b55d857..84e8465
> > 100644
> > --- a/tools/libxc/xenctrl.h
> > +++ b/tools/libxc/xenctrl.h
> > @@ -1943,6 +1943,24 @@ int
> xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch,
> > uint8_t function);
> >
> > /**
> > + * This function registers/deregisters a set of pages to be write protected.
> > + *
> > + * @parm xch a handle to an open hypervisor interface.
> > + * @parm domid the domain id to be serviced
> > + * @parm id the IOREQ Server id.
> > + * @parm set whether registers or deregisters: 1 for register, 0 for
> > +deregister
> > + * @parm nr_pages the count of pages
> > + * @parm pgfn the guest page frame number list
> > + * @return 0 on success, -1 on failure.
> > + */
> > +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
> > + domid_t domid,
> > + ioservid_t id,
> > + uint16_t set,
> > + uint16_t nr_pages,
> > + uint64_t *gpfn);
>
> Alignment of parameters.
Ok.
>
> > +
> > +/**
> > * This function destroys an IOREQ Server.
> > *
> > * @parm xch a handle to an open hypervisor interface.
> > diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index
> > 4984149..bbbacc3 100644
> > --- a/xen/arch/x86/hvm/hvm.c
> > +++ b/xen/arch/x86/hvm/hvm.c
> > @@ -68,6 +68,12 @@
> > #include <public/mem_event.h>
> > #include <xen/rangeset.h>
>
> This hash chain code belongs in its own patch. Please split it out, to allow the
> hypercall interface changes to be in a smaller patch.
>
Will split the hash chain code into a separate patch, thanks.
> >
> > +#define PGLIST_HASH_SIZE_SHIFT 8
> > +#define PGLIST_HASH_SIZE (1 << PGLIST_HASH_SIZE_SHIFT)
> > +#define pglist_hash(x) ((x) % PGLIST_HASH_SIZE)
> > +#define PGLIST_INVALID_GPFN 0
>
> Use INVALID_MFN. GFN 0 is perfectly legitimate as the target of scatter
> forwarding.
Ok.
>
> > +#define PGLIST_HASH_ENTRY_SIZE sizeof(struct pglist_hash_table)
> > +
> > bool_t __read_mostly hvm_enabled;
> >
> > unsigned int opt_hvm_debug_level __read_mostly; @@ -744,6 +750,98
> @@
> > static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server
> *s)
> > spin_unlock(&s->lock);
> > }
> >
> > +static struct pglist_hash_table
> *hvm_ioreq_server_lookup_pglist_hash_table(
> > + struct pglist_hash_table *pglist_ht,
> > +uint64_t gpfn) {
> > + unsigned int index = pglist_hash(gpfn);
> > + struct pglist_hash_table *entry;
> > +
> > + for ( entry = &pglist_ht[index]; entry != NULL; entry = entry->next )
> > + if ( entry->gpfn != PGLIST_INVALID_GPFN && entry->gpfn == gpfn )
> > + break;
> > +
> > + return entry;
> > +}
> > +
> > +static void hvm_ioreq_server_free_hash_chain(struct pglist_hash_table
> > +*chain) {
> > + struct pglist_hash_table *p = chain;
> > + struct pglist_hash_table *n;
> > +
> > + while ( p )
> > + {
> > + n = p->next;
> > + xfree(p);
> > + p = n;
> > + }
> > +}
> > +
> > +static void hvm_ioreq_server_free_pglist_hash(struct
> > +pglist_hash_table *pglist_ht) {
> > + unsigned int i;
> > +
> > + for ( i = 0; i < PGLIST_HASH_SIZE; i++ )
> > + if ( pglist_ht[i].next != NULL )
> > + hvm_ioreq_server_free_hash_chain(pglist_ht[i].next);
> > +}
> > +
> > +static int hvm_ioreq_server_pglist_hash_add(struct pglist_hash_table
> *pglist_ht,
> > + uint64_t gpfn) {
> > + unsigned int index = pglist_hash(gpfn);
> > + struct pglist_hash_table *ne;
> > +
> > + if ( hvm_ioreq_server_lookup_pglist_hash_table(pglist_ht, gpfn) !=
> NULL )
> > + return -EINVAL;
> > +
> > + if ( pglist_ht[index].gpfn == PGLIST_INVALID_GPFN )
> > + pglist_ht[index].gpfn = gpfn;
> > + else
> > + {
> > + ne = xmalloc(struct pglist_hash_table);
> > + if ( !ne )
> > + return -ENOMEM;
> > + ne->next = pglist_ht[index].next;
> > + ne->gpfn = gpfn;
> > + pglist_ht[index].next = ne;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > +static int hvm_ioreq_server_pglist_hash_rem(struct pglist_hash_table
> *pglist_ht,
> > + uint64_t gpfn) {
> > + unsigned int index = pglist_hash(gpfn);
> > + struct pglist_hash_table *next, *prev;
> > +
> > + if ( pglist_ht[index].gpfn == gpfn )
> > + pglist_ht[index].gpfn = PGLIST_INVALID_GPFN;
> > + else
> > + {
> > + prev = &pglist_ht[index];
> > + while ( 1 )
> > + {
> > + next = prev->next;
> > + if ( !next )
> > + {
> > + printk(XENLOG_G_WARNING
> "hvm_ioreq_server_pglist_hash_rem hash_table %p remove"
> > + " %lx not found\n", pglist_ht, gpfn);
> > + return -EINVAL;
> > + }
> > + if ( next->gpfn == gpfn )
> > + {
> > + prev->next = next->next;
> > + xfree(next);
> > + break;
> > + }
> > + prev = next;
> > + }
> > + }
> > +
> > + return 0;
> > +}
> > +
> > static int hvm_ioreq_server_map_pages(struct hvm_ioreq_server *s,
> > bool_t is_default, bool_t
> > handle_bufioreq) { @@ -948,7 +1046,14 @@ static int
> > hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
> > spin_lock_init(&s->lock);
> > INIT_LIST_HEAD(&s->ioreq_vcpu_list);
> > spin_lock_init(&s->bufioreq_lock);
> > + spin_lock_init(&s->pglist_hash_lock);
> >
> > + rc = -ENOMEM;
> > + s->pglist_hash_base = xmalloc_array(struct pglist_hash_table,
> PGLIST_HASH_SIZE);
> > + if ( s->pglist_hash_base == NULL )
> > + goto fail1;
> > + memset(s->pglist_hash_base, 0, PGLIST_HASH_ENTRY_SIZE *
> > + PGLIST_HASH_SIZE);
>
> xzalloc_array()
Ok.
>
> > +
> > rc = hvm_ioreq_server_alloc_rangesets(s, is_default);
> > if ( rc )
> > goto fail1;
> > @@ -1087,6 +1192,9 @@ static int hvm_destroy_ioreq_server(struct
> > domain *d, ioservid_t id)
> >
> > hvm_ioreq_server_deinit(s, 0);
> >
> > + hvm_ioreq_server_free_pglist_hash(s->pglist_hash_base);
> > + xfree(s->pglist_hash_base);
> > +
> > domain_unpause(d);
> >
> > xfree(s);
> > @@ -1279,6 +1387,63 @@ static int hvm_set_ioreq_server_state(struct
> domain *d, ioservid_t id,
> > return rc;
> > }
> >
> > +static int hvm_map_pages_to_ioreq_server(struct domain *d, ioservid_t
> id,
> > + uint16_t set, uint16_t nr_pages,
> > + uint64_t *gpfn) {
> > + struct hvm_ioreq_server *s;
> > + int rc;
> > + unsigned int i;
> > + p2m_type_t ot, nt;
> > +
> > + if ( set )
> > + {
> > + ot = p2m_ram_rw;
> > + nt = p2m_mmio_write_dm;
> > + }
> > + else
> > + {
> > + ot = p2m_mmio_write_dm;
> > + nt = p2m_ram_rw;
> > + }
> > +
> > + spin_lock(&d->arch.hvm_domain.ioreq_server.lock);
> > +
> > + rc = -ENOENT;
> > + list_for_each_entry ( s,
> > + &d->arch.hvm_domain.ioreq_server.list,
> > + list_entry )
> > + {
> > + if ( s != d->arch.hvm_domain.default_ioreq_server &&
> > + s->id == id )
> > + {
> > + spin_lock(&s->pglist_hash_lock);
> > +
> > + for ( i = 0; i < nr_pages; i++ )
> > + {
> > + rc = p2m_change_type_one(d, gpfn[i], ot, nt);
> > + if ( !rc )
> > + {
> > + if ( set )
> > + rc = hvm_ioreq_server_pglist_hash_add(s->pglist_hash_base,
> gpfn[i]);
> > + else
> > + rc = hvm_ioreq_server_pglist_hash_rem(s->pglist_hash_base,
> gpfn[i]);
> > + }
> > +
> > + if ( rc )
> > + break;
> > + }
> > +
> > + spin_unlock(&s->pglist_hash_lock);
> > + break;
> > + }
> > + }
> > +
> > + spin_unlock(&d->arch.hvm_domain.ioreq_server.lock);
> > +
> > + return rc;
> > +}
> > +
> > static int hvm_all_ioreq_servers_add_vcpu(struct domain *d, struct
> > vcpu *v) {
> > struct hvm_ioreq_server *s;
> > @@ -2349,6 +2514,8 @@ static struct hvm_ioreq_server
> *hvm_select_ioreq_server(struct domain *d,
> > switch ( type )
> > {
> > unsigned long end;
> > + uint64_t gpfn;
> > + struct pglist_hash_table *he;
> >
> > case IOREQ_TYPE_PIO:
> > end = addr + p->size - 1; @@ -2361,6 +2528,14 @@ static
> > struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
> > if ( rangeset_contains_range(r, addr, end) )
> > return s;
> >
> > + gpfn = addr >> PAGE_SHIFT;
> > + spin_lock(&s->pglist_hash_lock);
> > + he = hvm_ioreq_server_lookup_pglist_hash_table(s-
> >pglist_hash_base, gpfn);
> > + spin_unlock(&s->pglist_hash_lock);
> > +
> > + if ( he )
> > + return s;
> > +
> > break;
> > case IOREQ_TYPE_PCI_CONFIG:
> > if ( rangeset_contains_singleton(r, addr >> 32) ) @@
> > -5309,6 +5484,35 @@ static int hvmop_set_ioreq_server_state(
> > return rc;
> > }
> >
> > +static int hvmop_map_pages_to_ioreq_server(
> > +
> XEN_GUEST_HANDLE_PARAM(xen_hvm_map_pages_to_ioreq_server_t)
> uop)
> > +{
> > + xen_hvm_map_pages_to_ioreq_server_t op;
> > + struct domain *d;
> > + int rc;
> > +
> > + if ( copy_from_guest(&op, uop, 1) )
> > + return -EFAULT;
> > +
> > + rc = rcu_lock_remote_domain_by_id(op.domid, &d);
> > + if ( rc != 0 )
> > + return rc;
> > +
> > + rc = -EINVAL;
> > + if ( !is_hvm_domain(d) )
> > + goto out;
> > +
> > + rc = xsm_hvm_ioreq_server(XSM_DM_PRIV, d,
> HVMOP_map_pages_to_ioreq_server);
> > + if ( rc != 0 )
> > + goto out;
> > +
> > + rc = hvm_map_pages_to_ioreq_server(d, op.id, op.set, op.nr_pages,
> > + op.page_list);
> > +
> > +out:
> > + rcu_unlock_domain(d);
> > + return rc;
> > +}
> > +
> > static int hvmop_destroy_ioreq_server(
> > XEN_GUEST_HANDLE_PARAM(xen_hvm_destroy_ioreq_server_t) uop)
> { @@
> > -5374,6 +5578,11 @@ long do_hvm_op(unsigned long op,
> XEN_GUEST_HANDLE_PARAM(void) arg)
> > guest_handle_cast(arg, xen_hvm_set_ioreq_server_state_t));
> > break;
> >
> > + case HVMOP_map_pages_to_ioreq_server:
> > + rc = hvmop_map_pages_to_ioreq_server(
> > + guest_handle_cast(arg, xen_hvm_map_pages_to_ioreq_server_t));
> > + break;
> > +
> > case HVMOP_destroy_ioreq_server:
> > rc = hvmop_destroy_ioreq_server(
> > guest_handle_cast(arg, xen_hvm_destroy_ioreq_server_t));
> > diff --git a/xen/include/asm-x86/hvm/domain.h
> > b/xen/include/asm-x86/hvm/domain.h
> > index 291a2e0..c28fbbe 100644
> > --- a/xen/include/asm-x86/hvm/domain.h
> > +++ b/xen/include/asm-x86/hvm/domain.h
> > @@ -48,6 +48,11 @@ struct hvm_ioreq_vcpu {
> > evtchn_port_t ioreq_evtchn;
> > };
> >
> > +struct pglist_hash_table {
> > + struct pglist_hash_table *next;
> > + uint64_t gpfn;
> > +};
> > +
> > #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1) #define
> > MAX_NR_IO_RANGES 256
> >
> > @@ -70,6 +75,10 @@ struct hvm_ioreq_server {
> > evtchn_port_t bufioreq_evtchn;
> > struct rangeset *range[NR_IO_RANGE_TYPES];
> > bool_t enabled;
> > +
> > + /* scatter page list need write protection */
> > + struct pglist_hash_table *pglist_hash_base;
> > + spinlock_t pglist_hash_lock;
> > };
> >
> > struct hvm_domain {
> > diff --git a/xen/include/public/hvm/hvm_op.h
> > b/xen/include/public/hvm/hvm_op.h index eeb0a60..f7c989a 100644
> > --- a/xen/include/public/hvm/hvm_op.h
> > +++ b/xen/include/public/hvm/hvm_op.h
> > @@ -367,6 +367,18 @@ struct xen_hvm_set_ioreq_server_state
> { typedef
> > struct xen_hvm_set_ioreq_server_state
> > xen_hvm_set_ioreq_server_state_t;
> > DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
> >
> > +#define HVMOP_map_pages_to_ioreq_server 23
> > +#define XEN_IOREQSVR_MAX_MAP_BATCH 128
> > +struct xen_hvm_map_pages_to_ioreq_server {
> > + domid_t domid; /* IN - domain to be serviced */
> > + ioservid_t id; /* IN - server id */
> > + uint16_t set; /* IN - 1: map pages, 0: unmap pages */
> > + uint16_t nr_pages; /* IN - page count */
> > + uint64_t page_list[XEN_IOREQSVR_MAX_MAP_BATCH]; /* IN - gpfn list
> > +*/
>
> No arbitrary limits like this in the API please. Use a GUEST_HANDLE_64
> instead.
Ok.
>
> ~Andrew
>
> > +};
> > +typedef struct xen_hvm_map_pages_to_ioreq_server
> > +xen_hvm_map_pages_to_ioreq_server_t;
> >
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_pages_to_ioreq_server_t);
> > +
> > #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
> >
> > #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
* Re: [PATCH v2 2/2] ioreq-server: Support scatter page forwarding
2014-08-26 8:40 ` Ye, Wei
@ 2014-08-26 9:37 ` Andrew Cooper
0 siblings, 0 replies; 14+ messages in thread
From: Andrew Cooper @ 2014-08-26 9:37 UTC (permalink / raw)
To: Ye, Wei, xen-devel@lists.xen.org
Cc: Tian, Kevin, keir@xen.org, ian.campbell@citrix.com,
stefano.stabellini@eu.citrix.com, tim@xen.org,
ian.jackson@eu.citrix.com, Dugger, Donald D,
Paul.Durrant@citrix.com, Lv, Zhiyuan, JBeulich@suse.com,
Zhang, Yang Z
On 26/08/14 09:40, Ye, Wei wrote:
>
>> -----Original Message-----
>> From: Andrew Cooper [mailto:andrew.cooper3@citrix.com]
>> Sent: Friday, August 22, 2014 6:35 PM
>> To: Ye, Wei; xen-devel@lists.xen.org
>> Cc: Tian, Kevin; keir@xen.org; ian.campbell@citrix.com;
>> stefano.stabellini@eu.citrix.com; tim@xen.org; ian.jackson@eu.citrix.com;
>> Dugger, Donald D; Paul.Durrant@citrix.com; Lv, Zhiyuan; JBeulich@suse.com;
>> Zhang, Yang Z
>> Subject: Re: [Xen-devel] [PATCH v2 2/2] ioreq-server: Support scatter page
>> forwarding
>>
>> On 22/08/14 20:18, Wei Ye wrote:
>>> Extend the interface to support add/remove scatter page list to be
>>> forwarded by a dedicated ioreq-server instance. Check and select a
>>> right ioreq-server instance for forwarding the write operation for a
>>> write protected page.
>>>
>>> Signed-off-by: Wei Ye <wei.ye@intel.com>
>>> ---
>>> tools/libxc/xc_domain.c | 32 ++++++
>>> tools/libxc/xenctrl.h | 18 ++++
>>> xen/arch/x86/hvm/hvm.c | 209
>> ++++++++++++++++++++++++++++++++++++++
>>> xen/include/asm-x86/hvm/domain.h | 9 ++
>>> xen/include/public/hvm/hvm_op.h | 12 +++
>>> 5 files changed, 280 insertions(+)
>>>
>>> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c index
>>> 37ed141..36e4e59 100644
>>> --- a/tools/libxc/xc_domain.c
>>> +++ b/tools/libxc/xc_domain.c
>>> @@ -1485,6 +1485,38 @@ int
>> xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch, domid_t
>> domid,
>>> return rc;
>>> }
>>>
>>> +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch, domid_t
>> domid,
>>> + ioservid_t id, uint16_t set,
>>> + uint16_t nr_pages, uint64_t
>>> +*gpfn) {
>>> + DECLARE_HYPERCALL;
>>> +
>> DECLARE_HYPERCALL_BUFFER(xen_hvm_map_pages_to_ioreq_server_t,
>> arg);
>>> + int pg, rc = -1;
>>> +
>>> + if ( arg == NULL )
>>> + return -1;
>> You must set errno before exiting -1.
>>
> I'm not sure what kind of errno should be set. I just followed similar existing functions like
> "xc_hvm_map_io_range_to_ioreq_server..."; they also don't set errno before exiting. What's
> your suggestion for the errno?
The error handling/consistency in libxc is admittedly appalling, but new
code should not be making it any worse.
In this case, EFAULT might be acceptable, as NULL is a bad address to pass.
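i.e. something like:
    if ( arg == NULL )
    {
        errno = EFAULT;
        return -1;
    }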
~Andrew
* Re: [PATCH v2 2/2] ioreq-server: Support scatter page forwarding
2014-08-22 19:18 ` [PATCH v2 2/2] ioreq-server: Support scatter page forwarding Wei Ye
2014-08-22 10:35 ` Andrew Cooper
@ 2014-08-26 11:35 ` Paul Durrant
2014-08-26 13:26 ` Jan Beulich
2 siblings, 0 replies; 14+ messages in thread
From: Paul Durrant @ 2014-08-26 11:35 UTC (permalink / raw)
To: Wei Ye, xen-devel@lists.xen.org
Cc: Kevin Tian, Keir (Xen.org), Ian Campbell, Tim (Xen.org),
donald.d.dugger@intel.com, Stefano Stabellini,
zhiyuan.lv@intel.com, JBeulich@suse.com, yang.z.zhang@intel.com,
Ian Jackson
> -----Original Message-----
> From: Wei Ye [mailto:wei.ye@intel.com]
> Sent: 22 August 2014 20:19
> To: xen-devel@lists.xen.org
> Cc: JBeulich@suse.com; Ian Campbell; Paul Durrant; Ian Jackson; Stefano
> Stabellini; donald.d.dugger@intel.com; Kevin Tian; yang.z.zhang@intel.com;
> zhiyuan.lv@intel.com; konrad.wilk@oracle.com; Keir (Xen.org); Tim
> (Xen.org); Wei Ye
> Subject: [PATCH v2 2/2] ioreq-server: Support scatter page forwarding
>
> Extend the interface to support add/remove scatter page list to be
> forwarded by a dedicated ioreq-server instance. Check and select
> a right ioreq-server instance for forwarding the write operation
> for a write protected page.
>
> Signed-off-by: Wei Ye <wei.ye@intel.com>
> ---
> tools/libxc/xc_domain.c | 32 ++++++
> tools/libxc/xenctrl.h | 18 ++++
> xen/arch/x86/hvm/hvm.c | 209
> ++++++++++++++++++++++++++++++++++++++
> xen/include/asm-x86/hvm/domain.h | 9 ++
> xen/include/public/hvm/hvm_op.h | 12 +++
> 5 files changed, 280 insertions(+)
>
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 37ed141..36e4e59 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1485,6 +1485,38 @@ int
> xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch, domid_t
> domid,
> return rc;
> }
>
> +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch, domid_t
> domid,
> + ioservid_t id, uint16_t set,
> + uint16_t nr_pages, uint64_t *gpfn)
> +{
> + DECLARE_HYPERCALL;
> +
> DECLARE_HYPERCALL_BUFFER(xen_hvm_map_pages_to_ioreq_server_t,
> arg);
> + int pg, rc = -1;
> +
> + if ( arg == NULL )
> + return -1;
> +
> + if ( nr_pages > XEN_IOREQSVR_MAX_MAP_BATCH )
> + goto out;
> +
> + hypercall.op = __HYPERVISOR_hvm_op;
> + hypercall.arg[0] = HVMOP_map_pages_to_ioreq_server;
> + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +
> + arg->domid = domid;
> + arg->id = id;
> + arg->set = set;
> + arg->nr_pages = nr_pages;
> + for ( pg = 0; pg < nr_pages; pg++ )
> + arg->page_list[pg] = gpfn[pg];
> +
> + rc = do_xen_hypercall(xch, &hypercall);
> +
> +out:
> + xc_hypercall_buffer_free(xch, arg);
> + return rc;
> +}
> +
> int xc_hvm_destroy_ioreq_server(xc_interface *xch,
> domid_t domid,
> ioservid_t id)
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index b55d857..84e8465 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1943,6 +1943,24 @@ int
> xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch,
> uint8_t function);
>
> /**
> + * This function registers/deregisters a set of pages to be write protected.
> + *
> + * @parm xch a handle to an open hypervisor interface.
> + * @parm domid the domain id to be serviced
> + * @parm id the IOREQ Server id.
> + * @parm set whether registers or deregisters: 1 for register, 0 for
> deregister
> + * @parm nr_pages the count of pages
> + * @parm pgfn the guest page frame number list
> + * @return 0 on success, -1 on failure.
> + */
> +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
> + domid_t domid,
> + ioservid_t id,
> + uint16_t set,
> + uint16_t nr_pages,
> + uint64_t *gpfn);
> +
> +/**
> * This function destroys an IOREQ Server.
> *
> * @parm xch a handle to an open hypervisor interface.
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 4984149..bbbacc3 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -68,6 +68,12 @@
> #include <public/mem_event.h>
> #include <xen/rangeset.h>
>
> +#define PGLIST_HASH_SIZE_SHIFT 8
> +#define PGLIST_HASH_SIZE (1 << PGLIST_HASH_SIZE_SHIFT)
> +#define pglist_hash(x) ((x) % PGLIST_HASH_SIZE)
> +#define PGLIST_INVALID_GPFN 0
> +#define PGLIST_HASH_ENTRY_SIZE sizeof(struct pglist_hash_table)
> +
Why are you persisting with the hash? I may have missed something as I was away on vacation, but Jan's query about why your v1 version of this patch was not using rangesets doesn't seem to have been addressed. I would have thought you could unify mapping of pages and mmio ranges fairly easily, which would probably massively reduce the size of this patch.
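For example (hypothetical sketch, names invented): a new range type, say HVMOP_IO_RANGE_WP_MEM, next to the existing PORT/MEMORY/PCI ones, would let the write-protected gpfns be tracked by the rangeset infrastructure already attached to each server:
    /* registration, per gpfn */
    rc = rangeset_add_singleton(s->range[HVMOP_IO_RANGE_WP_MEM], gpfn);
    /* selection, in hvm_select_ioreq_server() */
    if ( rangeset_contains_singleton(s->range[HVMOP_IO_RANGE_WP_MEM],
                                     addr >> PAGE_SHIFT) )
        return s;
The hash table, its lock and (presumably) the new hypercall could then all go away, with the existing HVMOP_map_io_range_to_ioreq_server doing the registration.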
Paul
> bool_t __read_mostly hvm_enabled;
>
> unsigned int opt_hvm_debug_level __read_mostly;
> @@ -744,6 +750,98 @@ static void
> hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s)
> spin_unlock(&s->lock);
> }
>
> +static struct pglist_hash_table
> *hvm_ioreq_server_lookup_pglist_hash_table(
> + struct pglist_hash_table *pglist_ht, uint64_t gpfn)
> +{
> + unsigned int index = pglist_hash(gpfn);
> + struct pglist_hash_table *entry;
> +
> + for ( entry = &pglist_ht[index]; entry != NULL; entry = entry->next )
> + if ( entry->gpfn != PGLIST_INVALID_GPFN && entry->gpfn == gpfn )
> + break;
> +
> + return entry;
> +}
> +
> +static void hvm_ioreq_server_free_hash_chain(struct pglist_hash_table
> *chain)
> +{
> + struct pglist_hash_table *p = chain;
> + struct pglist_hash_table *n;
> +
> + while ( p )
> + {
> + n = p->next;
> + xfree(p);
> + p = n;
> + }
> +}
> +
> +static void hvm_ioreq_server_free_pglist_hash(struct pglist_hash_table
> *pglist_ht)
> +{
> + unsigned int i;
> +
> + for ( i = 0; i < PGLIST_HASH_SIZE; i++ )
> + if ( pglist_ht[i].next != NULL )
> + hvm_ioreq_server_free_hash_chain(pglist_ht[i].next);
> +}
> +
> +static int hvm_ioreq_server_pglist_hash_add(struct pglist_hash_table
> *pglist_ht,
> + uint64_t gpfn)
> +{
> + unsigned int index = pglist_hash(gpfn);
> + struct pglist_hash_table *ne;
> +
> + if ( hvm_ioreq_server_lookup_pglist_hash_table(pglist_ht, gpfn) != NULL
> )
> + return -EINVAL;
> +
> + if ( pglist_ht[index].gpfn == PGLIST_INVALID_GPFN )
> + pglist_ht[index].gpfn = gpfn;
> + else
> + {
> + ne = xmalloc(struct pglist_hash_table);
> + if ( !ne )
> + return -ENOMEM;
> + ne->next = pglist_ht[index].next;
> + ne->gpfn = gpfn;
> + pglist_ht[index].next = ne;
> + }
> +
> + return 0;
> +}
> +
> +static int hvm_ioreq_server_pglist_hash_rem(struct pglist_hash_table
> *pglist_ht,
> + uint64_t gpfn)
> +{
> + unsigned int index = pglist_hash(gpfn);
> + struct pglist_hash_table *next, *prev;
> +
> + if ( pglist_ht[index].gpfn == gpfn )
> + pglist_ht[index].gpfn = PGLIST_INVALID_GPFN;
> + else
> + {
> + prev = &pglist_ht[index];
> + while ( 1 )
> + {
> + next = prev->next;
> + if ( !next )
> + {
> + printk(XENLOG_G_WARNING
> "hvm_ioreq_server_pglist_hash_rem hash_table %p remove"
> + " %lx not found\n", pglist_ht, gpfn);
> + return -EINVAL;
> + }
> + if ( next->gpfn == gpfn )
> + {
> + prev->next = next->next;
> + xfree(next);
> + break;
> + }
> + prev = next;
> + }
> + }
> +
> + return 0;
> +}
> +
> static int hvm_ioreq_server_map_pages(struct hvm_ioreq_server *s,
> bool_t is_default, bool_t handle_bufioreq)
> {
> @@ -948,7 +1046,14 @@ static int hvm_ioreq_server_init(struct
> hvm_ioreq_server *s, struct domain *d,
> spin_lock_init(&s->lock);
> INIT_LIST_HEAD(&s->ioreq_vcpu_list);
> spin_lock_init(&s->bufioreq_lock);
> + spin_lock_init(&s->pglist_hash_lock);
>
> + rc = -ENOMEM;
> + s->pglist_hash_base = xmalloc_array(struct pglist_hash_table,
> PGLIST_HASH_SIZE);
> + if ( s->pglist_hash_base == NULL )
> + goto fail1;
> + memset(s->pglist_hash_base, 0, PGLIST_HASH_ENTRY_SIZE *
> PGLIST_HASH_SIZE);
> +
> rc = hvm_ioreq_server_alloc_rangesets(s, is_default);
> if ( rc )
> goto fail1;
> @@ -1087,6 +1192,9 @@ static int hvm_destroy_ioreq_server(struct domain
> *d, ioservid_t id)
>
> hvm_ioreq_server_deinit(s, 0);
>
> + hvm_ioreq_server_free_pglist_hash(s->pglist_hash_base);
> + xfree(s->pglist_hash_base);
> +
> domain_unpause(d);
>
> xfree(s);
> @@ -1279,6 +1387,63 @@ static int hvm_set_ioreq_server_state(struct
> domain *d, ioservid_t id,
> return rc;
> }
>
> +static int hvm_map_pages_to_ioreq_server(struct domain *d, ioservid_t id,
> + uint16_t set, uint16_t nr_pages,
> + uint64_t *gpfn)
> +{
> + struct hvm_ioreq_server *s;
> + int rc;
> + unsigned int i;
> + p2m_type_t ot, nt;
> +
> + if ( set )
> + {
> + ot = p2m_ram_rw;
> + nt = p2m_mmio_write_dm;
> + }
> + else
> + {
> + ot = p2m_mmio_write_dm;
> + nt = p2m_ram_rw;
> + }
> +
> + spin_lock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> + rc = -ENOENT;
> + list_for_each_entry ( s,
> + &d->arch.hvm_domain.ioreq_server.list,
> + list_entry )
> + {
> + if ( s != d->arch.hvm_domain.default_ioreq_server &&
> + s->id == id )
> + {
> + spin_lock(&s->pglist_hash_lock);
> +
> + for ( i = 0; i < nr_pages; i++ )
> + {
> + rc = p2m_change_type_one(d, gpfn[i], ot, nt);
> + if ( !rc )
> + {
> + if ( set )
> + rc = hvm_ioreq_server_pglist_hash_add(s->pglist_hash_base,
> gpfn[i]);
> + else
> + rc = hvm_ioreq_server_pglist_hash_rem(s->pglist_hash_base,
> gpfn[i]);
> + }
> +
> + if ( rc )
> + break;
> + }
> +
> + spin_unlock(&s->pglist_hash_lock);
> + break;
> + }
> + }
> +
> + spin_unlock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> + return rc;
> +}
> +
> static int hvm_all_ioreq_servers_add_vcpu(struct domain *d, struct vcpu
> *v)
> {
> struct hvm_ioreq_server *s;
> @@ -2349,6 +2514,8 @@ static struct hvm_ioreq_server
> *hvm_select_ioreq_server(struct domain *d,
> switch ( type )
> {
> unsigned long end;
> + uint64_t gpfn;
> + struct pglist_hash_table *he;
>
> case IOREQ_TYPE_PIO:
> end = addr + p->size - 1;
> @@ -2361,6 +2528,14 @@ static struct hvm_ioreq_server
> *hvm_select_ioreq_server(struct domain *d,
> if ( rangeset_contains_range(r, addr, end) )
> return s;
>
> + gpfn = addr >> PAGE_SHIFT;
> + spin_lock(&s->pglist_hash_lock);
> + he = hvm_ioreq_server_lookup_pglist_hash_table(s-
> >pglist_hash_base, gpfn);
> + spin_unlock(&s->pglist_hash_lock);
> +
> + if ( he )
> + return s;
> +
> break;
> case IOREQ_TYPE_PCI_CONFIG:
> if ( rangeset_contains_singleton(r, addr >> 32) )
> @@ -5309,6 +5484,35 @@ static int hvmop_set_ioreq_server_state(
> return rc;
> }
>
> +static int hvmop_map_pages_to_ioreq_server(
> +
> XEN_GUEST_HANDLE_PARAM(xen_hvm_map_pages_to_ioreq_server_t)
> uop)
> +{
> + xen_hvm_map_pages_to_ioreq_server_t op;
> + struct domain *d;
> + int rc;
> +
> + if ( copy_from_guest(&op, uop, 1) )
> + return -EFAULT;
> +
> + rc = rcu_lock_remote_domain_by_id(op.domid, &d);
> + if ( rc != 0 )
> + return rc;
> +
> + rc = -EINVAL;
> + if ( !is_hvm_domain(d) )
> + goto out;
> +
> + rc = xsm_hvm_ioreq_server(XSM_DM_PRIV, d,
> HVMOP_map_pages_to_ioreq_server);
> + if ( rc != 0 )
> + goto out;
> +
> + rc = hvm_map_pages_to_ioreq_server(d, op.id, op.set, op.nr_pages,
> op.page_list);
> +
> +out:
> + rcu_unlock_domain(d);
> + return rc;
> +}
> +
> static int hvmop_destroy_ioreq_server(
> XEN_GUEST_HANDLE_PARAM(xen_hvm_destroy_ioreq_server_t) uop)
> {
> @@ -5374,6 +5578,11 @@ long do_hvm_op(unsigned long op,
> XEN_GUEST_HANDLE_PARAM(void) arg)
> guest_handle_cast(arg, xen_hvm_set_ioreq_server_state_t));
> break;
>
> + case HVMOP_map_pages_to_ioreq_server:
> + rc = hvmop_map_pages_to_ioreq_server(
> + guest_handle_cast(arg, xen_hvm_map_pages_to_ioreq_server_t));
> + break;
> +
> case HVMOP_destroy_ioreq_server:
> rc = hvmop_destroy_ioreq_server(
> guest_handle_cast(arg, xen_hvm_destroy_ioreq_server_t));
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-
> x86/hvm/domain.h
> index 291a2e0..c28fbbe 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -48,6 +48,11 @@ struct hvm_ioreq_vcpu {
> evtchn_port_t ioreq_evtchn;
> };
>
> +struct pglist_hash_table {
> + struct pglist_hash_table *next;
> + uint64_t gpfn;
> +};
> +
> #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
> #define MAX_NR_IO_RANGES 256
>
> @@ -70,6 +75,10 @@ struct hvm_ioreq_server {
> evtchn_port_t bufioreq_evtchn;
> struct rangeset *range[NR_IO_RANGE_TYPES];
> bool_t enabled;
> +
> + /* scatter page list need write protection */
> + struct pglist_hash_table *pglist_hash_base;
> + spinlock_t pglist_hash_lock;
> };
>
> struct hvm_domain {
> diff --git a/xen/include/public/hvm/hvm_op.h
> b/xen/include/public/hvm/hvm_op.h
> index eeb0a60..f7c989a 100644
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -367,6 +367,18 @@ struct xen_hvm_set_ioreq_server_state {
> typedef struct xen_hvm_set_ioreq_server_state
> xen_hvm_set_ioreq_server_state_t;
> DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
>
> +#define HVMOP_map_pages_to_ioreq_server 23
> +#define XEN_IOREQSVR_MAX_MAP_BATCH 128
> +struct xen_hvm_map_pages_to_ioreq_server {
> + domid_t domid; /* IN - domain to be serviced */
> + ioservid_t id; /* IN - server id */
> + uint16_t set; /* IN - 1: map pages, 0: unmap pages */
> + uint16_t nr_pages; /* IN - page count */
> + uint64_t page_list[XEN_IOREQSVR_MAX_MAP_BATCH]; /* IN - gpfn list
> */
> +};
> +typedef struct xen_hvm_map_pages_to_ioreq_server
> xen_hvm_map_pages_to_ioreq_server_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_pages_to_ioreq_server_t);
> +
> #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>
> #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
> --
> 1.7.9.5
* Re: [PATCH v2 2/2] ioreq-server: Support scatter page forwarding
2014-08-22 19:18 ` [PATCH v2 2/2] ioreq-server: Support scatter page forwarding Wei Ye
2014-08-22 10:35 ` Andrew Cooper
2014-08-26 11:35 ` Paul Durrant
@ 2014-08-26 13:26 ` Jan Beulich
2 siblings, 0 replies; 14+ messages in thread
From: Jan Beulich @ 2014-08-26 13:26 UTC (permalink / raw)
To: Wei Ye
Cc: kevin.tian, keir, ian.campbell, stefano.stabellini, tim,
ian.jackson, donald.d.dugger, xen-devel, Paul.Durrant, zhiyuan.lv,
yang.z.zhang
>>> On 22.08.14 at 21:18, <wei.ye@intel.com> wrote:
Independent of my earlier question and Paul's more recent one regarding the
need for all this code, a couple of mechanical comments:
> @@ -744,6 +750,98 @@ static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s)
> spin_unlock(&s->lock);
> }
>
> +static struct pglist_hash_table *hvm_ioreq_server_lookup_pglist_hash_table(
> + struct pglist_hash_table *pglist_ht, uint64_t gpfn)
> +{
> + unsigned int index = pglist_hash(gpfn);
> + struct pglist_hash_table *entry;
> +
> + for ( entry = &pglist_ht[index]; entry != NULL; entry = entry->next )
> + if ( entry->gpfn != PGLIST_INVALID_GPFN && entry->gpfn == gpfn )
Please write efficient code: By making sure up front that gpfn isn't
PGLIST_INVALID_GPFN, the left side of the && (executed on every
loop iteration) can be dropped.
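i.e. something like:
    if ( gpfn == PGLIST_INVALID_GPFN )
        return NULL;
    for ( entry = &pglist_ht[index]; entry != NULL; entry = entry->next )
        if ( entry->gpfn == gpfn )
            break;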
> +static int hvm_map_pages_to_ioreq_server(struct domain *d, ioservid_t id,
> + uint16_t set, uint16_t nr_pages,
Pointless uses of uint16_t. The first one should be bool_t, the second
unsigned int.
> + uint64_t *gpfn)
And this one would conventionally be unsigned long * or xen_pfn_t *.
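i.e.:
    static int hvm_map_pages_to_ioreq_server(struct domain *d, ioservid_t id,
                                             bool_t set, unsigned int nr_pages,
                                             xen_pfn_t *gpfn)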
> +{
> + struct hvm_ioreq_server *s;
> + int rc;
> + unsigned int i;
> + p2m_type_t ot, nt;
> +
> + if ( set )
> + {
> + ot = p2m_ram_rw;
> + nt = p2m_mmio_write_dm;
> + }
> + else
> + {
> + ot = p2m_mmio_write_dm;
> + nt = p2m_ram_rw;
> + }
> +
> + spin_lock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> + rc = -ENOENT;
> + list_for_each_entry ( s,
> + &d->arch.hvm_domain.ioreq_server.list,
> + list_entry )
> + {
> + if ( s != d->arch.hvm_domain.default_ioreq_server &&
> + s->id == id )
> + {
> + spin_lock(&s->pglist_hash_lock);
> +
> + for ( i = 0; i < nr_pages; i++ )
Up to 64k iterations without preemption is already quite a lot. Did
you measure how long worst-case input would take to process?
(Note that I raised this question before without you addressing it.)
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -367,6 +367,18 @@ struct xen_hvm_set_ioreq_server_state {
> typedef struct xen_hvm_set_ioreq_server_state
> xen_hvm_set_ioreq_server_state_t;
> DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
>
> +#define HVMOP_map_pages_to_ioreq_server 23
> +#define XEN_IOREQSVR_MAX_MAP_BATCH 128
> +struct xen_hvm_map_pages_to_ioreq_server {
> + domid_t domid; /* IN - domain to be serviced */
> + ioservid_t id; /* IN - server id */
> + uint16_t set; /* IN - 1: map pages, 0: unmap pages */
The comment doesn't match the implementation.
> + uint16_t nr_pages; /* IN - page count */
> + uint64_t page_list[XEN_IOREQSVR_MAX_MAP_BATCH]; /* IN - gpfn list */
Ah, so the iteration limit above really is XEN_IOREQSVR_MAX_MAP_BATCH.
That's fine then, except that you need to bound check the input
value before iterating.
However I wonder whether you really want to bake such a fixed
limit into the hypercall. Using a handle here and limiting the count
to a (currently) sane maximum in the implementation would then
allow for bumping the count in the future.
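As a sketch of the hypervisor side with page_list turned into a handle (bound and names illustrative only):
    uint64_t gpfn;
    if ( op.nr_pages > MAX_SANE_NR_PAGES )  /* implementation-chosen limit */
        return -EINVAL;
    for ( i = 0; i < op.nr_pages; i++ )
    {
        if ( copy_from_guest_offset(&gpfn, op.page_list, i, 1) )
            return -EFAULT;
        /* ... process gpfn ... */
    }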
Jan