From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Wei Ye <wei.ye@intel.com>, xen-devel@lists.xen.org
Cc: kevin.tian@intel.com, keir@xen.org, ian.campbell@citrix.com,
	stefano.stabellini@eu.citrix.com, tim@xen.org,
	ian.jackson@eu.citrix.com, donald.d.dugger@intel.com,
	Paul.Durrant@citrix.com, zhiyuan.lv@intel.com, JBeulich@suse.com,
	yang.z.zhang@intel.com
Subject: Re: [PATCH v2 2/2] ioreq-server: Support scatter page forwarding
Date: Fri, 22 Aug 2014 11:35:03 +0100	[thread overview]
Message-ID: <53F71CD7.8020607@citrix.com> (raw)
In-Reply-To: <1408735115-6023-3-git-send-email-wei.ye@intel.com>

On 22/08/14 20:18, Wei Ye wrote:
> Extend the interface to support add/remove scatter page list to be
> forwarded by a dedicated ioreq-server instance. Check and select
> a right ioreq-server instance for forwarding the write operation
> for a write protected page.
>
> Signed-off-by: Wei Ye <wei.ye@intel.com>
> ---
>  tools/libxc/xc_domain.c          |   32 ++++++
>  tools/libxc/xenctrl.h            |   18 ++++
>  xen/arch/x86/hvm/hvm.c           |  209 ++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/hvm/domain.h |    9 ++
>  xen/include/public/hvm/hvm_op.h  |   12 +++
>  5 files changed, 280 insertions(+)
>
> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
> index 37ed141..36e4e59 100644
> --- a/tools/libxc/xc_domain.c
> +++ b/tools/libxc/xc_domain.c
> @@ -1485,6 +1485,38 @@ int xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch, domid_t domid,
>      return rc;
>  }
>  
> +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch, domid_t domid,
> +                                     ioservid_t id, uint16_t set,
> +                                     uint16_t nr_pages, uint64_t *gpfn)
> +{
> +    DECLARE_HYPERCALL;
> +    DECLARE_HYPERCALL_BUFFER(xen_hvm_map_pages_to_ioreq_server_t, arg);
> +    int pg, rc = -1;
> +
> +    if ( arg == NULL )
> +        return -1;

You must set errno before returning -1.
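
Something along these lines (only a sketch; the specific errno values are
suggestions):

    if ( arg == NULL )
    {
        errno = ENOMEM;   /* suggested value - no usable hypercall buffer */
        return -1;
    }

    if ( nr_pages > XEN_IOREQSVR_MAX_MAP_BATCH )
    {
        errno = EINVAL;   /* suggested value - oversized batch */
        goto out;
    }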

> +    
> +    if ( nr_pages > XEN_IOREQSVR_MAX_MAP_BATCH )
> +        goto out;
> +
> +    hypercall.op     = __HYPERVISOR_hvm_op;
> +    hypercall.arg[0] = HVMOP_map_pages_to_ioreq_server;
> +    hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg);
> +
> +    arg->domid = domid;
> +    arg->id = id;
> +    arg->set = set;
> +    arg->nr_pages = nr_pages;
> +    for ( pg = 0; pg < nr_pages; pg++ )
> +        arg->page_list[pg] = gpfn[pg];

This open-coded copy loop can be a single memcpy().
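
i.e. something like:

    memcpy(arg->page_list, gpfn, nr_pages * sizeof(*gpfn));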

> +
> +    rc = do_xen_hypercall(xch, &hypercall);
> +
> +out:
> +    xc_hypercall_buffer_free(xch, arg);
> +    return rc;
> +}
> +
>  int xc_hvm_destroy_ioreq_server(xc_interface *xch,
>                                  domid_t domid,
>                                  ioservid_t id)
> diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
> index b55d857..84e8465 100644
> --- a/tools/libxc/xenctrl.h
> +++ b/tools/libxc/xenctrl.h
> @@ -1943,6 +1943,24 @@ int xc_hvm_unmap_pcidev_from_ioreq_server(xc_interface *xch,
>                                            uint8_t function);
>  
>  /**
> + * This function registers/deregisters a set of pages to be write protected.
> + *
> + * @parm xch a handle to an open hypervisor interface.
> + * @parm domid the domain id to be serviced
> + * @parm id the IOREQ Server id.
> + * @parm set whether registers or deregisters: 1 for register, 0 for deregister
> + * @parm nr_pages the count of pages
> + * @parm pgfn the guest page frame number list
> + * @return 0 on success, -1 on failure.
> + */
> +int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
> +                                          domid_t domid,
> +                                          ioservid_t id,
> +                                          uint16_t set,
> +                                          uint16_t nr_pages,
> +                                          uint64_t *gpfn);

Please fix the alignment of the parameters; the continuation lines should
line up with the first one.
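
i.e. (whitespace change only):

    int xc_hvm_map_pages_to_ioreq_server(xc_interface *xch,
                                         domid_t domid,
                                         ioservid_t id,
                                         uint16_t set,
                                         uint16_t nr_pages,
                                         uint64_t *gpfn);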

> +
> +/**
>   * This function destroys an IOREQ Server.
>   *
>   * @parm xch a handle to an open hypervisor interface.
> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
> index 4984149..bbbacc3 100644
> --- a/xen/arch/x86/hvm/hvm.c
> +++ b/xen/arch/x86/hvm/hvm.c
> @@ -68,6 +68,12 @@
>  #include <public/mem_event.h>
>  #include <xen/rangeset.h>

This hash chain code belongs in its own patch.  Please split it out, to
allow the hypercall interface changes to be in a smaller patch.

>  
> +#define PGLIST_HASH_SIZE_SHIFT    8
> +#define PGLIST_HASH_SIZE    (1 << PGLIST_HASH_SIZE_SHIFT)
> +#define pglist_hash(x)      ((x) % PGLIST_HASH_SIZE)
> +#define PGLIST_INVALID_GPFN       0

Use INVALID_MFN.  GFN 0 is perfectly legitimate as the target of scatter
forwarding.
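
e.g. (sketch; the hash table entries would then need initialising to
INVALID_MFN rather than relying on being zeroed):

    #define PGLIST_INVALID_GPFN    INVALID_MFN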

> +#define PGLIST_HASH_ENTRY_SIZE    sizeof(struct pglist_hash_table)
> +
>  bool_t __read_mostly hvm_enabled;
>  
>  unsigned int opt_hvm_debug_level __read_mostly;
> @@ -744,6 +750,98 @@ static void hvm_ioreq_server_remove_all_vcpus(struct hvm_ioreq_server *s)
>      spin_unlock(&s->lock);
>  }
>  
> +static struct pglist_hash_table *hvm_ioreq_server_lookup_pglist_hash_table(
> +                          struct pglist_hash_table *pglist_ht, uint64_t gpfn)
> +{
> +    unsigned int index = pglist_hash(gpfn);
> +    struct pglist_hash_table *entry;
> +
> +    for ( entry = &pglist_ht[index]; entry != NULL; entry = entry->next )
> +        if ( entry->gpfn != PGLIST_INVALID_GPFN && entry->gpfn == gpfn )
> +            break;
> +
> +    return entry;
> +}
> +
> +static void hvm_ioreq_server_free_hash_chain(struct pglist_hash_table *chain)
> +{
> +    struct pglist_hash_table *p = chain;
> +    struct pglist_hash_table *n;
> +
> +    while ( p )
> +    {
> +        n = p->next;
> +        xfree(p);
> +        p = n;
> +    }
> +}
> +
> +static void hvm_ioreq_server_free_pglist_hash(struct pglist_hash_table *pglist_ht)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < PGLIST_HASH_SIZE; i++ )
> +        if ( pglist_ht[i].next != NULL )
> +            hvm_ioreq_server_free_hash_chain(pglist_ht[i].next);
> +}
> +
> +static int hvm_ioreq_server_pglist_hash_add(struct pglist_hash_table *pglist_ht,
> +                                            uint64_t gpfn)
> +{
> +    unsigned int index = pglist_hash(gpfn);
> +    struct pglist_hash_table *ne;
> +
> +    if ( hvm_ioreq_server_lookup_pglist_hash_table(pglist_ht, gpfn) != NULL )
> +        return -EINVAL;
> +
> +    if ( pglist_ht[index].gpfn == PGLIST_INVALID_GPFN )
> +        pglist_ht[index].gpfn = gpfn;
> +    else
> +    {
> +        ne = xmalloc(struct pglist_hash_table);
> +        if ( !ne )
> +            return -ENOMEM;
> +        ne->next = pglist_ht[index].next;
> +        ne->gpfn = gpfn;
> +        pglist_ht[index].next = ne;
> +    }
> +
> +    return 0;
> +}
> +
> +static int hvm_ioreq_server_pglist_hash_rem(struct pglist_hash_table *pglist_ht,
> +                                            uint64_t gpfn)
> +{
> +    unsigned int index = pglist_hash(gpfn);
> +    struct pglist_hash_table *next, *prev;
> +
> +    if ( pglist_ht[index].gpfn == gpfn )
> +        pglist_ht[index].gpfn = PGLIST_INVALID_GPFN;
> +    else
> +    {
> +        prev = &pglist_ht[index];
> +        while ( 1 )
> +        {
> +            next = prev->next;
> +            if ( !next )
> +            {
> +                printk(XENLOG_G_WARNING "hvm_ioreq_server_pglist_hash_rem hash_table %p remove"
> +                       " %lx not found\n", pglist_ht, gpfn);
> +                return -EINVAL;
> +            }
> +            if ( next->gpfn == gpfn )
> +            {
> +                prev->next = next->next;
> +                xfree(next);
> +                break;
> +            }
> +            prev = next;
> +         }
> +    }
> +
> +    return 0;
> +}
> +
>  static int hvm_ioreq_server_map_pages(struct hvm_ioreq_server *s,
>                                        bool_t is_default, bool_t handle_bufioreq)
>  {
> @@ -948,7 +1046,14 @@ static int hvm_ioreq_server_init(struct hvm_ioreq_server *s, struct domain *d,
>      spin_lock_init(&s->lock);
>      INIT_LIST_HEAD(&s->ioreq_vcpu_list);
>      spin_lock_init(&s->bufioreq_lock);
> +    spin_lock_init(&s->pglist_hash_lock);
>  
> +    rc = -ENOMEM;
> +    s->pglist_hash_base = xmalloc_array(struct pglist_hash_table, PGLIST_HASH_SIZE);
> +    if ( s->pglist_hash_base == NULL )
> +        goto fail1;
> +    memset(s->pglist_hash_base, 0, PGLIST_HASH_ENTRY_SIZE * PGLIST_HASH_SIZE);

Use xzalloc_array() here and drop the explicit memset().
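
i.e. something like:

    s->pglist_hash_base = xzalloc_array(struct pglist_hash_table, PGLIST_HASH_SIZE);
    if ( s->pglist_hash_base == NULL )
        goto fail1;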

> + 
>      rc = hvm_ioreq_server_alloc_rangesets(s, is_default);
>      if ( rc )
>          goto fail1;
> @@ -1087,6 +1192,9 @@ static int hvm_destroy_ioreq_server(struct domain *d, ioservid_t id)
>          
>          hvm_ioreq_server_deinit(s, 0);
>  
> +        hvm_ioreq_server_free_pglist_hash(s->pglist_hash_base);
> +        xfree(s->pglist_hash_base);
> +
>          domain_unpause(d);
>  
>          xfree(s);
> @@ -1279,6 +1387,63 @@ static int hvm_set_ioreq_server_state(struct domain *d, ioservid_t id,
>      return rc;
>  }
>  
> +static int hvm_map_pages_to_ioreq_server(struct domain *d, ioservid_t id,
> +                                         uint16_t set, uint16_t nr_pages,
> +                                         uint64_t *gpfn)
> +{
> +    struct hvm_ioreq_server *s;
> +    int rc;
> +    unsigned int i;
> +    p2m_type_t ot, nt;
> +
> +    if ( set )
> +    {
> +        ot = p2m_ram_rw;
> +        nt = p2m_mmio_write_dm;
> +    }
> +    else
> +    {
> +        ot = p2m_mmio_write_dm;
> +        nt = p2m_ram_rw;
> +    }
> +
> +    spin_lock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> +    rc = -ENOENT;
> +    list_for_each_entry ( s,
> +                          &d->arch.hvm_domain.ioreq_server.list,
> +                          list_entry )
> +    {
> +        if ( s != d->arch.hvm_domain.default_ioreq_server &&
> +             s->id == id )
> +        {
> +            spin_lock(&s->pglist_hash_lock);
> +
> +            for ( i = 0; i < nr_pages; i++ )
> +            {
> +                rc = p2m_change_type_one(d, gpfn[i], ot, nt);
> +                if ( !rc )
> +                {
> +                    if ( set )
> +                        rc = hvm_ioreq_server_pglist_hash_add(s->pglist_hash_base, gpfn[i]);
> +                    else
> +                        rc = hvm_ioreq_server_pglist_hash_rem(s->pglist_hash_base, gpfn[i]);
> +                }
> +
> +                if ( rc )
> +                    break;
> +            }
> +
> +            spin_unlock(&s->pglist_hash_lock);
> +            break;
> +        }
> +    }
> +
> +    spin_unlock(&d->arch.hvm_domain.ioreq_server.lock);
> +
> +    return rc;
> +}
> +
>  static int hvm_all_ioreq_servers_add_vcpu(struct domain *d, struct vcpu *v)
>  {
>      struct hvm_ioreq_server *s;
> @@ -2349,6 +2514,8 @@ static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>          switch ( type )
>          {
>              unsigned long end;
> +            uint64_t gpfn;
> +            struct pglist_hash_table *he;
>  
>          case IOREQ_TYPE_PIO:
>              end = addr + p->size - 1;
> @@ -2361,6 +2528,14 @@ static struct hvm_ioreq_server *hvm_select_ioreq_server(struct domain *d,
>              if ( rangeset_contains_range(r, addr, end) )
>                  return s;
>  
> +            gpfn = addr >> PAGE_SHIFT;
> +            spin_lock(&s->pglist_hash_lock);
> +            he = hvm_ioreq_server_lookup_pglist_hash_table(s->pglist_hash_base, gpfn);
> +            spin_unlock(&s->pglist_hash_lock);
> +
> +            if ( he )
> +                return s;
> +
>              break;
>          case IOREQ_TYPE_PCI_CONFIG:
>              if ( rangeset_contains_singleton(r, addr >> 32) )
> @@ -5309,6 +5484,35 @@ static int hvmop_set_ioreq_server_state(
>      return rc;
>  }
>  
> +static int hvmop_map_pages_to_ioreq_server(
> +    XEN_GUEST_HANDLE_PARAM(xen_hvm_map_pages_to_ioreq_server_t) uop)
> +{
> +    xen_hvm_map_pages_to_ioreq_server_t op;
> +    struct domain *d;
> +    int rc;
> +
> +    if ( copy_from_guest(&op, uop, 1) )
> +        return -EFAULT;
> +
> +    rc = rcu_lock_remote_domain_by_id(op.domid, &d);
> +    if ( rc != 0 )
> +        return rc;
> +
> +    rc = -EINVAL;
> +    if ( !is_hvm_domain(d) )
> +        goto out;
> +
> +    rc = xsm_hvm_ioreq_server(XSM_DM_PRIV, d, HVMOP_map_pages_to_ioreq_server);
> +    if ( rc != 0 )
> +        goto out;
> +
> +    rc = hvm_map_pages_to_ioreq_server(d, op.id, op.set, op.nr_pages, op.page_list);
> +
> +out:
> +    rcu_unlock_domain(d);
> +    return rc;
> +}
> +
>  static int hvmop_destroy_ioreq_server(
>      XEN_GUEST_HANDLE_PARAM(xen_hvm_destroy_ioreq_server_t) uop)
>  {
> @@ -5374,6 +5578,11 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg)
>              guest_handle_cast(arg, xen_hvm_set_ioreq_server_state_t));
>          break;
>      
> +    case HVMOP_map_pages_to_ioreq_server:
> +        rc = hvmop_map_pages_to_ioreq_server(
> +            guest_handle_cast(arg, xen_hvm_map_pages_to_ioreq_server_t));
> +        break;
> + 
>      case HVMOP_destroy_ioreq_server:
>          rc = hvmop_destroy_ioreq_server(
>              guest_handle_cast(arg, xen_hvm_destroy_ioreq_server_t));
> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm-x86/hvm/domain.h
> index 291a2e0..c28fbbe 100644
> --- a/xen/include/asm-x86/hvm/domain.h
> +++ b/xen/include/asm-x86/hvm/domain.h
> @@ -48,6 +48,11 @@ struct hvm_ioreq_vcpu {
>      evtchn_port_t    ioreq_evtchn;
>  };
>  
> +struct pglist_hash_table {
> +    struct pglist_hash_table *next;
> +    uint64_t gpfn;
> +};
> +
>  #define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1)
>  #define MAX_NR_IO_RANGES  256
>  
> @@ -70,6 +75,10 @@ struct hvm_ioreq_server {
>      evtchn_port_t          bufioreq_evtchn;
>      struct rangeset        *range[NR_IO_RANGE_TYPES];
>      bool_t                 enabled;
> +
> +    /* scatter page list need write protection */
> +    struct pglist_hash_table   *pglist_hash_base;
> +    spinlock_t             pglist_hash_lock;
>  };
>  
>  struct hvm_domain {
> diff --git a/xen/include/public/hvm/hvm_op.h b/xen/include/public/hvm/hvm_op.h
> index eeb0a60..f7c989a 100644
> --- a/xen/include/public/hvm/hvm_op.h
> +++ b/xen/include/public/hvm/hvm_op.h
> @@ -367,6 +367,18 @@ struct xen_hvm_set_ioreq_server_state {
>  typedef struct xen_hvm_set_ioreq_server_state xen_hvm_set_ioreq_server_state_t;
>  DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_ioreq_server_state_t);
>  
> +#define HVMOP_map_pages_to_ioreq_server 23
> +#define XEN_IOREQSVR_MAX_MAP_BATCH      128
> +struct xen_hvm_map_pages_to_ioreq_server {
> +    domid_t domid;   /* IN - domain to be serviced */
> +    ioservid_t id;   /* IN - server id */
> +    uint16_t set;    /* IN - 1: map pages, 0: unmap pages */
> +    uint16_t nr_pages;  /* IN - page count */
> +    uint64_t page_list[XEN_IOREQSVR_MAX_MAP_BATCH]; /* IN - gpfn list */

No arbitrary limits like this in the API please.  Use a GUEST_HANDLE_64
instead.
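
Something along these lines (a sketch only - the point is the guest handle;
the exact field layout is up to you):

    struct xen_hvm_map_pages_to_ioreq_server {
        domid_t domid;                         /* IN - domain to be serviced */
        ioservid_t id;                         /* IN - server id */
        uint16_t set;                          /* IN - 1: map pages, 0: unmap pages */
        uint16_t nr_pages;                     /* IN - page count */
        XEN_GUEST_HANDLE_64(uint64) page_list; /* IN - gpfn list */
    };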

~Andrew

> +};
> +typedef struct xen_hvm_map_pages_to_ioreq_server xen_hvm_map_pages_to_ioreq_server_t;
> +DEFINE_XEN_GUEST_HANDLE(xen_hvm_map_pages_to_ioreq_server_t);
> +
>  #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
>  
>  #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
