From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: David Vrabel <david.vrabel@citrix.com>
Cc: xen-devel@lists.xenproject.org,
Boris Ostrovsky <boris.ostrovsky@oracle.com>
Subject: Re: [PATCHv3] xen/gntdev: add ioctl for grant copy
Date: Mon, 30 Nov 2015 16:39:16 -0500 [thread overview]
Message-ID: <20151130213916.GE14317@char.us.oracle.com> (raw)
In-Reply-To: <1448644625-28449-1-git-send-email-david.vrabel@citrix.com>
On Fri, Nov 27, 2015 at 05:17:05PM +0000, David Vrabel wrote:
> Add IOCTL_GNTDEV_GRANT_COPY to allow applications to copy between user
> space buffers and grant references.
>
> This interface is similar to the GNTTABOP_copy hypercall ABI except
> the local buffers are provided using a virtual address (instead of a
> GFN and offset). To avoid userspace from having to page align its
> buffers the driver will use two or more ops if required.
>
> If the ioctl returns 0, the application must check the status of each
> segment with the segments status field. If the ioctl returns a -ve
> error code (EINVAL or EFAULT), the status of individual ops is
> undefined.
Are there any test tools that could be used for this? To make sure that
regression wise this does not get broken?
>
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> ---
> v3:
> - Rewrite with different API that matches the capabilities of the
> hypervisor ABI and eliminates some of the size/alignment
> restrictions.
> ---
> drivers/xen/gntdev.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++
> include/uapi/xen/gntdev.h | 47 +++++++++++
> 2 files changed, 240 insertions(+)
>
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index 2ea0b3b..5d78c79 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -748,6 +748,196 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
> return rc;
> }
>
> +#define GNTDEV_COPY_BATCH 16
> +
> +struct gntdev_copy_batch {
> + struct gnttab_copy ops[GNTDEV_COPY_BATCH];
> + struct page *pages[2 * GNTDEV_COPY_BATCH];
> + s16 __user *status[GNTDEV_COPY_BATCH];
> + unsigned int nr_ops;
> + unsigned int nr_pages;
> +};
> +
> +static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
> + unsigned long *gfn)
> +{
> + unsigned long addr = (unsigned long)virt;
> + struct page *page;
> + unsigned long xen_pfn;
> + int ret;
> +
> + ret = get_user_pages_fast(addr, 1, 1, &page);
Could the '1,1' have a comment please?
> + if (ret < 0)
> + return ret;
> +
> + batch->pages[batch->nr_pages++] = page;
> +
> + xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK);
> + *gfn = pfn_to_gfn(xen_pfn);
> +
> + return 0;
> +}
> +
> +static void gntdev_put_pages(struct gntdev_copy_batch *batch)
> +{
> + unsigned int i;
> +
> + for (i = 0; i < batch->nr_pages; i++)
> + put_page(batch->pages[i]);
> + batch->nr_pages = 0;
> +}
> +
> +static int gntdev_copy(struct gntdev_copy_batch *batch)
> +{
> + unsigned int i;
> +
> + gnttab_batch_copy(batch->ops, batch->nr_ops);
> + gntdev_put_pages(batch);
> +
> + /*
> + * For each completed op, update the status if the op failed
> + * and a previous failure for the segment hasn't been
> + * recorded.
How could an previous failure not be recorded? Could you mention that
in this nice comment please?
> + */
> + for (i = 0; i < batch->nr_ops; i++) {
> + s16 status = batch->ops[i].status;
> + s16 old_status;
> +
> + if (status == GNTST_okay)
> + continue;
> +
> + if (__get_user(old_status, batch->status[i]))
> + return -EFAULT;
> +
> + if (old_status != GNTST_okay)
> + continue;
> +
> + if (__put_user(status, batch->status[i]))
> + return -EFAULT;
> + }
> +
> + batch->nr_ops = 0;
> + return 0;
> +}
> +
> +static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
> + struct gntdev_grant_copy_segment *seg,
> + s16 __user *status)
> +{
> + uint16_t copied = 0;
> +
> + /* Can't cross page if source/dest is a grant ref. */
> + if (seg->flags & GNTCOPY_source_gref) {
> + if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE)
> + return -EINVAL;
> + }
> + if (seg->flags & GNTCOPY_dest_gref) {
> + if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE)
> + return -EINVAL;
> + }
> +
> + if (put_user(GNTST_okay, status))
> + return -EFAULT;
> +
> + while (copied < seg->len) {
> + struct gnttab_copy *op;
> + void __user *virt;
> + size_t len, off;
> + unsigned long gfn;
> + int ret;
> +
> + if (batch->nr_ops >= GNTDEV_COPY_BATCH) {
> + ret = gntdev_copy(batch);
> + if (ret < 0)
> + return ret;
> + }
> +
> + len = seg->len - copied;
> +
> + op = &batch->ops[batch->nr_ops];
> + op->flags = 0;
> +
> + if (seg->flags & GNTCOPY_source_gref) {
> + op->source.u.ref = seg->source.foreign.ref;
> + op->source.domid = seg->source.foreign.domid;
> + op->source.offset = seg->source.foreign.offset + copied;
> + op->flags |= GNTCOPY_source_gref;
> + } else {
> + virt = seg->source.virt + copied;
> + off = (unsigned long)virt & ~XEN_PAGE_MASK;
> + len = min(len, XEN_PAGE_SIZE - off);
> +
> + ret = gntdev_get_page(batch, virt, &gfn);
> + if (ret < 0)
> + return ret;
> +
> + op->source.u.gmfn = gfn;
> + op->source.domid = DOMID_SELF;
> + op->source.offset = off;
> + }
> +
> + if (seg->flags & GNTCOPY_dest_gref) {
> + op->dest.u.ref = seg->dest.foreign.ref;
> + op->dest.domid = seg->dest.foreign.domid;
> + op->dest.offset = seg->dest.foreign.offset + copied;
> + op->flags |= GNTCOPY_dest_gref;
> + } else {
> + virt = seg->dest.virt + copied;
> + off = (unsigned long)virt & ~XEN_PAGE_MASK;
> + len = min(len, XEN_PAGE_SIZE - off);
> +
> + ret = gntdev_get_page(batch, virt, &gfn);
> + if (ret < 0)
> + return ret;
> +
> + op->dest.u.gmfn = gfn;
> + op->dest.domid = DOMID_SELF;
> + op->dest.offset = off;
> + }
> +
> + op->len = len;
> + copied += len;
> +
> + batch->status[batch->nr_ops] = status;
> + batch->nr_ops++;
> + }
> +
> + return 0;
> +}
> +
> +static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u)
> +{
> + struct ioctl_gntdev_grant_copy copy;
> + struct gntdev_copy_batch batch = { .nr_ops = 0, .nr_pages = 0, };
> + unsigned int i;
> + int ret = 0;
> +
> + if (copy_from_user(©, u, sizeof(copy)))
> + return -EFAULT;
>
+
No check on the .nr_pages' ? What if it is 0xfffffffffffffffffffffffff?
Ditto for .nr_ops?
> + for (i = 0; i < copy.count; i++) {
> + struct gntdev_grant_copy_segment seg;
> +
> + if (copy_from_user(&seg, ©.segments[i], sizeof(seg))) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + ret = gntdev_grant_copy_seg(&batch, &seg, ©.segments[i].status);
> + if (ret < 0)
> + goto out;
> +
> + cond_resched();
> + }
> + if (batch.nr_ops)
> + ret = gntdev_copy(&batch);
> + return ret;
> +
> + out:
> + gntdev_put_pages(&batch);
> + return ret;
> +}
> +
> static long gntdev_ioctl(struct file *flip,
> unsigned int cmd, unsigned long arg)
> {
> @@ -767,6 +957,9 @@ static long gntdev_ioctl(struct file *flip,
> case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
> return gntdev_ioctl_notify(priv, ptr);
>
> + case IOCTL_GNTDEV_GRANT_COPY:
> + return gntdev_ioctl_grant_copy(priv, ptr);
> +
> default:
> pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
> return -ENOIOCTLCMD;
> diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h
> index aa7610a..4b02343 100644
> --- a/include/uapi/xen/gntdev.h
> +++ b/include/uapi/xen/gntdev.h
> @@ -144,6 +144,53 @@ struct ioctl_gntdev_unmap_notify {
> __u32 event_channel_port;
> };
>
> +struct gntdev_grant_copy_segment {
> + union {
> + void __user *virt;
> + struct {
> + grant_ref_t ref;
> + __u16 offset;
> + domid_t domid;
> + } foreign;
> + } source, dest;
> + __u16 len;
> +
> + __u16 flags; /* GNTCOPY_* */
> + __s16 status; /* GNTST_* */
> +};
> +
> +/*
> + * Copy between grant references and local buffers.
> + *
> + * The copy is split into @count @segments, each of which can copy
> + * to/from one grant reference.
> + *
> + * Each segment is similar to struct gnttab_copy in the hypervisor ABI
> + * except the local buffer is specified using a virtual address
> + * (instead of a GFN and offset).
> + *
> + * The local buffer may cross a Xen page boundary -- the driver will
> + * split segments into multiple ops if required.
> + *
> + * Returns 0 if all segments have been processed and @status in each
> + * segment is valid. Note that one or more segments may have failed
> + * (status != GNTST_okay).
> + *
> + * If the driver had to split a segment into two or more ops, @status
> + * includes the status of the first failed op for that segment (or
> + * GNTST_okay if all ops were successful).
> + *
> + * If -1 is returned, the status of all segments is undefined.
> + *
> + * - EINVAL: a segment crosses the boundary of a foreign page.
> + */
> +#define IOCTL_GNTDEV_GRANT_COPY \
> + _IOC(_IOC_NONE, 'G', 8, sizeof(struct ioctl_gntdev_grant_copy))
> +struct ioctl_gntdev_grant_copy {
> + unsigned int count;
> + struct gntdev_grant_copy_segment __user *segments;
> +};
> +
> /* Clear (set to zero) the byte specified by index */
> #define UNMAP_NOTIFY_CLEAR_BYTE 0x1
> /* Send an interrupt on the indicated event channel */
> --
> 2.1.4
>
next prev parent reply other threads:[~2015-11-30 21:39 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-11-27 17:17 [PATCHv3] xen/gntdev: add ioctl for grant copy David Vrabel
2015-11-30 21:39 ` Konrad Rzeszutek Wilk [this message]
2015-12-01 10:35 ` David Vrabel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20151130213916.GE14317@char.us.oracle.com \
--to=konrad.wilk@oracle.com \
--cc=boris.ostrovsky@oracle.com \
--cc=david.vrabel@citrix.com \
--cc=xen-devel@lists.xenproject.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.