From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
To: Andres Lagar-Cavilla <andreslc@gridcentric.ca>
Cc: xen-devel@lists.xensource.com, David Vrabel <david.vrabel@citrix.com>
Subject: Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
Date: Wed, 5 Sep 2012 12:21:48 -0400 [thread overview]
Message-ID: <20120905162148.GC11949@phenom.dumpdata.com> (raw)
In-Reply-To: <C989BE9D-E520-4D04-9028-6CE6CC765E76@gridcentric.ca>
On Fri, Aug 31, 2012 at 09:59:30AM -0400, Andres Lagar-Cavilla wrote:
> Re-spin of alternative patch after David's feedback.
> Thanks
> Andres
Applied. Fixed some whitespace issues.
>
> commit ab351a5cef1797935b083c2f6e72800a8949c515
> Author: Andres Lagar-Cavilla <andres@lagarcavilla.org>
> Date: Thu Aug 30 12:23:33 2012 -0400
>
> xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
>
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
>
> Also expand PRIVCMD_MMAPBATCH to return the appropriate error-encoding top
> nibble in the mfn array.
>
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
>
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index 85226cb..5386f20 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> */
> static int gather_array(struct list_head *pagelist,
> unsigned nelem, size_t size,
> - void __user *data)
> + const void __user *data)
> {
> unsigned pageidx;
> void *pagedata;
> @@ -246,61 +246,117 @@ struct mmap_batch_state {
> domid_t domain;
> unsigned long va;
> struct vm_area_struct *vma;
> - int err;
> -
> - xen_pfn_t __user *user;
> + /* A tristate:
> + * 0 for no errors
> + * 1 if at least one error has happened (and no
> + * -ENOENT errors have happened)
> + * -ENOENT if at least 1 -ENOENT has happened.
> + */
> + int global_error;
> + /* An array for individual errors */
> + int *err;
> +
> + /* User-space mfn array to store errors in the second pass for V1. */
> + xen_pfn_t __user *user_mfn;
> };
>
> static int mmap_batch_fn(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> + int ret;
>
> - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> - st->vma->vm_page_prot, st->domain) < 0) {
> - *mfnp |= 0xf0000000U;
> - st->err++;
> + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> + st->vma->vm_page_prot, st->domain);
> +
> + /* Store error code for second pass. */
> + *(st->err++) = ret;
> +
> + /* And see if it affects the global_error. */
> + if (ret < 0) {
> + if (ret == -ENOENT)
> + st->global_error = -ENOENT;
> + else {
> + /* Record that at least one error has happened. */
> + if (st->global_error == 0)
> + st->global_error = 1;
> + }
> }
> st->va += PAGE_SIZE;
>
> return 0;
> }
>
> -static int mmap_return_errors(void *data, void *state)
> +static int mmap_return_errors_v1(void *data, void *state)
> {
> xen_pfn_t *mfnp = data;
> struct mmap_batch_state *st = state;
> -
> - return put_user(*mfnp, st->user++);
> + int err = *(st->err++);
> +
> + /*
> + * V1 encodes the error codes in the 32bit top nibble of the
> + * mfn (with its known limitations vis-a-vis 64 bit callers).
> + */
> + *mfnp |= (err == -ENOENT) ?
> + PRIVCMD_MMAPBATCH_PAGED_ERROR :
> + PRIVCMD_MMAPBATCH_MFN_ERROR;
> + return __put_user(*mfnp, st->user_mfn++);
> }
>
> static struct vm_operations_struct privcmd_vm_ops;
>
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> int ret;
> - struct privcmd_mmapbatch m;
> + struct privcmd_mmapbatch_v2 m;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> unsigned long nr_pages;
> LIST_HEAD(pagelist);
> + int *err_array = NULL;
> struct mmap_batch_state state;
>
> if (!xen_initial_domain())
> return -EPERM;
>
> - if (copy_from_user(&m, udata, sizeof(m)))
> - return -EFAULT;
> + switch (version) {
> + case 1:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> + return -EFAULT;
> + /* Returns per-frame error in m.arr. */
> + m.err = NULL;
> + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> + return -EFAULT;
> + break;
> + case 2:
> + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> + return -EFAULT;
> + /* Returns per-frame error code in m.err. */
> + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> + return -EFAULT;
> + break;
> + default:
> + return -EINVAL;
> + }
>
> nr_pages = m.num;
> if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> return -EINVAL;
>
> - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> - m.arr);
> + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> +
> + if (ret)
> + goto out;
> + if (list_empty(&pagelist)) {
> + ret = -EINVAL;
> + goto out;
> + }
>
> - if (ret || list_empty(&pagelist))
> + err_array = kcalloc(m.num, sizeof(int), GFP_KERNEL);
> + if (err_array == NULL) {
> + ret = -ENOMEM;
> goto out;
> + }
>
> down_write(&mm->mmap_sem);
>
> @@ -315,24 +371,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> goto out;
> }
>
> - state.domain = m.dom;
> - state.vma = vma;
> - state.va = m.addr;
> - state.err = 0;
> + state.domain = m.dom;
> + state.vma = vma;
> + state.va = m.addr;
> + state.global_error = 0;
> + state.err = err_array;
>
> - ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist, mmap_batch_fn, &state);
> + /* mmap_batch_fn guarantees ret == 0 */
> + BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
> + &pagelist, mmap_batch_fn, &state));
>
> up_write(&mm->mmap_sem);
>
> - if (state.err > 0) {
> - state.user = m.arr;
> + if (state.global_error && (version == 1)) {
> + /* Write back errors in second pass. */
> + state.user_mfn = (xen_pfn_t *)m.arr;
> + state.err = err_array;
> ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> - &pagelist,
> - mmap_return_errors, &state);
> - }
> + &pagelist, mmap_return_errors_v1, &state);
> + } else
> + ret = __copy_to_user(m.err, err_array, m.num * sizeof(int));
> +
> + /* If we have not had any EFAULT-like global errors then set the global
> + * error to -ENOENT if necessary. */
> + if ((ret == 0) && (state.global_error == -ENOENT))
> + ret = -ENOENT;
>
> out:
> + kfree(err_array);
> free_page_list(&pagelist);
>
> return ret;
> @@ -354,7 +420,11 @@ static long privcmd_ioctl(struct file *file,
> break;
>
> case IOCTL_PRIVCMD_MMAPBATCH:
> - ret = privcmd_ioctl_mmap_batch(udata);
> + ret = privcmd_ioctl_mmap_batch(udata, 1);
> + break;
> +
> + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> + ret = privcmd_ioctl_mmap_batch(udata, 2);
> break;
>
> default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 45c1aa1..a853168 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
> int num; /* number of pages to populate */
> domid_t dom; /* target domain */
> __u64 addr; /* virtual address */
> - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> + xen_pfn_t __user *arr; /* array of mfns - or'd with
> + PRIVCMD_MMAPBATCH_*_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +#define PRIVCMD_MMAPBATCH_PAGED_ERROR 0x80000000U
> +
> +struct privcmd_mmapbatch_v2 {
> + unsigned int num; /* number of pages to populate */
> + domid_t dom; /* target domain */
> + __u64 addr; /* virtual address */
> + const xen_pfn_t __user *arr; /* array of mfns */
> + int __user *err; /* array of error codes */
> };
>
> /*
> * @cmd: IOCTL_PRIVCMD_HYPERCALL
> * @arg: &privcmd_hypercall_t
> * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame). On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> */
> #define IOCTL_PRIVCMD_HYPERCALL \
> _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
> _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH \
> _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
>
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
>
> On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:
>
> > From: David Vrabel <david.vrabel@citrix.com>
> >
> > PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> > field for reporting the error code for every frame that could not be
> > mapped. libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> >
> > Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> > ---
> > drivers/xen/privcmd.c | 99 +++++++++++++++++++++++++++++++++++++++---------
> > include/xen/privcmd.h | 23 +++++++++++-
> > 2 files changed, 102 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> > index ccee0f1..c0e89e7 100644
> > --- a/drivers/xen/privcmd.c
> > +++ b/drivers/xen/privcmd.c
> > @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
> > */
> > static int gather_array(struct list_head *pagelist,
> > unsigned nelem, size_t size,
> > - void __user *data)
> > + const void __user *data)
> > {
> > unsigned pageidx;
> > void *pagedata;
> > @@ -248,18 +248,37 @@ struct mmap_batch_state {
> > struct vm_area_struct *vma;
> > int err;
> >
> > - xen_pfn_t __user *user;
> > + xen_pfn_t __user *user_mfn;
> > + int __user *user_err;
> > };
> >
> > static int mmap_batch_fn(void *data, void *state)
> > {
> > xen_pfn_t *mfnp = data;
> > struct mmap_batch_state *st = state;
> > + int ret;
> >
> > - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> > - st->vma->vm_page_prot, st->domain) < 0) {
> > - *mfnp |= 0xf0000000U;
> > - st->err++;
> > + ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> > + st->vma->vm_page_prot, st->domain);
> > + if (ret < 0) {
> > + /*
> > + * Error reporting is a mess but userspace relies on
> > + * it behaving this way.
> > + *
> > + * V2 needs to a) return the result of each frame's
> > + * remap; and b) return -ENOENT if any frame failed
> > + * with -ENOENT.
> > + *
> > + * In this first pass the error code is saved by
> > + * overwriting the mfn and an error is indicated in
> > + * st->err.
> > + *
> > + * The second pass by mmap_return_errors() will write
> > + * the error codes to user space and get the right
> > + * ioctl return value.
> > + */
> > + *(int *)mfnp = ret;
> > + st->err = ret;
> > }
> > st->va += PAGE_SIZE;
> >
> > @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> > {
> > xen_pfn_t *mfnp = data;
> > struct mmap_batch_state *st = state;
> > + int ret;
> > +
> > + if (st->user_err) {
> > + int err = *(int *)mfnp;
> > +
> > + if (err == -ENOENT)
> > + st->err = err;
> >
> > - return put_user(*mfnp, st->user++);
> > + return __put_user(err, st->user_err++);
> > + } else {
> > + xen_pfn_t mfn;
> > +
> > + ret = __get_user(mfn, st->user_mfn);
> > + if (ret < 0)
> > + return ret;
> > +
> > + mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> > + return __put_user(mfn, st->user_mfn++);
> > + }
> > }
> >
> > static struct vm_operations_struct privcmd_vm_ops;
> >
> > -static long privcmd_ioctl_mmap_batch(void __user *udata)
> > +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> > {
> > int ret;
> > - struct privcmd_mmapbatch m;
> > + struct privcmd_mmapbatch_v2 m;
> > struct mm_struct *mm = current->mm;
> > struct vm_area_struct *vma;
> > unsigned long nr_pages;
> > @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> > if (!xen_initial_domain())
> > return -EPERM;
> >
> > - if (copy_from_user(&m, udata, sizeof(m)))
> > - return -EFAULT;
> > + switch (version) {
> > + case 1:
> > + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> > + return -EFAULT;
> > + /* Returns per-frame error in m.arr. */
> > + m.err = NULL;
> > + if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> > + return -EFAULT;
> > + break;
> > + case 2:
> > + if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> > + return -EFAULT;
> > + /* Returns per-frame error code in m.err. */
> > + if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> > + return -EFAULT;
> > + break;
> > + default:
> > + return -EINVAL;
> > + }
> >
> > nr_pages = m.num;
> > if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> > return -EINVAL;
> >
> > - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> > - m.arr);
> > + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> >
> > if (ret || list_empty(&pagelist))
> > goto out;
> > @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> >
> > up_write(&mm->mmap_sem);
> >
> > - if (state.err > 0) {
> > - state.user = m.arr;
> > + if (state.err) {
> > + state.err = 0;
> > + state.user_mfn = (xen_pfn_t *)m.arr;
> > + state.user_err = m.err;
> > ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> > - &pagelist,
> > - mmap_return_errors, &state);
> > - }
> > + &pagelist,
> > + mmap_return_errors, &state);
> > + if (ret >= 0)
> > + ret = state.err;
> > + } else if (m.err)
> > + __clear_user(m.err, m.num * sizeof(*m.err));
> >
> > out:
> > free_page_list(&pagelist);
> > @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> > break;
> >
> > case IOCTL_PRIVCMD_MMAPBATCH:
> > - ret = privcmd_ioctl_mmap_batch(udata);
> > + ret = privcmd_ioctl_mmap_batch(udata, 1);
> > + break;
> > +
> > + case IOCTL_PRIVCMD_MMAPBATCH_V2:
> > + ret = privcmd_ioctl_mmap_batch(udata, 2);
> > break;
> >
> > default:
> > diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> > index 17857fb..f60d75c 100644
> > --- a/include/xen/privcmd.h
> > +++ b/include/xen/privcmd.h
> > @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> > int num; /* number of pages to populate */
> > domid_t dom; /* target domain */
> > __u64 addr; /* virtual address */
> > - xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> > + xen_pfn_t __user *arr; /* array of mfns - or'd with
> > + PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> > +};
> > +
> > +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> > +
> > +struct privcmd_mmapbatch_v2 {
> > + unsigned int num; /* number of pages to populate */
> > + domid_t dom; /* target domain */
> > + __u64 addr; /* virtual address */
> > + const xen_pfn_t __user *arr; /* array of mfns */
> > + int __user *err; /* array of error codes */
> > };
> >
> > /*
> > * @cmd: IOCTL_PRIVCMD_HYPERCALL
> > * @arg: &privcmd_hypercall_t
> > * Return: Value returned from execution of the specified hypercall.
> > + *
> > + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> > + * @arg: &struct privcmd_mmapbatch_v2
> > + * Return: 0 on success (i.e., arg->err contains valid error codes for
> > + * each frame). On an error other than a failed frame remap, -1 is
> > + * returned and errno is set to EINVAL, EFAULT etc. As an exception,
> > + * if the operation was otherwise successful but any frame failed with
> > + * -ENOENT, then -1 is returned and errno is set to ENOENT.
> > */
> > #define IOCTL_PRIVCMD_HYPERCALL \
> > _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> > @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> > _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> > #define IOCTL_PRIVCMD_MMAPBATCH \
> > _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> > +#define IOCTL_PRIVCMD_MMAPBATCH_V2 \
> > + _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
> >
> > #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> > --
> > 1.7.2.5
> >
next prev parent reply other threads:[~2012-09-05 16:21 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-08-30 12:58 [PATCHv3 0/2] xen/privcmd: support for paged-out frames David Vrabel
2012-08-30 12:58 ` [PATCH 1/2] xen/mm: return more precise error from xen_remap_domain_range() David Vrabel
2012-08-30 15:07 ` Andres Lagar-Cavilla
2012-08-30 12:58 ` [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl David Vrabel
2012-08-30 16:41 ` Andres Lagar-Cavilla
2012-08-30 17:04 ` David Vrabel
2012-08-30 18:29 ` Andres Lagar-Cavilla
2012-08-31 7:02 ` Ian Campbell
2012-08-30 18:32 ` Andres Lagar-Cavilla
2012-08-31 13:08 ` David Vrabel
2012-08-31 13:13 ` Andres Lagar-Cavilla
2012-09-05 16:17 ` Konrad Rzeszutek Wilk
2012-08-31 13:59 ` Andres Lagar-Cavilla
2012-09-05 16:21 ` Konrad Rzeszutek Wilk [this message]
2012-09-05 17:09 ` Andres Lagar-Cavilla
2012-09-05 17:40 ` Konrad Rzeszutek Wilk
2012-09-06 13:41 ` Andres Lagar-Cavilla
2012-09-06 16:20 ` Konrad Rzeszutek Wilk
2012-08-30 20:05 ` [PATCHv3 0/2] xen/privcmd: support for paged-out frames Konrad Rzeszutek Wilk
2012-08-30 20:12 ` Andres Lagar-Cavilla
2012-09-05 18:57 ` Konrad Rzeszutek Wilk
2012-09-05 19:51 ` Andres Lagar-Cavilla
2012-09-05 20:05 ` Konrad Rzeszutek Wilk
-- strict thread matches above, loose matches on Subject: below --
2012-08-29 13:15 [PATCHv2 " David Vrabel
2012-08-29 13:15 ` [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl David Vrabel
2012-08-29 16:14 ` Andres Lagar-Cavilla
2012-08-29 16:36 ` David Vrabel
2012-08-29 18:10 ` Andres Lagar-Cavilla
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120905162148.GC11949@phenom.dumpdata.com \
--to=konrad.wilk@oracle.com \
--cc=andreslc@gridcentric.ca \
--cc=david.vrabel@citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).