xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Andres Lagar-Cavilla <andreslc@gridcentric.ca>
To: David Vrabel <david.vrabel@citrix.com>
Cc: xen-devel@lists.xensource.com,
	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Subject: Re: [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
Date: Thu, 30 Aug 2012 12:41:24 -0400	[thread overview]
Message-ID: <12E7F3C7-86B7-4B6B-8F53-23CCFCEF80FB@gridcentric.ca> (raw)
In-Reply-To: <1346331492-15027-3-git-send-email-david.vrabel@citrix.com>

David,
The patch looks functionally ok, but I still have two lingering concerns:
- the hideous casting of mfn into err
- why not signal paged-out frames for V1?

Rather than keep writing English, I wrote some C :)

I also took the liberty of including your Signed-off-by. David & Konrad, let me know what you think; once we settle on either version we can move on to unit testing this.

Thanks
Andres

commit 3c0c619f11a26b7bc3f12a1c477cf969c25de231
Author: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Date:   Thu Aug 30 12:23:33 2012 -0400

    xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl
    
    PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
    field for reporting the error code for every frame that could not be
    mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
    
    Also expand PRIVCMD_MMAPBATCH to return the appropriate error-encoding
    top nibble in the mfn array.
    
    Signed-off-by: David Vrabel <david.vrabel@citrix.com>
    Signed-off-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>

diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index 85226cb..6562e29 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
  */
 static int gather_array(struct list_head *pagelist,
 			unsigned nelem, size_t size,
-			void __user *data)
+			const void __user *data)
 {
 	unsigned pageidx;
 	void *pagedata;
@@ -246,20 +246,54 @@ struct mmap_batch_state {
 	domid_t domain;
 	unsigned long va;
 	struct vm_area_struct *vma;
+	/* A tristate: 
+	 *      0 for no errors
+	 *      1 if at least one error has happened (and no
+	 *          -ENOENT errors have happened)
+	 *      -ENOENT if at least 1 -ENOENT has happened.
+	 */
 	int err;
 
-	xen_pfn_t __user *user;
+	xen_pfn_t __user *user_mfn;
+	int __user *user_err;
 };
 
 static int mmap_batch_fn(void *data, void *state)
 {
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
+	int ret;
+
+	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+					 st->vma->vm_page_prot, st->domain);
+	if (ret < 0) {
+		/*
+		 * V2 provides a user-space (pre-checked for access) user_err
+		 * pointer, in which we store the individual map error codes.
+		 * 
+		 * V1 encodes the error codes in the 32bit top nibble of the 
+		 * mfn (with its known limitations vis-a-vis 64 bit callers).
+		 * 
+		 * In either case, global state.err is -ENOENT if any individual
+		 * map fails with -ENOENT, else 1 if any map failed, else zero.
+		 *
+		 */
+		if (st->user_err)
+			BUG_ON(__put_user(ret, st->user_err++));
+		else {
+			xen_pfn_t nibble = (ret == -ENOENT) ?
+					PRIVCMD_MMAPBATCH_PAGED_ERROR :
+					PRIVCMD_MMAPBATCH_MFN_ERROR;
+			*mfnp |= nibble;
+		}
 
-	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-				       st->vma->vm_page_prot, st->domain) < 0) {
-		*mfnp |= 0xf0000000U;
-		st->err++;
+		if (ret == -ENOENT)
+			st->err = -ENOENT;
+		else {
+			/* Record that at least one error has happened. */
+			if (st->err == 0)
+				st->err = 1;
+		}
 	}
 	st->va += PAGE_SIZE;
 
@@ -271,15 +305,18 @@ static int mmap_return_errors(void *data, void *state)
 	xen_pfn_t *mfnp = data;
 	struct mmap_batch_state *st = state;
 
-	return put_user(*mfnp, st->user++);
+	if (st->user_err == NULL)
+		return __put_user(*mfnp, st->user_mfn++);
+
+	return 0;
 }
 
 static struct vm_operations_struct privcmd_vm_ops;
 
-static long privcmd_ioctl_mmap_batch(void __user *udata)
+static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
 {
 	int ret;
-	struct privcmd_mmapbatch m;
+	struct privcmd_mmapbatch_v2 m;
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long nr_pages;
@@ -289,15 +326,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
 	if (!xen_initial_domain())
 		return -EPERM;
 
-	if (copy_from_user(&m, udata, sizeof(m)))
-		return -EFAULT;
+	switch (version) {
+	case 1:
+		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
+			return -EFAULT;
+		/* Returns per-frame error in m.arr. */
+		m.err = NULL;
+		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
+			return -EFAULT;
+		break;
+	case 2:
+		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
+			return -EFAULT;
+		/* Returns per-frame error code in m.err. */
+		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
+			return -EFAULT;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	nr_pages = m.num;
 	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
 		return -EINVAL;
 
-	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
-			   m.arr);
+	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
 
 	if (ret || list_empty(&pagelist))
 		goto out;
@@ -315,22 +368,34 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
 		goto out;
 	}
 
-	state.domain = m.dom;
-	state.vma = vma;
-	state.va = m.addr;
-	state.err = 0;
+	state.domain    = m.dom;
+	state.vma       = vma;
+	state.va        = m.addr;
+	state.err       = 0;
+	state.user_err  = m.err;
 
-	ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-			     &pagelist, mmap_batch_fn, &state);
+	/* mmap_batch_fn guarantees ret == 0 */
+	BUG_ON(traverse_pages(m.num, sizeof(xen_pfn_t),
+			     &pagelist, mmap_batch_fn, &state));
 
 	up_write(&mm->mmap_sem);
 
-	if (state.err > 0) {
-		state.user = m.arr;
-		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-			       &pagelist,
-			       mmap_return_errors, &state);
-	}
+	if (state.err) {
+		if (state.err == -ENOENT)
+			ret = -ENOENT;
+		/* V1 still needs to write back nibbles. */
+		if (m.err == NULL)
+		{
+			int efault;
+			state.user_mfn = (xen_pfn_t *)m.arr;
+			efault = traverse_pages(m.num, sizeof(xen_pfn_t),
+						 &pagelist,
+						 mmap_return_errors, &state);
+			if (efault)
+				ret = efault;
+		}
+	} else if (m.err)
+		__clear_user(m.err, m.num * sizeof(*m.err));
 
 out:
 	free_page_list(&pagelist);
@@ -354,7 +419,11 @@ static long privcmd_ioctl(struct file *file,
 		break;
 
 	case IOCTL_PRIVCMD_MMAPBATCH:
-		ret = privcmd_ioctl_mmap_batch(udata);
+		ret = privcmd_ioctl_mmap_batch(udata, 1);
+		break;
+
+	case IOCTL_PRIVCMD_MMAPBATCH_V2:
+		ret = privcmd_ioctl_mmap_batch(udata, 2);
 		break;
 
 	default:
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 45c1aa1..a853168 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -58,13 +58,33 @@ struct privcmd_mmapbatch {
 	int num;     /* number of pages to populate */
 	domid_t dom; /* target domain */
 	__u64 addr;  /* virtual address */
-	xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+	xen_pfn_t __user *arr; /* array of mfns - or'd with
+				  PRIVCMD_MMAPBATCH_*_ERROR on err */
+};
+
+#define PRIVCMD_MMAPBATCH_MFN_ERROR     0xf0000000U
+#define PRIVCMD_MMAPBATCH_PAGED_ERROR   0x80000000U
+
+struct privcmd_mmapbatch_v2 {
+	unsigned int num; /* number of pages to populate */
+	domid_t dom;      /* target domain */
+	__u64 addr;       /* virtual address */
+	const xen_pfn_t __user *arr; /* array of mfns */
+	int __user *err;  /* array of error codes */
 };
 
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
  * Return: Value returned from execution of the specified hypercall.
+ *
+ * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
+ * @arg: &struct privcmd_mmapbatch_v2
+ * Return: 0 on success (i.e., arg->err contains valid error codes for
+ * each frame).  On an error other than a failed frame remap, -1 is
+ * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
+ * if the operation was otherwise successful but any frame failed with
+ * -ENOENT, then -1 is returned and errno is set to ENOENT.
  */
 #define IOCTL_PRIVCMD_HYPERCALL					\
 	_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
@@ -72,5 +92,7 @@ struct privcmd_mmapbatch {
 	_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
 #define IOCTL_PRIVCMD_MMAPBATCH					\
 	_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+#define IOCTL_PRIVCMD_MMAPBATCH_V2				\
+	_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
 
On Aug 30, 2012, at 8:58 AM, David Vrabel wrote:

> From: David Vrabel <david.vrabel@citrix.com>
> 
> PRIVCMD_MMAPBATCH_V2 extends PRIVCMD_MMAPBATCH with an additional
> field for reporting the error code for every frame that could not be
> mapped.  libxc prefers PRIVCMD_MMAPBATCH_V2 over PRIVCMD_MMAPBATCH.
> 
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> ---
> drivers/xen/privcmd.c |   99 +++++++++++++++++++++++++++++++++++++++---------
> include/xen/privcmd.h |   23 +++++++++++-
> 2 files changed, 102 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
> index ccee0f1..c0e89e7 100644
> --- a/drivers/xen/privcmd.c
> +++ b/drivers/xen/privcmd.c
> @@ -76,7 +76,7 @@ static void free_page_list(struct list_head *pages)
>  */
> static int gather_array(struct list_head *pagelist,
> 			unsigned nelem, size_t size,
> -			void __user *data)
> +			const void __user *data)
> {
> 	unsigned pageidx;
> 	void *pagedata;
> @@ -248,18 +248,37 @@ struct mmap_batch_state {
> 	struct vm_area_struct *vma;
> 	int err;
> 
> -	xen_pfn_t __user *user;
> +	xen_pfn_t __user *user_mfn;
> +	int __user *user_err;
> };
> 
> static int mmap_batch_fn(void *data, void *state)
> {
> 	xen_pfn_t *mfnp = data;
> 	struct mmap_batch_state *st = state;
> +	int ret;
> 
> -	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> -				       st->vma->vm_page_prot, st->domain) < 0) {
> -		*mfnp |= 0xf0000000U;
> -		st->err++;
> +	ret = xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
> +					 st->vma->vm_page_prot, st->domain);
> +	if (ret < 0) {
> +		/*
> +		 * Error reporting is a mess but userspace relies on
> +		 * it behaving this way.
> +		 *
> +		 * V2 needs to a) return the result of each frame's
> +		 * remap; and b) return -ENOENT if any frame failed
> +		 * with -ENOENT.
> +		 *
> +		 * In this first pass the error code is saved by
> +		 * overwriting the mfn and an error is indicated in
> +		 * st->err.
> +		 *
> +		 * The second pass by mmap_return_errors() will write
> +		 * the error codes to user space and get the right
> +		 * ioctl return value.
> +		 */
> +		*(int *)mfnp = ret;
> +		st->err = ret;
> 	}
> 	st->va += PAGE_SIZE;
> 
> @@ -270,16 +289,33 @@ static int mmap_return_errors(void *data, void *state)
> {
> 	xen_pfn_t *mfnp = data;
> 	struct mmap_batch_state *st = state;
> +	int ret;
> +
> +	if (st->user_err) {
> +		int err = *(int *)mfnp;
> +
> +		if (err == -ENOENT)
> +			st->err = err;
> 
> -	return put_user(*mfnp, st->user++);
> +		return __put_user(err, st->user_err++);
> +	} else {
> +		xen_pfn_t mfn;
> +
> +		ret = __get_user(mfn, st->user_mfn);
> +		if (ret < 0)
> +			return ret;
> +
> +		mfn |= PRIVCMD_MMAPBATCH_MFN_ERROR;
> +		return __put_user(mfn, st->user_mfn++);
> +	}
> }
> 
> static struct vm_operations_struct privcmd_vm_ops;
> 
> -static long privcmd_ioctl_mmap_batch(void __user *udata)
> +static long privcmd_ioctl_mmap_batch(void __user *udata, int version)
> {
> 	int ret;
> -	struct privcmd_mmapbatch m;
> +	struct privcmd_mmapbatch_v2 m;
> 	struct mm_struct *mm = current->mm;
> 	struct vm_area_struct *vma;
> 	unsigned long nr_pages;
> @@ -289,15 +325,31 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> 	if (!xen_initial_domain())
> 		return -EPERM;
> 
> -	if (copy_from_user(&m, udata, sizeof(m)))
> -		return -EFAULT;
> +	switch (version) {
> +	case 1:
> +		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch)))
> +			return -EFAULT;
> +		/* Returns per-frame error in m.arr. */
> +		m.err = NULL;
> +		if (!access_ok(VERIFY_WRITE, m.arr, m.num * sizeof(*m.arr)))
> +			return -EFAULT;
> +		break;
> +	case 2:
> +		if (copy_from_user(&m, udata, sizeof(struct privcmd_mmapbatch_v2)))
> +			return -EFAULT;
> +		/* Returns per-frame error code in m.err. */
> +		if (!access_ok(VERIFY_WRITE, m.err, m.num * (sizeof(*m.err))))
> +			return -EFAULT;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> 
> 	nr_pages = m.num;
> 	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
> 		return -EINVAL;
> 
> -	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
> -			   m.arr);
> +	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
> 
> 	if (ret || list_empty(&pagelist))
> 		goto out;
> @@ -325,12 +377,17 @@ static long privcmd_ioctl_mmap_batch(void __user *udata)
> 
> 	up_write(&mm->mmap_sem);
> 
> -	if (state.err > 0) {
> -		state.user = m.arr;
> +	if (state.err) {
> +		state.err = 0;
> +		state.user_mfn = (xen_pfn_t *)m.arr;
> +		state.user_err = m.err;
> 		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
> -			       &pagelist,
> -			       mmap_return_errors, &state);
> -	}
> +				     &pagelist,
> +				     mmap_return_errors, &state);
> +		if (ret >= 0)
> +			ret = state.err;
> +	} else if (m.err)
> +		__clear_user(m.err, m.num * sizeof(*m.err));
> 
> out:
> 	free_page_list(&pagelist);
> @@ -354,7 +411,11 @@ static long privcmd_ioctl(struct file *file,
> 		break;
> 
> 	case IOCTL_PRIVCMD_MMAPBATCH:
> -		ret = privcmd_ioctl_mmap_batch(udata);
> +		ret = privcmd_ioctl_mmap_batch(udata, 1);
> +		break;
> +
> +	case IOCTL_PRIVCMD_MMAPBATCH_V2:
> +		ret = privcmd_ioctl_mmap_batch(udata, 2);
> 		break;
> 
> 	default:
> diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
> index 17857fb..f60d75c 100644
> --- a/include/xen/privcmd.h
> +++ b/include/xen/privcmd.h
> @@ -59,13 +59,32 @@ struct privcmd_mmapbatch {
> 	int num;     /* number of pages to populate */
> 	domid_t dom; /* target domain */
> 	__u64 addr;  /* virtual address */
> -	xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
> +	xen_pfn_t __user *arr; /* array of mfns - or'd with
> +				  PRIVCMD_MMAPBATCH_MFN_ERROR on err */
> +};
> +
> +#define PRIVCMD_MMAPBATCH_MFN_ERROR 0xf0000000U
> +
> +struct privcmd_mmapbatch_v2 {
> +	unsigned int num; /* number of pages to populate */
> +	domid_t dom;      /* target domain */
> +	__u64 addr;       /* virtual address */
> +	const xen_pfn_t __user *arr; /* array of mfns */
> +	int __user *err;  /* array of error codes */
> };
> 
> /*
>  * @cmd: IOCTL_PRIVCMD_HYPERCALL
>  * @arg: &privcmd_hypercall_t
>  * Return: Value returned from execution of the specified hypercall.
> + *
> + * @cmd: IOCTL_PRIVCMD_MMAPBATCH_V2
> + * @arg: &struct privcmd_mmapbatch_v2
> + * Return: 0 on success (i.e., arg->err contains valid error codes for
> + * each frame).  On an error other than a failed frame remap, -1 is
> + * returned and errno is set to EINVAL, EFAULT etc.  As an exception,
> + * if the operation was otherwise successful but any frame failed with
> + * -ENOENT, then -1 is returned and errno is set to ENOENT.
>  */
> #define IOCTL_PRIVCMD_HYPERCALL					\
> 	_IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
> @@ -73,5 +92,7 @@ struct privcmd_mmapbatch {
> 	_IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
> #define IOCTL_PRIVCMD_MMAPBATCH					\
> 	_IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
> +#define IOCTL_PRIVCMD_MMAPBATCH_V2				\
> +	_IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2))
> 
> #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
> -- 
> 1.7.2.5
> 

  reply	other threads:[~2012-08-30 16:41 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-08-30 12:58 [PATCHv3 0/2] xen/privcmd: support for paged-out frames David Vrabel
2012-08-30 12:58 ` [PATCH 1/2] xen/mm: return more precise error from xen_remap_domain_range() David Vrabel
2012-08-30 15:07   ` Andres Lagar-Cavilla
2012-08-30 12:58 ` [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl David Vrabel
2012-08-30 16:41   ` Andres Lagar-Cavilla [this message]
2012-08-30 17:04     ` David Vrabel
2012-08-30 18:29       ` Andres Lagar-Cavilla
2012-08-31  7:02         ` Ian Campbell
2012-08-30 18:32   ` Andres Lagar-Cavilla
2012-08-31 13:08     ` David Vrabel
2012-08-31 13:13       ` Andres Lagar-Cavilla
2012-09-05 16:17         ` Konrad Rzeszutek Wilk
2012-08-31 13:59   ` Andres Lagar-Cavilla
2012-09-05 16:21     ` Konrad Rzeszutek Wilk
2012-09-05 17:09       ` Andres Lagar-Cavilla
2012-09-05 17:40         ` Konrad Rzeszutek Wilk
2012-09-06 13:41           ` Andres Lagar-Cavilla
2012-09-06 16:20             ` Konrad Rzeszutek Wilk
2012-08-30 20:05 ` [PATCHv3 0/2] xen/privcmd: support for paged-out frames Konrad Rzeszutek Wilk
2012-08-30 20:12   ` Andres Lagar-Cavilla
2012-09-05 18:57     ` Konrad Rzeszutek Wilk
2012-09-05 19:51       ` Andres Lagar-Cavilla
2012-09-05 20:05         ` Konrad Rzeszutek Wilk
  -- strict thread matches above, loose matches on Subject: below --
2012-08-29 13:15 [PATCHv2 " David Vrabel
2012-08-29 13:15 ` [PATCH 2/2] xen/privcmd: add PRIVCMD_MMAPBATCH_V2 ioctl David Vrabel
2012-08-29 16:14   ` Andres Lagar-Cavilla
2012-08-29 16:36     ` David Vrabel
2012-08-29 18:10       ` Andres Lagar-Cavilla

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=12E7F3C7-86B7-4B6B-8F53-23CCFCEF80FB@gridcentric.ca \
    --to=andreslc@gridcentric.ca \
    --cc=david.vrabel@citrix.com \
    --cc=konrad.wilk@oracle.com \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).