All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mark McLoughlin <markmc@redhat.com>
To: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: xen-devel <xen-devel@lists.xensource.com>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Michael Abd-El-Malek <mabdelmalek@cmu.edu>
Subject: Re: Vanilla Linux and has_foreign_mapping
Date: Mon, 21 Apr 2008 17:36:52 +0100	[thread overview]
Message-ID: <1208795812.3761.6.camel@muff> (raw)
In-Reply-To: <480C7EA5.4070007@goop.org>

[-- Attachment #1: Type: text/plain, Size: 1464 bytes --]

On Mon, 2008-04-21 at 21:46 +1000, Jeremy Fitzhardinge wrote:
> Michael Abd-El-Malek wrote:
> > I'm trying to add support to Linux 2.6.25 for the 
> > "has_foreign_mappings" MMU context flag.  Xen's Linux 2.6.18 tree uses 
> > this flag, so that page tables are properly disposed of when an 
> > application exits when it has foreign mappings.
> 
> I was hoping to avoid having to introduce that flag, but I have to admit 
> I haven't given it much analysis.  How are you using it?

I looked at this a while back, but am somewhat sparse on the details
now.

Attaching a commit from the dom0 tree that references this.

>From my notes:

  http://lists.xensource.com/archives/html/xen-devel/2006-08/msg00014.html
  http://xenbits.xensource.com/xen-unstable.hg?rev/e351aace191e

  it sounds like the scenario is thus:

    1) process with foreign mappings exits, arch_exit_mmap() called
    2) the page tables get unpinned, no other users of the foreign
       pages so they get returned to the xen heap
    3) dom0 balloons and the machine physical page which had been
       foreign mapped is now allocated to another dom0 process
    4) when the original process goes to clean up its ptes, it
       reverse maps the mfn in the pte to the now allocated and
       drops a reference count it doesn't own

I've also a vague memory of thinking that the PAGE_IO flag recently
introduced to linux-2.6.18-xen.hg could be used to avoid this condition
too.

Cheers,
Mark.

[-- Attachment #2: ioctl-privcmd-mmap.patch --]
[-- Type: text/x-patch, Size: 7038 bytes --]

commit 2f0e2bc01dac626b7fb236b82d0133c850754988
Author: Mark McLoughlin <markmc@redhat.com>
Date:   Thu Feb 7 19:05:10 2008 +0000

    xen dom0: Add IOCTL_PRIVCMD_MMAP
    
    IOCTL_PRIVCMD_MMAP is used by Dom0 userspace (via
    libxc's xc_map_foreign_range() function) to map
    pages from a foreign domain without requiring a
    grant and without adding the page to Dom0's
    p2m map.
    
    Signed-off-by: Mark McLoughlin <markmc@redhat.com>

diff --git a/arch/x86/xen/ioremap.c b/arch/x86/xen/ioremap.c
index 70f77bd..e963ff9 100644
--- a/arch/x86/xen/ioremap.c
+++ b/arch/x86/xen/ioremap.c
@@ -145,3 +145,33 @@ int xen_ioremap_page_range(unsigned long addr, unsigned long end,
 	return rc;
 }
 
+int xen_map_foreign_range(struct vm_area_struct *vma, unsigned long addr,
+			  unsigned long mfn, unsigned long size,
+			  pgprot_t prot, domid_t domid)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return remap_pfn_range(vma, addr, mfn, size, prot);
+
+	if (domid == DOMID_SELF)
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
+#if 0
+	/* FIXME:
+	 * has_foreign_mappings is needed to avoid a race
+	 * condition whereby pages are freed by the unpin
+	 * in arch_exit_mmap() and then allocated to the
+	 * domain before the PTE is destroyed, leading to
+	 * an erroneous reference count update. See:
+	 * 
+	 * http://lists.xensource.com/archives/html/xen-devel/2006-08/msg00014.html
+	 * http://xenbits.xensource.com/xen-unstable.hg?rev/e351aace191e
+	 */
+	vma->vm_mm->context.has_foreign_mappings = 1;
+#endif
+
+	return __direct_remap_pfn_range(vma->vm_mm, addr, mfn,
+					size, prot, domid);
+}
+EXPORT_SYMBOL(xen_map_foreign_range);
diff --git a/drivers/xen/xenctrl/privcmd.c b/drivers/xen/xenctrl/privcmd.c
index 58c4b83..0b3526e 100644
--- a/drivers/xen/xenctrl/privcmd.c
+++ b/drivers/xen/xenctrl/privcmd.c
@@ -33,8 +33,157 @@
 #include <linux/proc_fs.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
 #include <asm/xen/hypervisor.h>
 #include <xen/sys/privcmd.h>
+#include <xen/features.h>
+
+static struct page *privcmd_nopage(struct vm_area_struct *vma,
+				   unsigned long address, int *type)
+{
+	return NOPAGE_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+	.nopage = privcmd_nopage
+};
+
+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	/* Unsupported for auto-translate guests. */
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -ENOSYS;
+
+	/* FIXME:
+	 * the VM_PFNMAP here may be dubious. See:
+	 *
+	 *   http://lists.xensource.com/archives/html/xen-devel/2007-12/msg00207.html
+	 *
+	 * Note, in recent kernels it is also needed to
+	 * avoid the mapping being dropped to VM_PRIVATE
+	 * by vma_wants_writenotify()
+	 */
+
+	/* DONTCOPY is essential for Xen as copy_page_range is broken. */
+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+	vma->vm_ops = &privcmd_vm_ops;
+	vma->vm_private_data = NULL;
+
+	return 0;
+}
+
+static struct vm_area_struct *
+privcmd_verify_mapping(struct mm_struct *mm, unsigned long addr)
+{
+	struct vm_area_struct *vma;
+
+	vma = find_vma(mm, addr);
+	if (!vma || vma->vm_start != addr)
+		return NULL;
+
+	/* Disallow multiple mappings */
+	return xchg(&vma->vm_private_data, (void *)1) ? NULL : vma;
+}
+
+static int privcmd_mmap_cmd(privcmd_mmap_t *cmd)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	privcmd_mmap_entry_t __user *p;
+	unsigned long va;
+	int i, ret = 0;
+
+	down_write(&mm->mmap_sem);
+
+	for (i = 0, p = cmd->entry, vma = NULL; i < cmd->num; i++, p++) {
+		privcmd_mmap_entry_t entry;
+		unsigned long end;
+
+		if (copy_from_user(&entry, p, sizeof(entry))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ret = -EINVAL;
+
+		if (!vma) {
+			vma = privcmd_verify_mapping(mm, entry.va);
+			if (!vma)
+				break;
+			va = vma->vm_start;
+		}
+
+		/* Do not allow range to wrap the address space. */
+		if (entry.npages > (LONG_MAX >> PAGE_SHIFT) ||
+		    (unsigned long)(entry.npages << PAGE_SHIFT) >= -va)
+			break;
+
+		end = entry.va + (entry.npages << PAGE_SHIFT);
+
+		/* Range chunks must be contiguous in va space. */
+		if (entry.va != va || end > vma->vm_end)
+			break;
+
+		ret = xen_map_foreign_range(vma,
+					    entry.va & PAGE_MASK,
+					    entry.mfn,
+					    entry.npages << PAGE_SHIFT,
+					    vma->vm_page_prot,
+					    cmd->dom);
+		if (ret < 0)
+			break;
+
+		va = end;
+	}
+
+	up_write(&mm->mmap_sem);
+
+	return ret;
+}
+
+static int privcmd_mmap_batch_cmd(privcmd_mmapbatch_t *cmd)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int i;
+
+	if (cmd->num <= 0 || cmd->num > (LONG_MAX >> PAGE_SHIFT))
+		return -EINVAL;
+
+	down_write(&mm->mmap_sem);
+
+	vma = privcmd_verify_mapping(mm, cmd->addr);
+	if (!vma ||
+	    cmd->addr + (cmd->num << PAGE_SHIFT) != vma->vm_end) {
+		up_write(&mm->mmap_sem);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < cmd->num; i++) {
+		unsigned long __user *p;
+		unsigned long addr, mfn;
+		int ret;
+
+		p = cmd->arr + i;
+
+		if (get_user(mfn, p)) {
+			up_write(&mm->mmap_sem);
+			return -EFAULT;
+		}
+
+		addr = (cmd->addr & PAGE_MASK) + (i << PAGE_SHIFT);
+
+		ret = xen_map_foreign_range(vma, addr, mfn, PAGE_SIZE,
+					    vma->vm_page_prot, cmd->dom);
+		if (ret < 0)
+			put_user(0xF0000000 | mfn, p);
+	}
+
+	up_write(&mm->mmap_sem);
+
+	return 0;
+}
 
 static long privcmd_ioctl(struct file *file, unsigned int cmd,
 			  unsigned long arg)
@@ -49,9 +198,30 @@ static long privcmd_ioctl(struct file *file, unsigned int cmd,
 		return privcmd_hypercall(&cmd);
 	}
 
-	case IOCTL_PRIVCMD_MMAP:
-	case IOCTL_PRIVCMD_MMAPBATCH:
-		printk(KERN_WARNING "IOCTL_PRIVCMD_MMAP ioctl not yet implemented\n");
+	case IOCTL_PRIVCMD_MMAP: {
+		privcmd_mmap_t cmd;
+
+		if (!is_initial_xendomain())
+			return -EPERM;
+
+		if (copy_from_user(&cmd, (void __user *)arg, sizeof(cmd)))
+			return -EFAULT;
+
+		return privcmd_mmap_cmd(&cmd);
+	}
+
+	case IOCTL_PRIVCMD_MMAPBATCH: {
+		privcmd_mmapbatch_t cmd;
+
+		if (!is_initial_xendomain())
+			return -EPERM;
+
+		if (copy_from_user(&cmd, (void __user *)arg, sizeof(cmd)))
+			return -EFAULT;
+
+		return privcmd_mmap_batch_cmd(&cmd);
+	}
+
 	default:
 		return -EINVAL;
 	}
@@ -59,6 +229,7 @@ static long privcmd_ioctl(struct file *file, unsigned int cmd,
 
 static const struct file_operations privcmd_file_ops = {
 	.unlocked_ioctl = privcmd_ioctl,
+	.mmap = privcmd_mmap,
 };
 
 int __init privcmd_create_proc_entry(void)
diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h
index 8e15dd2..a266f4f 100644
--- a/include/asm-x86/xen/hypervisor.h
+++ b/include/asm-x86/xen/hypervisor.h
@@ -70,4 +70,8 @@ u64 jiffies_to_st(unsigned long jiffies);
 
 #define is_running_on_xen()	(xen_start_info ? 1 : 0)
 
+int xen_map_foreign_range(struct vm_area_struct *vma, unsigned long addr,
+			  unsigned long mfn, unsigned long size,
+			  pgprot_t prot, domid_t domid);
+
 #endif /* __HYPERVISOR_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

  reply	other threads:[~2008-04-21 16:36 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-20 21:19 Vanilla Linux and has_foreign_mapping Michael Abd-El-Malek
2008-04-21 11:46 ` Jeremy Fitzhardinge
2008-04-21 16:36   ` Mark McLoughlin [this message]
2008-04-21 18:00   ` Michael Abd-El-Malek
2008-04-21 17:52 ` Keir Fraser
2008-04-21 18:10   ` Michael Abd-El-Malek
2008-04-21 18:17     ` Michael Abd-El-Malek
2008-04-21 18:30       ` Keir Fraser
2008-04-25  0:18         ` Jeremy Fitzhardinge
2008-04-25  6:01           ` Keir Fraser
2008-04-25 17:11           ` Michael Abd-El-Malek
2008-04-25 18:22             ` Jeremy Fitzhardinge
2008-04-25 18:31               ` Michael Abd-El-Malek
2008-04-25 22:33                 ` Jeremy Fitzhardinge
2008-04-29 16:39                   ` Michael Abd-El-Malek
2008-04-29 17:32                     ` Jeremy Fitzhardinge
2008-04-30 16:31                       ` Michael Abd-El-Malek
     [not found]             ` <20080425172416.GC23300@duo.random>
2008-04-26  0:14               ` Jeremy Fitzhardinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1208795812.3761.6.camel@muff \
    --to=markmc@redhat.com \
    --cc=ehabkost@redhat.com \
    --cc=jeremy@goop.org \
    --cc=mabdelmalek@cmu.edu \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.