From: Haozhong Zhang <haozhong.zhang@intel.com>
To: xen-devel@lists.xen.org
Cc: Haozhong Zhang <haozhong.zhang@intel.com>,
	Wei Liu <wei.liu2@citrix.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	Xiao Guangrong <guangrong.xiao@linux.intel.com>
Subject: [RFC XEN PATCH 04/16] xen/x86: add XENMEM_populate_pmemmap to map host pmem pages to guest
Date: Mon, 10 Oct 2016 08:32:23 +0800
Message-ID: <20161010003235.4213-5-haozhong.zhang@intel.com>
In-Reply-To: <20161010003235.4213-1-haozhong.zhang@intel.com>

XENMEM_populate_pmemmap is used by the toolstack to map the given host pmem
pages to the given guest physical pages. Only pages in the data area of a
pmem region are allowed to be mapped to a guest.

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Ian Jackson <ian.jackson@eu.citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
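Not part of this patch: a minimal, illustrative toolstack-side sketch of how
the new libxc wrapper could be used, assuming the start MFN of a pmem data
area has already been discovered by other means. The domid, MFN and GPFN
values below are placeholders, not values this series defines.

/* Build against libxc, e.g. with -lxenctrl. */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <xenctrl.h>

#define NR_PAGES   256           /* map 256 pmem pages (1MB with 4K pages) */
#define GPFN_BASE  0x100000UL    /* illustrative guest frame at the 4GB mark */

int main(int argc, char *argv[])
{
    xc_interface *xch;
    uint32_t domid;
    xen_pfn_t data_mfn;
    int rc;

    if ( argc != 3 )
    {
        fprintf(stderr, "usage: %s <domid> <pmem-data-start-mfn>\n", argv[0]);
        return 1;
    }
    domid = strtoul(argv[1], NULL, 0);
    data_mfn = strtoul(argv[2], NULL, 0);

    xch = xc_interface_open(NULL, NULL, 0);
    if ( !xch )
    {
        fprintf(stderr, "failed to open xc interface\n");
        return 1;
    }

    /*
     * Issues XENMEM_populate_pmemmap; the hypervisor side rejects MFNs
     * that are not in the data area of a registered pmem region.
     */
    rc = xc_domain_populate_pmemmap(xch, domid, data_mfn, GPFN_BASE, NR_PAGES);
    if ( rc )
        fprintf(stderr, "xc_domain_populate_pmemmap failed: %d\n", rc);

    xc_interface_close(xch);
    return rc ? 1 : 0;
}
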
 tools/libxc/include/xenctrl.h |   8 +++
 tools/libxc/xc_domain.c       |  14 +++++
 xen/arch/x86/pmem.c           | 123 ++++++++++++++++++++++++++++++++++++++++++
 xen/common/domain.c           |   3 ++
 xen/common/memory.c           |  31 +++++++++++
 xen/include/public/memory.h   |  14 ++++-
 xen/include/xen/pmem.h        |  10 ++++
 xen/include/xen/sched.h       |   3 ++
 8 files changed, 205 insertions(+), 1 deletion(-)
diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h
index 2c83544..46c71fc 100644
--- a/tools/libxc/include/xenctrl.h
+++ b/tools/libxc/include/xenctrl.h
@@ -2710,6 +2710,14 @@ int xc_livepatch_revert(xc_interface *xch, char *name, uint32_t timeout);
 int xc_livepatch_unload(xc_interface *xch, char *name, uint32_t timeout);
 int xc_livepatch_replace(xc_interface *xch, char *name, uint32_t timeout);
 
+/**
+ * Map host pmem pages at MFNs @mfn ~ (@mfn + @nr_mfns - 1) to
+ * guest physical pages at guest PFNs @gpfn ~ (@gpfn + @nr_mfns - 1)
+ */
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+                               xen_pfn_t mfn, xen_pfn_t gpfn,
+                               unsigned int nr_mfns);
+
 /* Compat shims */
 #include "xenctrl_compat.h"
 
diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c
index 296b852..81a90a1 100644
--- a/tools/libxc/xc_domain.c
+++ b/tools/libxc/xc_domain.c
@@ -2520,6 +2520,20 @@ int xc_domain_soft_reset(xc_interface *xch,
     domctl.domain = (domid_t)domid;
     return do_domctl(xch, &domctl);
 }
+
+int xc_domain_populate_pmemmap(xc_interface *xch, uint32_t domid,
+                               xen_pfn_t mfn, xen_pfn_t gpfn,
+                               unsigned int nr_mfns)
+{
+    struct xen_pmemmap pmemmap = {
+        .domid   = domid,
+        .mfn     = mfn,
+        .gpfn    = gpfn,
+        .nr_mfns = nr_mfns,
+    };
+    return do_memory_op(xch, XENMEM_populate_pmemmap, &pmemmap, sizeof(pmemmap));
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
index 70358ed..e4dc685 100644
--- a/xen/arch/x86/pmem.c
+++ b/xen/arch/x86/pmem.c
@@ -24,6 +24,9 @@
 #include <xen/spinlock.h>
 #include <xen/pmem.h>
 #include <xen/iocap.h>
+#include <xen/sched.h>
+#include <xen/event.h>
+#include <xen/paging.h>
 #include <asm-x86/mm.h>
 
 /*
@@ -63,6 +66,48 @@ static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
         ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
 }
 
+static int is_data_mfn(unsigned long mfn)
+{
+    struct list_head *cur;
+    int data = 0;
+
+    ASSERT(spin_is_locked(&pmem_list_lock));
+
+    list_for_each(cur, &pmem_list)
+    {
+        struct pmem *pmem = list_entry(cur, struct pmem, link);
+
+        if ( pmem->data_spfn <= mfn && mfn < pmem->data_epfn )
+        {
+            data = 1;
+            break;
+        }
+    }
+
+    return data;
+}
+
+static int pmem_page_valid(struct page_info *page, struct domain *d)
+{
+    /* only data area can be mapped to guest */
+    if ( !is_data_mfn(page_to_mfn(page)) )
+    {
+        dprintk(XENLOG_DEBUG, "pmem: mfn 0x%lx is not a pmem data page\n",
+                page_to_mfn(page));
+        return 0;
+    }
+
+    /* inuse/offlined/offlining pmem page cannot be mapped to guest */
+    if ( !page_state_is(page, free) )
+    {
+        dprintk(XENLOG_DEBUG, "pmem: invalid page state of mfn 0x%lx: 0x%lx\n",
+                page_to_mfn(page), page->count_info & PGC_state);
+        return 0;
+    }
+
+    return 1;
+}
+
 static int pmem_add_check(unsigned long spfn, unsigned long epfn,
                           unsigned long rsv_spfn, unsigned long rsv_epfn,
                           unsigned long data_spfn, unsigned long data_epfn)
@@ -159,3 +204,81 @@ int pmem_add(unsigned long spfn, unsigned long epfn,
  out:
     return ret;
 }
+
+static int pmem_assign_pages(struct domain *d,
+                             struct page_info *pg, unsigned int order)
+{
+    int rc = 0;
+    unsigned long i;
+
+    spin_lock(&d->pmem_lock);
+
+    if ( unlikely(d->is_dying) )
+    {
+        rc = -EINVAL;
+        goto out;
+    }
+
+    for ( i = 0; i < (1 << order); i++ )
+    {
+        ASSERT(page_get_owner(&pg[i]) == NULL);
+        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
+        page_set_owner(&pg[i], d);
+        smp_wmb();
+        pg[i].count_info = PGC_allocated | 1;
+        page_list_add_tail(&pg[i], &d->pmem_page_list);
+    }
+
+ out:
+    spin_unlock(&d->pmem_lock);
+    return rc;
+}
+
+int pmem_populate(struct xen_pmemmap_args *args)
+{
+    struct domain *d = args->domain;
+    unsigned long i, mfn, gpfn;
+    struct page_info *page;
+    int rc = 0;
+
+    if ( !has_hvm_container_domain(d) || !paging_mode_translate(d) )
+        return -EINVAL;
+
+    for ( i = args->nr_done, mfn = args->mfn + i, gpfn = args->gpfn + i;
+          i < args->nr_mfns;
+          i++, mfn++, gpfn++ )
+    {
+        if ( i != args->nr_done && hypercall_preempt_check() )
+        {
+            args->preempted = 1;
+            goto out;
+        }
+
+        page = mfn_to_page(mfn);
+
+        spin_lock(&pmem_list_lock);
+        if ( !pmem_page_valid(page, d) )
+        {
+            dprintk(XENLOG_DEBUG, "pmem: MFN 0x%lx not a valid pmem page\n", mfn);
+            spin_unlock(&pmem_list_lock);
+            rc = -EINVAL;
+            goto out;
+        }
+        page->count_info = PGC_state_inuse;
+        spin_unlock(&pmem_list_lock);
+
+        page->u.inuse.type_info = 0;
+
+        guest_physmap_add_page(d, _gfn(gpfn), _mfn(mfn), 0);
+        if ( pmem_assign_pages(d, page, 0) )
+        {
+            guest_physmap_remove_page(d, _gfn(gpfn), _mfn(mfn), 0);
+            rc = -EFAULT;
+            goto out;
+        }
+    }
+
+ out:
+    args->nr_done = i;
+    return rc;
+}
diff --git a/xen/common/domain.c b/xen/common/domain.c
index 3abaca9..8192548 100644
--- a/xen/common/domain.c
+++ b/xen/common/domain.c
@@ -288,6 +288,9 @@ struct domain *domain_create(domid_t domid, unsigned int domcr_flags,
     INIT_PAGE_LIST_HEAD(&d->page_list);
     INIT_PAGE_LIST_HEAD(&d->xenpage_list);
 
+    spin_lock_init_prof(d, pmem_lock);
+    INIT_PAGE_LIST_HEAD(&d->pmem_page_list);
+
     spin_lock_init(&d->node_affinity_lock);
     d->node_affinity = NODE_MASK_ALL;
     d->auto_node_affinity = 1;
diff --git a/xen/common/memory.c b/xen/common/memory.c
index 21797ca..09cb1c9 100644
--- a/xen/common/memory.c
+++ b/xen/common/memory.c
@@ -24,6 +24,7 @@
 #include <xen/numa.h>
 #include <xen/mem_access.h>
 #include <xen/trace.h>
+#include <xen/pmem.h>
 #include <asm/current.h>
 #include <asm/hardirq.h>
 #include <asm/p2m.h>
@@ -1329,6 +1330,36 @@ long do_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
     }
 #endif
 
+    case XENMEM_populate_pmemmap:
+    {
+        struct xen_pmemmap pmemmap;
+        struct xen_pmemmap_args args;
+
+        if ( copy_from_guest(&pmemmap, arg, 1) )
+            return -EFAULT;
+
+        d = rcu_lock_domain_by_any_id(pmemmap.domid);
+        if ( !d )
+            return -EINVAL;
+
+        args.domain = d;
+        args.mfn = pmemmap.mfn;
+        args.gpfn = pmemmap.gpfn;
+        args.nr_mfns = pmemmap.nr_mfns;
+        args.nr_done = start_extent;
+        args.preempted = 0;
+
+        rc = pmem_populate(&args);
+        rcu_unlock_domain(d);
+
+        if ( !rc && args.preempted )
+            return hypercall_create_continuation(
+                __HYPERVISOR_memory_op, "lh",
+                op | (args.nr_done << MEMOP_EXTENT_SHIFT), arg);
+
+        break;
+    }
+
     default:
         rc = arch_memory_op(cmd, arg);
         break;
diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
index 5bf840f..8c048fc 100644
--- a/xen/include/public/memory.h
+++ b/xen/include/public/memory.h
@@ -646,7 +646,19 @@ struct xen_vnuma_topology_info {
 typedef struct xen_vnuma_topology_info xen_vnuma_topology_info_t;
 DEFINE_XEN_GUEST_HANDLE(xen_vnuma_topology_info_t);
 
-/* Next available subop number is 28 */
+#define XENMEM_populate_pmemmap 28
+
+struct xen_pmemmap {
+    /* IN */
+    domid_t domid;
+    xen_pfn_t mfn;
+    xen_pfn_t gpfn;
+    unsigned int nr_mfns;
+};
+typedef struct xen_pmemmap xen_pmemmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_pmemmap_t);
+
+/* Next available subop number is 29 */
 
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
 
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
index a670ab8..60adf56 100644
--- a/xen/include/xen/pmem.h
+++ b/xen/include/xen/pmem.h
@@ -24,8 +24,18 @@
 
 #include <xen/types.h>
 
+struct xen_pmemmap_args {
+    struct domain *domain;
+    xen_pfn_t mfn;
+    xen_pfn_t gpfn;
+    unsigned int nr_mfns;
+    unsigned int nr_done;
+    int preempted;
+};
+
 int pmem_add(unsigned long spfn, unsigned long epfn,
              unsigned long rsv_spfn, unsigned long rsv_epfn,
              unsigned long data_spfn, unsigned long data_epfn);
+int pmem_populate(struct xen_pmemmap_args *args);
 
 #endif /* __XEN_PMEM_H__ */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index 1fbda87..3c66225 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -329,6 +329,9 @@ struct domain
     atomic_t         shr_pages;       /* number of shared pages             */
     atomic_t         paged_pages;     /* number of paged-out pages          */
 
+    spinlock_t       pmem_lock;       /* protect all following pmem_ fields */
+    struct page_list_head pmem_page_list; /* linked list of pmem pages      */
+
     /* Scheduling. */
     void            *sched_priv;    /* scheduler-specific data */
     struct cpupool  *cpupool;
-- 
2.10.1