From: Haozhong Zhang <haozhong.zhang@intel.com>
To: xen-devel@lists.xen.org
Cc: Haozhong Zhang <haozhong.zhang@intel.com>,
	Xiao Guangrong <guangrong.xiao@linux.intel.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	Daniel De Graaf <dgdegra@tycho.nsa.gov>
Subject: [RFC XEN PATCH 03/16] xen/x86: add a hypercall XENPF_pmem_add to report host pmem regions
Date: Mon, 10 Oct 2016 08:32:22 +0800
Message-ID: <20161010003235.4213-4-haozhong.zhang@intel.com>
In-Reply-To: <20161010003235.4213-1-haozhong.zhang@intel.com>

The Xen hypervisor does not include a pmem driver. Instead, it relies on the
pmem driver in Dom0 to report, via XENPF_pmem_add, the PFN ranges of the
entire pmem region and of its reserved and data areas. The reserved area is
used by the Xen hypervisor to place the frame table and M2P table for the
region; once it has been reported, Dom0 is no longer allowed to access it.
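
For illustration, a Dom0-side caller could issue the new hypercall roughly as
in the sketch below. This is a minimal, untested sketch that assumes a Linux
Dom0 whose Xen headers already carry the XENPF_pmem_add definitions introduced
by this patch; report_pmem_region() is a hypothetical helper and all PFN
arguments are placeholders chosen by the caller:

    #include <xen/interface/platform.h>
    #include <asm/xen/hypercall.h>

    /* Report one pmem region to Xen; all arguments are PFNs, [start, end). */
    static int report_pmem_region(unsigned long spfn, unsigned long epfn,
                                  unsigned long rsv_spfn, unsigned long rsv_epfn,
                                  unsigned long data_spfn, unsigned long data_epfn)
    {
        struct xen_platform_op op = {
            .cmd = XENPF_pmem_add,
            .interface_version = XENPF_INTERFACE_VERSION,
        };

        op.u.pmem_add.spfn      = spfn;
        op.u.pmem_add.epfn      = epfn;
        op.u.pmem_add.rsv_spfn  = rsv_spfn;
        op.u.pmem_add.rsv_epfn  = rsv_epfn;
        op.u.pmem_add.data_spfn = data_spfn;
        op.u.pmem_add.data_epfn = data_epfn;

        /* Returns 0 on success or a negative errno value from Xen. */
        return HYPERVISOR_platform_op(&op);
    }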

Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
---
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Daniel De Graaf <dgdegra@tycho.nsa.gov>
---
 xen/arch/x86/Makefile             |   1 +
 xen/arch/x86/platform_hypercall.c |   7 ++
 xen/arch/x86/pmem.c               | 161 ++++++++++++++++++++++++++++++++++++++
 xen/arch/x86/x86_64/mm.c          |  54 +++++++++++++
 xen/include/asm-x86/mm.h          |   4 +
 xen/include/public/platform.h     |  14 ++++
 xen/include/xen/pmem.h            |  31 ++++++++
 xen/xsm/flask/hooks.c             |   1 +
 8 files changed, 273 insertions(+)
 create mode 100644 xen/arch/x86/pmem.c
 create mode 100644 xen/include/xen/pmem.h
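
As a rough sizing note (not part of the patch): the reserved area must be large
enough to hold the frame table and the M2P entries for the whole region.
Assuming 4 KiB pages, a 32-byte struct page_info and 8-byte M2P entries, as on
current x86-64 builds, a 128 GiB pmem region works out as in the sketch below;
the numbers are illustrative only:

    /* Back-of-the-envelope reservation estimate for a 128 GiB pmem region. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long total_mfns = (128UL << 30) >> 12;     /* 33554432 pages */
        unsigned long ft_pages   = (total_mfns * 32) >> 12; /* frame table: 262144 pages (1 GiB) */
        unsigned long m2p_pages  = (total_mfns * 8) >> 12;  /* M2P table: 65536 pages (256 MiB) */

        /* check_reserved_size() requires rsv_epfn - rsv_spfn to be at least this. */
        printf("minimum reservation: %lu pages (%lu MiB)\n",
               ft_pages + m2p_pages, (ft_pages + m2p_pages) >> 8);
        return 0;
    }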

diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 931917d..9cf2da1 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -67,6 +67,7 @@ obj-$(CONFIG_TBOOT) += tboot.o
 obj-y += hpet.o
 obj-y += vm_event.o
 obj-y += xstate.o
+obj-y += pmem.o
 
 x86_emulate.o: x86_emulate/x86_emulate.c x86_emulate/x86_emulate.h
 
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 0879e19..c47eea4 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -24,6 +24,7 @@
 #include <xen/pmstat.h>
 #include <xen/irq.h>
 #include <xen/symbols.h>
+#include <xen/pmem.h>
 #include <asm/current.h>
 #include <public/platform.h>
 #include <acpi/cpufreq/processor_perf.h>
@@ -822,6 +823,12 @@ ret_t do_platform_op(XEN_GUEST_HANDLE_PARAM(xen_platform_op_t) u_xenpf_op)
     }
     break;
 
+    case XENPF_pmem_add:
+        ret = pmem_add(op->u.pmem_add.spfn, op->u.pmem_add.epfn,
+                       op->u.pmem_add.rsv_spfn, op->u.pmem_add.rsv_epfn,
+                       op->u.pmem_add.data_spfn, op->u.pmem_add.data_epfn);
+        break;
+
     default:
         ret = -ENOSYS;
         break;
diff --git a/xen/arch/x86/pmem.c b/xen/arch/x86/pmem.c
new file mode 100644
index 0000000..70358ed
--- /dev/null
+++ b/xen/arch/x86/pmem.c
@@ -0,0 +1,161 @@
+/******************************************************************************
+ * arch/x86/pmem.c
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@intel.com>
+ */
+
+#include <xen/guest_access.h>
+#include <xen/list.h>
+#include <xen/spinlock.h>
+#include <xen/pmem.h>
+#include <xen/iocap.h>
+#include <asm/mm.h>
+
+/*
+ * All pmem regions reported from Dom0 are linked in pmem_list, which
+ * is protected by pmem_list_lock. Its entries are of type struct pmem
+ * and sorted in ascending order of spfn.
+ */
+static DEFINE_SPINLOCK(pmem_list_lock);
+static LIST_HEAD(pmem_list);
+
+struct pmem {
+    struct list_head link;   /* link to pmem_list */
+    unsigned long spfn;      /* start PFN of the whole pmem region */
+    unsigned long epfn;      /* end PFN of the whole pmem region */
+    unsigned long rsv_spfn;  /* start PFN of the reserved area */
+    unsigned long rsv_epfn;  /* end PFN of the reserved area */
+    unsigned long data_spfn; /* start PFN of the data area */
+    unsigned long data_epfn; /* end PFN of the data area */
+};
+
+static int is_included(unsigned long s1, unsigned long e1,
+                       unsigned long s2, unsigned long e2)
+{
+    return s1 <= s2 && s2 < e2 && e2 <= e1;
+}
+
+static int is_overlapped(unsigned long s1, unsigned long e1,
+                         unsigned long s2, unsigned long e2)
+{
+    return (s1 <= s2 && s2 < e1) || (s2 < s1 && s1 < e2);
+}
+
+static int check_reserved_size(unsigned long rsv_mfns, unsigned long total_mfns)
+{
+    return rsv_mfns >=
+        ((sizeof(struct page_info) * total_mfns) >> PAGE_SHIFT) +
+        ((sizeof(*machine_to_phys_mapping) * total_mfns) >> PAGE_SHIFT);
+}
+
+static int pmem_add_check(unsigned long spfn, unsigned long epfn,
+                          unsigned long rsv_spfn, unsigned long rsv_epfn,
+                          unsigned long data_spfn, unsigned long data_epfn)
+{
+    if ( spfn >= epfn || rsv_spfn >= rsv_epfn || data_spfn >= data_epfn )
+        return 0;
+
+    if ( !is_included(spfn, epfn, rsv_spfn, rsv_epfn) ||
+         !is_included(spfn, epfn, data_spfn, data_epfn) )
+        return 0;
+
+    if ( is_overlapped(rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+        return 0;
+
+    if ( !check_reserved_size(rsv_epfn - rsv_spfn, epfn - spfn) )
+        return 0;
+
+    return 1;
+}
+
+static int pmem_list_add(unsigned long spfn, unsigned long epfn,
+                         unsigned long rsv_spfn, unsigned long rsv_epfn,
+                         unsigned long data_spfn, unsigned long data_epfn)
+{
+    struct list_head *cur;
+    struct pmem *new_pmem;
+    int ret = 0;
+
+    spin_lock(&pmem_list_lock);
+
+    list_for_each_prev(cur, &pmem_list)
+    {
+        struct pmem *cur_pmem = list_entry(cur, struct pmem, link);
+        unsigned long cur_spfn = cur_pmem->spfn;
+        unsigned long cur_epfn = cur_pmem->epfn;
+
+        if ( (cur_spfn <= spfn && spfn < cur_epfn) ||
+             (spfn <= cur_spfn && cur_spfn < epfn) )
+        {
+            ret = -EINVAL;
+            goto out;
+        }
+
+        if ( cur_spfn < spfn )
+            break;
+    }
+
+    new_pmem = xmalloc(struct pmem);
+    if ( !new_pmem )
+    {
+        ret = -ENOMEM;
+        goto out;
+    }
+    new_pmem->spfn      = spfn;
+    new_pmem->epfn      = epfn;
+    new_pmem->rsv_spfn  = rsv_spfn;
+    new_pmem->rsv_epfn  = rsv_epfn;
+    new_pmem->data_spfn = data_spfn;
+    new_pmem->data_epfn = data_epfn;
+    list_add(&new_pmem->link, cur);
+
+ out:
+    spin_unlock(&pmem_list_lock);
+    return ret;
+}
+
+int pmem_add(unsigned long spfn, unsigned long epfn,
+             unsigned long rsv_spfn, unsigned long rsv_epfn,
+             unsigned long data_spfn, unsigned long data_epfn)
+{
+    int ret;
+
+    if ( !pmem_add_check(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn) )
+        return -EINVAL;
+
+    ret = pmem_setup(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+    if ( ret )
+        goto out;
+
+    ret = iomem_deny_access(current->domain, rsv_spfn, rsv_epfn - 1);
+    if ( ret )
+        goto out;
+
+    ret = pmem_list_add(spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+    if ( ret )
+        goto out;
+
+    printk(XENLOG_INFO
+           "pmem: pfns     0x%lx - 0x%lx\n"
+           "      reserved 0x%lx - 0x%lx\n"
+           "      data     0x%lx - 0x%lx\n",
+           spfn, epfn, rsv_spfn, rsv_epfn, data_spfn, data_epfn);
+
+ out:
+    return ret;
+}
diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c
index 5c0f527..b1f92f6 100644
--- a/xen/arch/x86/x86_64/mm.c
+++ b/xen/arch/x86/x86_64/mm.c
@@ -1474,6 +1474,60 @@ destroy_frametable:
     return ret;
 }
 
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+               unsigned long rsv_spfn, unsigned long rsv_epfn,
+               unsigned long data_spfn, unsigned long data_epfn)
+{
+    unsigned long old_max = max_page, old_total = total_pages;
+    struct mem_hotadd_info info =
+        { .spfn = spfn, .epfn = epfn, .cur = spfn };
+    struct mem_hotadd_info rsv_info =
+        { .spfn = rsv_spfn, .epfn = rsv_epfn, .cur = rsv_spfn };
+    int ret;
+    unsigned long i;
+    struct page_info *pg;
+
+    if ( !mem_hotadd_check(spfn, epfn) )
+        return -EINVAL;
+
+    ret = extend_frame_table(&info, &rsv_info);
+    if ( ret )
+        goto destroy_frametable;
+
+    if ( max_page < epfn )
+    {
+        max_page = epfn;
+        max_pdx = pfn_to_pdx(max_page - 1) + 1;
+    }
+    total_pages += epfn - spfn;
+
+    set_pdx_range(spfn, epfn);
+    ret = setup_m2p_table(&info, &rsv_info);
+    if ( ret )
+        goto destroy_m2p;
+
+    share_hotadd_m2p_table(&info);
+
+    for ( i = spfn; i < epfn; i++ )
+    {
+        pg = mfn_to_page(i);
+        pg->count_info = (rsv_spfn <= i && i < rsv_info.cur) ?
+                         PGC_state_inuse : PGC_state_free;
+    }
+
+    return 0;
+
+destroy_m2p:
+    destroy_m2p_mapping(&info);
+    max_page = old_max;
+    total_pages = old_total;
+    max_pdx = pfn_to_pdx(max_page - 1) + 1;
+destroy_frametable:
+    cleanup_frame_table(&info);
+
+    return ret;
+}
+
 #include "compat/mm.c"
 
 /*
diff --git a/xen/include/asm-x86/mm.h b/xen/include/asm-x86/mm.h
index b781495..e31f1c8 100644
--- a/xen/include/asm-x86/mm.h
+++ b/xen/include/asm-x86/mm.h
@@ -597,4 +597,8 @@ typedef struct mm_rwlock {
 
 extern const char zero_page[];
 
+int pmem_setup(unsigned long spfn, unsigned long epfn,
+               unsigned long rsv_spfn, unsigned long rsv_epfn,
+               unsigned long data_spfn, unsigned long data_epfn);
+
 #endif /* __ASM_X86_MM_H__ */
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index 1e6a6ce..c7e7cce 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -608,6 +608,19 @@ struct xenpf_symdata {
 typedef struct xenpf_symdata xenpf_symdata_t;
 DEFINE_XEN_GUEST_HANDLE(xenpf_symdata_t);
 
+#define XENPF_pmem_add     64
+struct xenpf_pmem_add {
+    /* IN variables */
+    uint64_t spfn;      /* start PFN of the whole pmem region */
+    uint64_t epfn;      /* end PFN of the whole pmem region */
+    uint64_t rsv_spfn;  /* start PFN of the reserved area within the region */
+    uint64_t rsv_epfn;  /* end PFN of the reserved area within the region */
+    uint64_t data_spfn; /* start PFN of the data area within the region */
+    uint64_t data_epfn; /* end PFN of the data area within the region */
+};
+typedef struct xenpf_pmem_add xenpf_pmem_add_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_pmem_add_t);
+
 /*
  * ` enum neg_errnoval
  * ` HYPERVISOR_platform_op(const struct xen_platform_op*);
@@ -638,6 +651,7 @@ struct xen_platform_op {
         struct xenpf_core_parking      core_parking;
         struct xenpf_resource_op       resource_op;
         struct xenpf_symdata           symdata;
+        struct xenpf_pmem_add          pmem_add;
         uint8_t                        pad[128];
     } u;
 };
diff --git a/xen/include/xen/pmem.h b/xen/include/xen/pmem.h
new file mode 100644
index 0000000..a670ab8
--- /dev/null
+++ b/xen/include/xen/pmem.h
@@ -0,0 +1,31 @@
+/*
+ * xen/include/xen/pmem.h
+ *
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Haozhong Zhang <haozhong.zhang@intel.com>
+ */
+
+#ifndef __XEN_PMEM_H__
+#define __XEN_PMEM_H__
+
+#include <xen/types.h>
+
+int pmem_add(unsigned long spfn, unsigned long epfn,
+             unsigned long rsv_spfn, unsigned long rsv_epfn,
+             unsigned long data_spfn, unsigned long data_epfn);
+
+#endif /* __XEN_PMEM_H__ */
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 177c11f..948a161 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -1360,6 +1360,7 @@ static int flask_platform_op(uint32_t op)
     case XENPF_cpu_offline:
     case XENPF_cpu_hotadd:
     case XENPF_mem_hotadd:
+    case XENPF_pmem_add:
         return 0;
 #endif
 
-- 
2.10.1

