From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Yu, Zhang" Subject: Re: [PATCH v5 1/2] Differentiate IO/mem resources tracked by ioreq server Date: Wed, 19 Aug 2015 17:09:36 +0800 Message-ID: <55D447D0.7040307@linux.intel.com> References: <1439553764-7865-1-git-send-email-yu.c.zhang@linux.intel.com> <1439553764-7865-2-git-send-email-yu.c.zhang@linux.intel.com> <9AAE0902D5BC7E449B7C8E4E778ABCD02F582BDD@AMSPEX01CL01.citrite.net> <55D2C3B2.1070409@linux.intel.com> <9AAE0902D5BC7E449B7C8E4E778ABCD02F589FE0@AMSPEX01CL01.citrite.net> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <9AAE0902D5BC7E449B7C8E4E778ABCD02F589FE0@AMSPEX01CL01.citrite.net> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Paul Durrant , "xen-devel@lists.xen.org" , Ian Jackson , Stefano Stabellini , Ian Campbell , Wei Liu , "Keir (Xen.org)" , "jbeulich@suse.com" , Andrew Cooper Cc: Kevin Tian , "zhiyuan.lv@intel.com" List-Id: xen-devel@lists.xenproject.org On 8/18/2015 9:25 PM, Paul Durrant wrote: >> -----Original Message----- >> From: Yu, Zhang [mailto:yu.c.zhang@linux.intel.com] >> Sent: 17 August 2015 22:34 >> To: Paul Durrant; xen-devel@lists.xen.org; Ian Jackson; Stefano Stabellini; Ian >> Campbell; Wei Liu; Keir (Xen.org); jbeulich@suse.com; Andrew Cooper >> Cc: Kevin Tian; zhiyuan.lv@intel.com >> Subject: Re: [Xen-devel] [PATCH v5 1/2] Differentiate IO/mem resources >> tracked by ioreq server >> >> >> >> On 8/14/2015 9:51 PM, Paul Durrant wrote: >>>> -----Original Message----- >>>> From: Yu Zhang [mailto:yu.c.zhang@linux.intel.com] >>>> Sent: 14 August 2015 13:03 >>>> To: xen-devel@lists.xen.org; Paul Durrant; Ian Jackson; Stefano Stabellini; >> Ian >>>> Campbell; Wei Liu; Keir (Xen.org); jbeulich@suse.com; Andrew Cooper >>>> Cc: Kevin Tian; zhiyuan.lv@intel.com >>>> Subject: [PATCH v5 1/2] Differentiate IO/mem resources tracked by ioreq >>>> server >>>> >>>> Currently in ioreq server, guest write-protected ram pages are >>>> tracked in the same rangeset with device mmio resources. Yet >>>> unlike device mmio, which can be in big chunks, the guest write- >>>> protected pages may be discrete ranges with 4K bytes each. >>>> >>>> This patch uses a seperate rangeset for the guest ram pages. >>>> And a new ioreq type, IOREQ_TYPE_WP_MEM, is defined. >>>> >>>> Note: Previously, a new hypercall or subop was suggested to map >>>> write-protected pages into ioreq server. However, it turned out >>>> handler of this new hypercall would be almost the same with the >>>> existing pair - HVMOP_[un]map_io_range_to_ioreq_server, and there's >>>> already a type parameter in this hypercall. So no new hypercall >>>> defined, only a new type is introduced. >>>> >>>> Signed-off-by: Yu Zhang >>>> --- >>>> tools/libxc/include/xenctrl.h | 31 ++++++++++++++++++++++ >>>> tools/libxc/xc_domain.c | 55 >>>> ++++++++++++++++++++++++++++++++++++++++ >>>> xen/arch/x86/hvm/hvm.c | 25 +++++++++++++++++- >>>> xen/include/asm-x86/hvm/domain.h | 4 +-- >>>> xen/include/public/hvm/hvm_op.h | 1 + >>>> xen/include/public/hvm/ioreq.h | 1 + >>>> 6 files changed, 114 insertions(+), 3 deletions(-) >>>> >>>> diff --git a/tools/libxc/include/xenctrl.h b/tools/libxc/include/xenctrl.h >>>> index de3c0ad..3c276b7 100644 >>>> --- a/tools/libxc/include/xenctrl.h >>>> +++ b/tools/libxc/include/xenctrl.h >>>> @@ -2010,6 +2010,37 @@ int >>>> xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, >>>> int is_mmio, >>>> uint64_t start, >>>> uint64_t end); >>>> +/** >>>> + * This function registers a range of write-protected memory for >> emulation. >>>> + * >>>> + * @parm xch a handle to an open hypervisor interface. >>>> + * @parm domid the domain id to be serviced >>>> + * @parm id the IOREQ Server id. >>>> + * @parm start start of range >>>> + * @parm end end of range (inclusive). >>>> + * @return 0 on success, -1 on failure. >>>> + */ >>>> +int xc_hvm_map_mem_range_to_ioreq_server(xc_interface *xch, >>>> + domid_t domid, >>>> + ioservid_t id, >>>> + xen_pfn_t start, >>>> + xen_pfn_t end); >>>> + >>>> +/** >>>> + * This function deregisters a range of write-protected memory for >>>> emulation. >>>> + * >>>> + * @parm xch a handle to an open hypervisor interface. >>>> + * @parm domid the domain id to be serviced >>>> + * @parm id the IOREQ Server id. >>>> + * @parm start start of range >>>> + * @parm end end of range (inclusive). >>>> + * @return 0 on success, -1 on failure. >>>> + */ >>>> +int xc_hvm_unmap_mem_range_from_ioreq_server(xc_interface *xch, >>>> + domid_t domid, >>>> + ioservid_t id, >>>> + xen_pfn_t start, >>>> + xen_pfn_t end); >>>> >>>> /** >>>> * This function registers a PCI device for config space emulation. >>>> diff --git a/tools/libxc/xc_domain.c b/tools/libxc/xc_domain.c >>>> index 2ee26fb..9db05b3 100644 >>>> --- a/tools/libxc/xc_domain.c >>>> +++ b/tools/libxc/xc_domain.c >>>> @@ -1552,6 +1552,61 @@ int >>>> xc_hvm_unmap_io_range_from_ioreq_server(xc_interface *xch, >> domid_t >>>> domid, >>>> return rc; >>>> } >>>> >>>> +int xc_hvm_map_mem_range_to_ioreq_server(xc_interface *xch, >>>> domid_t domid, >>>> + ioservid_t id, xen_pfn_t start, xen_pfn_t end) >>>> +{ >>>> + DECLARE_HYPERCALL; >>>> + DECLARE_HYPERCALL_BUFFER(xen_hvm_io_range_t, arg); >>>> + int rc; >>>> + >>>> + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg)); >>>> + if ( arg == NULL ) >>>> + return -1; >>>> + >>>> + hypercall.op = __HYPERVISOR_hvm_op; >>>> + hypercall.arg[0] = HVMOP_map_io_range_to_ioreq_server; >>>> + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); >>>> + >>>> + arg->domid = domid; >>>> + arg->id = id; >>>> + arg->type = HVMOP_IO_RANGE_WP_MEM; >>>> + arg->start = start; >>>> + arg->end = end; >>>> + >>>> + rc = do_xen_hypercall(xch, &hypercall); >>>> + >>>> + xc_hypercall_buffer_free(xch, arg); >>>> + return rc; >>>> +} >>>> + >>>> +int xc_hvm_unmap_mem_range_from_ioreq_server(xc_interface *xch, >>>> domid_t domid, >>>> + ioservid_t id, xen_pfn_t start, xen_pfn_t end) >>>> +{ >>>> + DECLARE_HYPERCALL; >>>> + DECLARE_HYPERCALL_BUFFER(xen_hvm_io_range_t, arg); >>>> + int rc; >>>> + >>>> + arg = xc_hypercall_buffer_alloc(xch, arg, sizeof(*arg)); >>>> + if ( arg == NULL ) >>>> + return -1; >>>> + >>>> + hypercall.op = __HYPERVISOR_hvm_op; >>>> + hypercall.arg[0] = HVMOP_unmap_io_range_from_ioreq_server; >>>> + hypercall.arg[1] = HYPERCALL_BUFFER_AS_ARG(arg); >>>> + >>>> + arg->domid = domid; >>>> + arg->id = id; >>>> + arg->type = HVMOP_IO_RANGE_WP_MEM; >>>> + arg->start = start; >>>> + arg->end = end; >>>> + >>>> + rc = do_xen_hypercall(xch, &hypercall); >>>> + >>>> + xc_hypercall_buffer_free(xch, arg); >>>> + return rc; >>>> + >>>> +} >>>> + >>>> int xc_hvm_map_pcidev_to_ioreq_server(xc_interface *xch, domid_t >>>> domid, >>>> ioservid_t id, uint16_t segment, >>>> uint8_t bus, uint8_t device, >>>> diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c >>>> index c957610..5eb7864 100644 >>>> --- a/xen/arch/x86/hvm/hvm.c >>>> +++ b/xen/arch/x86/hvm/hvm.c >>>> @@ -938,8 +938,9 @@ static int >> hvm_ioreq_server_alloc_rangesets(struct >>>> hvm_ioreq_server *s, >>>> >>>> rc = asprintf(&name, "ioreq_server %d %s", s->id, >>>> (i == HVMOP_IO_RANGE_PORT) ? "port" : >>>> - (i == HVMOP_IO_RANGE_MEMORY) ? "memory" : >>>> + (i == HVMOP_IO_RANGE_MEMORY) ? "mmio" : >>>> (i == HVMOP_IO_RANGE_PCI) ? "pci" : >>>> + (i == HVMOP_IO_RANGE_WP_MEM) ? "wp-ed memory" : >>>> ""); >>>> if ( rc ) >>>> goto fail; >>>> @@ -1258,6 +1259,7 @@ static int >>>> hvm_map_io_range_to_ioreq_server(struct domain *d, ioservid_t id, >>>> case HVMOP_IO_RANGE_PORT: >>>> case HVMOP_IO_RANGE_MEMORY: >>>> case HVMOP_IO_RANGE_PCI: >>>> + case HVMOP_IO_RANGE_WP_MEM: >>>> r = s->range[type]; >>>> break; >>>> >>>> @@ -1309,6 +1311,7 @@ static int >>>> hvm_unmap_io_range_from_ioreq_server(struct domain *d, ioservid_t >> id, >>>> case HVMOP_IO_RANGE_PORT: >>>> case HVMOP_IO_RANGE_MEMORY: >>>> case HVMOP_IO_RANGE_PCI: >>>> + case HVMOP_IO_RANGE_WP_MEM: >>>> r = s->range[type]; >>>> break; >>>> >>>> @@ -2523,6 +2526,8 @@ struct hvm_ioreq_server >>>> *hvm_select_ioreq_server(struct domain *d, >>>> uint32_t cf8; >>>> uint8_t type; >>>> uint64_t addr; >>>> + p2m_type_t p2mt; >>>> + struct page_info *ram_page; >>>> >>>> if ( list_empty(&d->arch.hvm_domain.ioreq_server.list) ) >>>> return NULL; >>>> @@ -2565,6 +2570,17 @@ struct hvm_ioreq_server >>>> *hvm_select_ioreq_server(struct domain *d, >>>> { >>>> type = p->type; >>>> addr = p->addr; >>>> + >>>> + if ( p->type == IOREQ_TYPE_COPY ) >>>> + { >>>> + ram_page = get_page_from_gfn(d, p->addr >> PAGE_SHIFT, >>>> + &p2mt, P2M_UNSHARE); >>>> + if ( p2mt == p2m_mmio_write_dm ) >>>> + type = IOREQ_TYPE_WP_MEM; >>>> + >>>> + if ( ram_page ) >>>> + put_page(ram_page); >>>> + } >>>> } >>>> >>>> list_for_each_entry ( s, >>>> @@ -2582,6 +2598,7 @@ struct hvm_ioreq_server >>>> *hvm_select_ioreq_server(struct domain *d, >>>> BUILD_BUG_ON(IOREQ_TYPE_PIO != HVMOP_IO_RANGE_PORT); >>>> BUILD_BUG_ON(IOREQ_TYPE_COPY != >> HVMOP_IO_RANGE_MEMORY); >>>> BUILD_BUG_ON(IOREQ_TYPE_PCI_CONFIG != >>>> HVMOP_IO_RANGE_PCI); >>>> + BUILD_BUG_ON(IOREQ_TYPE_WP_MEM != >>>> HVMOP_IO_RANGE_WP_MEM); >>> >>> I wonder whether it's time to break this identity relationship. Is there really >> a need for the emulator to know the difference between MMIO and >> WP_MEM? If not then, IOREQ_TYPE_COPY would do for both but it does >> mean you'd need to index below using HVMOP_IO_RANGE_XXX values >> rather than the raw ioreq type itself. >>> >>> Paul >> >> Well, that sounds reasonable. The identity relationship is not so >> necessary as long as hvm_select_ioreq_server() can decide which >> rangeset to search. >> Maybe we can also remove the IOREQ_TYPE_PCI_CONFIG? With a separate >> patch as the first one in this patch series? :) > > No, you can't remove that type. That is actually passed to emulators. See upstream QEMU for usage. > > Paul Thank you, Paul. You are right. I kept the IOREQ_TYPE_PCI_CONFIG and added another patch to use the HVMOP_IO_RANGE_XXX to index the rangeset in hvm_select_ioreq_server() in version 6 patch series. Yu > >> >> Thanks >> Yu >>> >>>> r = s->range[type]; >>>> >>>> switch ( type ) >>>> @@ -2609,6 +2626,12 @@ struct hvm_ioreq_server >>>> *hvm_select_ioreq_server(struct domain *d, >>>> } >>>> >>>> break; >>>> + >>>> + case IOREQ_TYPE_WP_MEM: >>>> + if ( rangeset_contains_singleton(r, addr >> PAGE_SHIFT) ) >>>> + return s; >>>> + >>>> + break; >>>> } >>>> } >>>> >>>> diff --git a/xen/include/asm-x86/hvm/domain.h b/xen/include/asm- >>>> x86/hvm/domain.h >>>> index 992d5d1..b2e4234 100644 >>>> --- a/xen/include/asm-x86/hvm/domain.h >>>> +++ b/xen/include/asm-x86/hvm/domain.h >>>> @@ -48,8 +48,8 @@ struct hvm_ioreq_vcpu { >>>> bool_t pending; >>>> }; >>>> >>>> -#define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_PCI + 1) >>>> -#define MAX_NR_IO_RANGES 256 >>>> +#define NR_IO_RANGE_TYPES (HVMOP_IO_RANGE_WP_MEM + 1) >>>> +#define MAX_NR_IO_RANGES 8192 >>>> >>>> struct hvm_ioreq_server { >>>> struct list_head list_entry; >>>> diff --git a/xen/include/public/hvm/hvm_op.h >>>> b/xen/include/public/hvm/hvm_op.h >>>> index 014546a..c02c91a 100644 >>>> --- a/xen/include/public/hvm/hvm_op.h >>>> +++ b/xen/include/public/hvm/hvm_op.h >>>> @@ -331,6 +331,7 @@ struct xen_hvm_io_range { >>>> # define HVMOP_IO_RANGE_PORT 0 /* I/O port range */ >>>> # define HVMOP_IO_RANGE_MEMORY 1 /* MMIO range */ >>>> # define HVMOP_IO_RANGE_PCI 2 /* PCI segment/bus/dev/func >> range */ >>>> +# define HVMOP_IO_RANGE_WP_MEM 3 /* Write-protected memory >>>> range */ >>>> uint64_aligned_t start, end; /* IN - inclusive start and end of range */ >>>> }; >>>> typedef struct xen_hvm_io_range xen_hvm_io_range_t; >>>> diff --git a/xen/include/public/hvm/ioreq.h >>>> b/xen/include/public/hvm/ioreq.h >>>> index 2e5809b..2b712ac 100644 >>>> --- a/xen/include/public/hvm/ioreq.h >>>> +++ b/xen/include/public/hvm/ioreq.h >>>> @@ -35,6 +35,7 @@ >>>> #define IOREQ_TYPE_PIO 0 /* pio */ >>>> #define IOREQ_TYPE_COPY 1 /* mmio ops */ >>>> #define IOREQ_TYPE_PCI_CONFIG 2 >>>> +#define IOREQ_TYPE_WP_MEM 3 >>>> #define IOREQ_TYPE_TIMEOFFSET 7 >>>> #define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ >>>> >>>> -- >>>> 1.9.1 >>> >>> >>> _______________________________________________ >>> Xen-devel mailing list >>> Xen-devel@lists.xen.org >>> http://lists.xen.org/xen-devel >>> >>> > > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xen.org > http://lists.xen.org/xen-devel > >