From mboxrd@z Thu Jan 1 00:00:00 1970 From: Juergen Gross Subject: Re: [PATCH v3 8/9] libxc: rework of domain builder's page table handler Date: Thu, 29 Oct 2015 14:18:31 +0100 Message-ID: <56321CA7.7080602@suse.com> References: <1444741878-16610-1-git-send-email-jgross@suse.com> <1444741878-16610-9-git-send-email-jgross@suse.com> <20151029124829.GM18674@zion.uk.xensource.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <20151029124829.GM18674@zion.uk.xensource.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Wei Liu Cc: roger.pau@citrix.com, stefano.stabellini@eu.citrix.com, ian.jackson@eu.citrix.com, Ian.Campbell@citrix.com, xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org On 10/29/2015 01:48 PM, Wei Liu wrote: > On Tue, Oct 13, 2015 at 03:11:17PM +0200, Juergen Gross wrote: >> In order to prepare a p2m list outside of the initial kernel mapping >> do a rework of the domain builder's page table handler. The goal is >> to be able to use common helpers for page table allocation and setup >> for initial kernel page tables and page tables mapping the p2m list. >> This is achieved by supporting multiple mapping areas. The mapped >> virtual addresses of the single areas must not overlap, while the >> page tables of a new area added might already be partially present. >> Especially the top level page table is existing only once, of course. >> > > Currently restrict the number of mappings to 1 because the only mapping > now is the initial mapping created by toolstack. There should not be > behaviour change and guest visible change introduced. > > If my understanding is correct, can yo please add that to the commit > message? Sure. I'm currently thinking about changing this patch even further. While doing similar work in grub-xen I found the page table building there to be much more generic and more compact. Instead of open coding the different page table levels all is done there in a loop over those levels. The main loop consists of only 33 lines (and this is after adding support of multiple mapping areas)! What do you think? > Given this is a particularly thorny area, a second eye would be much > appreciated. On the positive side: any bug in this code should be really easy to spot, as the domain wouldn't be able to work reliably (this was at least my experience when developing this patch and the related one in grub). > Some comments below. > >> Signed-off-by: Juergen Gross >> --- >> tools/libxc/xc_dom_x86.c | 404 ++++++++++++++++++++++++++++------------------- >> 1 file changed, 240 insertions(+), 164 deletions(-) >> >> diff --git a/tools/libxc/xc_dom_x86.c b/tools/libxc/xc_dom_x86.c >> index c815e10..333ef6b 100644 >> --- a/tools/libxc/xc_dom_x86.c >> +++ b/tools/libxc/xc_dom_x86.c >> @@ -65,17 +65,27 @@ >> #define NR_IOREQ_SERVER_PAGES 8 >> #define ioreq_server_pfn(x) (special_pfn(0) - NR_IOREQ_SERVER_PAGES + (x)) >> >> -#define bits_to_mask(bits) (((xen_vaddr_t)1 << (bits))-1) >> +#define bits_to_mask(bits) (((xen_vaddr_t)1 << (bits)) - 1) > > Whitespace-only change here. Oops, sorry. > >> #define round_down(addr, mask) ((addr) & ~(mask)) >> #define round_up(addr, mask) ((addr) | (mask)) >> >> -struct xc_dom_image_x86 { >> - /* initial page tables */ >> +struct xc_dom_x86_mapping_lvl { >> + xen_vaddr_t from; >> + xen_vaddr_t to; >> + xen_pfn_t pfn; >> unsigned int pgtables; >> - unsigned int pg_l4; >> - unsigned int pg_l3; >> - unsigned int pg_l2; >> - unsigned int pg_l1; >> +}; >> + >> +struct xc_dom_x86_mapping { >> + struct xc_dom_x86_mapping_lvl area; >> + struct xc_dom_x86_mapping_lvl lvls[4]; >> + xen_pfn_t pfn_start; > > This is unused throughout the patch and the next patch. You can delete it. Indeed. > >> +}; >> + >> +struct xc_dom_image_x86 { >> + unsigned n_mappings; >> +#define MAPPING_MAX 1 >> + struct xc_dom_x86_mapping maps[MAPPING_MAX]; >> }; >> >> /* get guest IO ABI protocol */ >> @@ -105,43 +115,107 @@ const char *xc_domain_get_native_protocol(xc_interface *xch, >> return protocol; >> } >> >> -static unsigned long >> -nr_page_tables(struct xc_dom_image *dom, >> - xen_vaddr_t start, xen_vaddr_t end, unsigned long bits) >> +static void >> +nr_page_tables(struct xc_dom_image *dom, int lvl, >> + xen_vaddr_t from, xen_vaddr_t to, unsigned long bits) >> { >> xen_vaddr_t mask = bits_to_mask(bits); >> - int tables; >> + struct xc_dom_image_x86 *domx86 = dom->arch_private; >> + struct xc_dom_x86_mapping *map = domx86->maps + domx86->n_mappings; >> + struct xc_dom_x86_mapping *map_cmp; >> + unsigned map_n; >> >> if ( bits == 0 ) >> - return 0; /* unused */ >> + return; /* unused */ >> + >> + if ( lvl == 3 && domx86->n_mappings != 0 ) >> + return; /* Top level page table already in first mapping. */ >> >> if ( bits == (8 * sizeof(unsigned long)) ) >> { >> - /* must be pgd, need one */ >> - start = 0; >> - end = -1; >> - tables = 1; >> + /* 32 bit top level page table special case */ >> + map->lvls[lvl].from = 0; >> + map->lvls[lvl].to = -1; >> + map->lvls[lvl].pgtables = 1; >> + goto done; >> } >> - else >> + >> + from = round_down(from, mask); >> + to = round_up(to, mask); >> + >> + for ( map_n = 0, map_cmp = domx86->maps; map_n < domx86->n_mappings; >> + map_n++, map_cmp++ ) >> { >> - start = round_down(start, mask); >> - end = round_up(end, mask); >> - tables = ((end - start) >> bits) + 1; >> + if ( map_cmp->lvls[lvl].from == map_cmp->lvls[lvl].to ) >> + continue; >> + if ( from >= map_cmp->lvls[lvl].from && to <= map_cmp->lvls[lvl].to ) >> + return; /* Area already completely covered on this level. */ >> + if ( from >= map_cmp->lvls[lvl].from && from <= map_cmp->lvls[lvl].to ) >> + from = map_cmp->lvls[lvl].to + 1; >> + if ( to >= map_cmp->lvls[lvl].from && to <= map_cmp->lvls[lvl].to ) >> + to = map_cmp->lvls[lvl].from - 1; > > Is it possible that later mapping covers the previous ones? How is that > handled? I'm not sure I understand your question. In case you are asking whether different mappings are allowed to overlap in terms of virtual addresses: no. I haven't added any checking code to ensure this. I can do it, if you want. > >> } >> >> + map->lvls[lvl].from = from; >> + map->lvls[lvl].to = to; >> + map->lvls[lvl].pgtables = ((to - from) >> bits) + 1; >> + >> + done: >> DOMPRINTF("%s: 0x%016" PRIx64 "/%ld: 0x%016" PRIx64 >> - " -> 0x%016" PRIx64 ", %d table(s)", >> - __FUNCTION__, mask, bits, start, end, tables); >> - return tables; >> + " -> 0x%016" PRIx64 ", %d table(s)", __FUNCTION__, mask, bits, >> + map->lvls[lvl].from, map->lvls[lvl].to, map->lvls[lvl].pgtables); >> } >> >> -static int alloc_pgtables(struct xc_dom_image *dom, int pae, >> - int l4_bits, int l3_bits, int l2_bits, int l1_bits) >> +static int count_pgtables(struct xc_dom_image *dom, int pae, int bits[4], >> + xen_vaddr_t from, xen_vaddr_t to, xen_pfn_t pfn) >> +{ >> + struct xc_dom_image_x86 *domx86 = dom->arch_private; >> + struct xc_dom_x86_mapping *map; >> + xen_pfn_t pfn_end; >> + int level; >> + >> + if ( domx86->n_mappings == MAPPING_MAX ) >> + { >> + xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY, >> + "%s: too many mappings\n", __FUNCTION__); >> + return -ENOMEM; >> + } >> + map = domx86->maps + domx86->n_mappings; >> + >> + pfn_end = pfn + ((to - from) >> PAGE_SHIFT_X86); >> + if ( pfn_end >= dom->p2m_size ) >> + { >> + xc_dom_panic(dom->xch, XC_OUT_OF_MEMORY, >> + "%s: not enough memory for initial mapping (%#"PRIpfn" > %#"PRIpfn")", >> + __FUNCTION__, pfn_end, dom->p2m_size); >> + return -ENOMEM; >> + } >> + >> + memset(map, 0, sizeof(*map)); >> + >> + map->area.from = from; >> + map->area.to = to; >> + for (level = 3; level >= 0; level--) >> + { >> + map->lvls[level].pfn = pfn + map->area.pgtables; >> + nr_page_tables(dom, level, from, to, bits[level]); >> + if ( pae && to < 0xc0000000 && level == 1) >> + { >> + DOMPRINTF("%s: PAE: extra l2 page table for l3#3", __FUNCTION__); >> + map->lvls[level].pgtables++; >> + } >> + map->area.pgtables += map->lvls[level].pgtables; >> + } >> + >> + return 0; >> +} >> + >> +static int alloc_pgtables(struct xc_dom_image *dom, int pae, int bits[4]) >> { >> int pages, extra_pages; >> xen_vaddr_t try_virt_end; >> - xen_pfn_t try_pfn_end; >> struct xc_dom_image_x86 *domx86 = dom->arch_private; >> + struct xc_dom_x86_mapping *map = domx86->maps + domx86->n_mappings; >> >> extra_pages = dom->alloc_bootstack ? 1 : 0; >> extra_pages += 128; /* 512kB padding */ > > Hmm... Not really related to this patch: Should this be derived from > PAGE_SIZE_X86 as well? Probably, yes. > The rest looks OK to me. Thanks, Juergen