From mboxrd@z Thu Jan 1 00:00:00 1970 From: Sergey Tovpeko Subject: Re: xl: pci completion error Date: Thu, 07 Oct 2010 16:47:27 +0400 Message-ID: <4CADC15F.3050202@gmail.com> References: <4CAB0E9A.5050507@gmail.com> <4CAC74DD.3030703@gmail.com> <1286375398.12843.98.camel@qabil.uk.xensource.com> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <1286375398.12843.98.camel@qabil.uk.xensource.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Gianni Tedesco Cc: "xen-devel@lists.xensource.com" , Stefano Stabellini List-Id: xen-devel@lists.xenproject.org Hello, Gianni! I was interested, how your patch would behave with many pci devices given to a domain. It seemed that each device removal takes 10 seconds for the timeout. pci=[ '01:00.0','00:1d.0', '00:1d.1', '00:1d.2','00:1d.3', '00:1d.7' ] The result was a bit different. After the first pci removal, the others somehow were destroyed. So the second do_pci_remove run into the failure that there were no pci devices at all. So I got the following log. Waiting for domain workxp (domid 1) to die [pid 3900] Domain 1 is dead Action for shutdown reason code 0 is destroy Domain 1 needs to be cleaned up: destroying the domain do_pci_remove device 01:00.0 libxl: error: libxl_device.c:448:libxl__wait_for_device_model Device Model not ready libxl: error: libxl_pci.c:861:do_pci_remove Device Model didn't respond in time do_pci_remove device 00:1d.0 libxl: error: libxl_pci.c:839:do_pci_remove PCI device not attached to this domain libxl: error: libxl.c:944:libxl_domain_destroy pci shutdown failed for domid 1 libxl: error: libxl.c:896:libxl_destroy_device_model Couldn't find device model's pid: No such file or directory libxl: error: libxl.c:956:libxl_domain_destroy libxl_destroy_device_model failed for 1 libxl: error: libxl_device.c:307:libxl__devices_destroy /local/domain/1/device is empty Sergey. > Sergeys analysis sounds very plausible to me actually. The co-ordination > required between qemu and libxl for PCI passthrough is very complicated > and one ought to have fairly low confidence in it's correctness :P > > Below is a less hacky version of the patch I just sent. Stefano, please > consider this for inclusion. > > --- > xl: Implement PCI passthrough force removal > > This fixes two errors with removing PCI devices from HVM domains. The > first error is that the handling of "pci-rem" device-model command is > erroneously implemented in qemu and difficult (impossible?) to get > right. > > For example, during domain shutdown there can be a race where the guest > OS unloads it's drivers and perhaps even shuts down PCI subsystem before > the pci-rem command has been received by qemu. This means that no OS is > present to write to the port which causes the dm command to be > acknowledged. > > We fix this by implementing a 'force removal' option to > libxl_device_pci_remove which is always set to 1 during guest shutdown. > It can be optionally enabled on the xl command line for other occasions. > > The second error is that if a guest OS doesn't respond to the SCI > interrupt and therefore the pci-rem dm command, which can happen if the > guest OS has no ACPI PCI hotplug support, then device removal bails with > an error but only AFTER removing the device from xenstore. This means > that xenstore gets in to an inconsistent state where an assigned device > also appears to be assignable. > > This is fixed by moving xenstore device removal to occur only after the > device has really been removed. > > Signed-off-by: Gianni Tedesco > > diff -r 02e199c96ece tools/libxl/libxl.h > --- a/tools/libxl/libxl.h Wed Oct 06 11:00:19 2010 +0100 > +++ b/tools/libxl/libxl.h Wed Oct 06 15:29:12 2010 +0100 > @@ -406,7 +406,7 @@ int libxl_device_vfb_clean_shutdown(libx > int libxl_device_vfb_hard_shutdown(libxl_ctx *ctx, uint32_t domid); > > int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev); > -int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev); > +int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev, int force); > int libxl_device_pci_shutdown(libxl_ctx *ctx, uint32_t domid); > int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num); > int libxl_device_pci_list_assignable(libxl_ctx *ctx, libxl_device_pci **list, int *num); > diff -r 02e199c96ece tools/libxl/libxl_pci.c > --- a/tools/libxl/libxl_pci.c Wed Oct 06 11:00:19 2010 +0100 > +++ b/tools/libxl/libxl_pci.c Wed Oct 06 15:29:12 2010 +0100 > @@ -841,7 +841,8 @@ out: > return rc; > } > > -static int do_pci_remove(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev) > +static int do_pci_remove(libxl__gc *gc, uint32_t domid, > + libxl_device_pci *pcidev, int force) > { > libxl_ctx *ctx = libxl__gc_owner(gc); > libxl_device_pci *assigned; > @@ -858,8 +859,6 @@ static int do_pci_remove(libxl__gc *gc, > } > } > > - libxl_device_pci_remove_xenstore(gc, domid, pcidev); > - > hvm = libxl__domain_is_hvm(ctx, domid); > if (hvm) { > if (libxl__wait_for_device_model(ctx, domid, "running", NULL, NULL) < 0) { > @@ -878,7 +877,12 @@ static int do_pci_remove(libxl__gc *gc, > xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem")); > if (libxl__wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) { > LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Device Model didn't respond in time"); > - return ERROR_FAIL; > + /* This depends on guest operating system acknowledging the > + * SCI, if it doesn't respond in time then we may wish to > + * force the removal. > + */ > + if ( !force ) > + return ERROR_FAIL; > } > } > path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid); > @@ -924,7 +928,7 @@ skip1: > if ((fscanf(f, "%u", &irq) == 1) && irq) { > rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq); > if (rc < 0) { > - LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_physdev_map_pirq irq=%d", irq); > + LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_physdev_unmap_pirq irq=%d", irq); > } > rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0); > if (rc < 0) { > @@ -948,13 +952,16 @@ out: > stubdomid = libxl_get_stubdom_id(ctx, domid); > if (stubdomid != 0) { > libxl_device_pci pcidev_s = *pcidev; > - libxl_device_pci_remove(ctx, stubdomid, &pcidev_s); > + libxl_device_pci_remove(ctx, stubdomid, &pcidev_s, force); > } > > + libxl_device_pci_remove_xenstore(gc, domid, pcidev); > + > return 0; > } > > -int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev) > +int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid, > + libxl_device_pci *pcidev, int force) > { > libxl__gc gc = LIBXL_INIT_GC(ctx); > unsigned int orig_vdev, pfunc_mask; > @@ -980,7 +987,7 @@ int libxl_device_pci_remove(libxl_ctx *c > }else{ > pcidev->vdevfn = orig_vdev; > } > - if ( do_pci_remove(&gc, domid, pcidev) ) > + if ( do_pci_remove(&gc, domid, pcidev, force) ) > rc = ERROR_FAIL; > } > } > @@ -1049,7 +1056,11 @@ int libxl_device_pci_shutdown(libxl_ctx > if ( rc ) > return rc; > for (i = 0; i < num; i++) { > - if (libxl_device_pci_remove(ctx, domid, pcidevs + i) < 0) > + /* Force remove on shutdown since, on HVM, qemu will not always > + * respond to SCI interrupt because the guest kernel has shut down the > + * devices by the time we even get here! > + */ > + if (libxl_device_pci_remove(ctx, domid, pcidevs + i, 1) < 0) > return ERROR_FAIL; > } > free(pcidevs); > diff -r 02e199c96ece tools/libxl/xl_cmdimpl.c > --- a/tools/libxl/xl_cmdimpl.c Wed Oct 06 11:00:19 2010 +0100 > +++ b/tools/libxl/xl_cmdimpl.c Wed Oct 06 15:29:12 2010 +0100 > @@ -2262,7 +2262,7 @@ int main_pcilist(int argc, char **argv) > return 0; > } > > -static void pcidetach(char *dom, char *bdf) > +static void pcidetach(char *dom, char *bdf, int force) > { > libxl_device_pci pcidev; > > @@ -2273,20 +2273,24 @@ static void pcidetach(char *dom, char *b > fprintf(stderr, "pci-detach: malformed BDF specification \"%s\"\n", bdf); > exit(2); > } > - libxl_device_pci_remove(&ctx, domid, &pcidev); > + libxl_device_pci_remove(&ctx, domid, &pcidev, force); > libxl_device_pci_destroy(&pcidev); > } > > int main_pcidetach(int argc, char **argv) > { > int opt; > + int force = 0; > char *domname = NULL, *bdf = NULL; > > - while ((opt = getopt(argc, argv, "h")) != -1) { > + while ((opt = getopt(argc, argv, "hf")) != -1) { > switch (opt) { > case 'h': > help("pci-detach"); > return 0; > + case 'f': > + force = 1; > + break; > default: > fprintf(stderr, "option not supported\n"); > break; > @@ -2300,7 +2304,7 @@ int main_pcidetach(int argc, char **argv > domname = argv[optind]; > bdf = argv[optind + 1]; > > - pcidetach(domname, bdf); > + pcidetach(domname, bdf, force); > return 0; > } > static void pciattach(char *dom, char *bdf, char *vs) > > >