- * Re: [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support
  2019-08-28 14:23 ` [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support Sukrit Bhatnagar
@ 2019-08-29 12:53   ` Yuval Shaia
  2019-09-03 21:33     ` Sukrit Bhatnagar
  2019-08-29 12:56   ` Yuval Shaia
  2019-08-31 19:45   ` Marcel Apfelbaum
  2 siblings, 1 reply; 14+ messages in thread
From: Yuval Shaia @ 2019-08-29 12:53 UTC (permalink / raw)
  To: Sukrit Bhatnagar; +Cc: qemu-devel
On Wed, Aug 28, 2019 at 07:53:28PM +0530, Sukrit Bhatnagar wrote:
> vmstate_pvrdma describes the PCI and MSIX states as well as the dma
> address for dsr and the gid table of device.
> vmstate_pvrdma_gids describes each gid in the gid table.
> 
> pvrdma_post_save() does the job of unregistering gid entries from the
> backend device in the source host.
> 
> pvrdma_post_load() maps to dsr using the loaded dma address, registers
> each loaded gid into the backend device, and finally calls load_dsr()
> to perform other mappings and ring init operations.
I think it is worth mentioning that the dma address is kept in driver/device
shared memory (dsr->dma), which is migrated as part of memory migration and
is out of the scope of this change, so we do not need to save/load
the dma address during migration.
Also, you should specifically comment that this migration support does not
include QP migration. This means that live migration *during*
traffic is not yet supported.
> 
> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> Cc: Yuval Shaia <yuval.shaia@oracle.com>
> Signed-off-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
> ---
>  hw/rdma/vmw/pvrdma_main.c | 77 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index 6c90db96f9..6f8b56dea3 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -28,6 +28,7 @@
>  #include "sysemu/sysemu.h"
>  #include "monitor/monitor.h"
>  #include "hw/rdma/rdma.h"
> +#include "migration/register.h"
>  
>  #include "../rdma_rm.h"
>  #include "../rdma_backend.h"
> @@ -593,6 +594,81 @@ static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
>      pvrdma_fini(pci_dev);
>  }
>  
> +static int pvrdma_post_save(void *opaque)
> +{
> +    int i, rc;
> +    PVRDMADev *dev = opaque;
> +
> +    for (i = 0; i < MAX_GIDS; i++) {
> +
Empty line is redundant here.
> +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> +            continue;
> +        }
> +        rc = rdma_backend_del_gid(&dev->backend_dev,
> +                                   dev->backend_eth_device_name,
> +                                   &dev->rdma_dev_res.port.gid_tbl[i].gid);
> +        if (rc) {
> +            return -EINVAL;
An error report would help here, I guess.
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int pvrdma_post_load(void *opaque, int version_id)
> +{
> +    int i, rc;
> +    PVRDMADev *dev = opaque;
> +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> +    DSRInfo *dsr_info = &dev->dsr_info;
> +
> +    dsr_info->dsr = rdma_pci_dma_map(pci_dev, dsr_info->dma,
> +                                sizeof(struct pvrdma_device_shared_region));
> +    if (!dsr_info->dsr) {
> +        rdma_error_report("Failed to map to DSR");
> +        return -ENOMEM;
> +    }
> +
> +    for (i = 0; i < MAX_GIDS; i++) {
> +
Empty line is redundant here.
> +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> +            continue;
> +        }
> +
> +        rc = rdma_backend_add_gid(&dev->backend_dev,
> +                                  dev->backend_eth_device_name,
> +                                  &dev->rdma_dev_res.port.gid_tbl[i].gid);
> +        if (rc) {
> +            return -EINVAL;
> +        }
> +    }
> +
> +    return load_dsr(dev);
> +}
> +
> +static const VMStateDescription vmstate_pvrdma_gids = {
> +    .name = "pvrdma-gids",
> +    .fields = (VMStateField[]) {
> +            VMSTATE_UINT8_ARRAY_V(gid.raw, RdmaRmGid, 16, 0),
> +            VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static const VMStateDescription vmstate_pvrdma = {
> +    .name = PVRDMA_HW_NAME,
> +    .post_save = pvrdma_post_save,
> +    .post_load = pvrdma_post_load,
> +    .fields = (VMStateField[]) {
> +            VMSTATE_PCI_DEVICE(parent_obj, PVRDMADev),
> +            VMSTATE_MSIX(parent_obj, PVRDMADev),
> +            VMSTATE_UINT64(dsr_info.dma, PVRDMADev),
> +            VMSTATE_STRUCT_ARRAY(rdma_dev_res.port.gid_tbl, PVRDMADev,
> +                                 MAX_PORT_GIDS, 0, vmstate_pvrdma_gids,
> +                                 RdmaRmGid),
> +            VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static void pvrdma_realize(PCIDevice *pdev, Error **errp)
>  {
>      int rc = 0;
> @@ -688,6 +764,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data)
>  
>      dc->desc = "RDMA Device";
>      dc->props = pvrdma_dev_properties;
> +    dc->vmsd = &vmstate_pvrdma;
>      set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
>  
>      ir->print_statistics = pvrdma_print_statistics;
> -- 
> 2.21.0
> 
> 
^ permalink raw reply	[flat|nested] 14+ messages in thread
- * Re: [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support
  2019-08-29 12:53   ` Yuval Shaia
@ 2019-09-03 21:33     ` Sukrit Bhatnagar
  2019-09-04  5:04       ` Yuval Shaia
  0 siblings, 1 reply; 14+ messages in thread
From: Sukrit Bhatnagar @ 2019-09-03 21:33 UTC (permalink / raw)
  To: Yuval Shaia; +Cc: qemu-devel
On Thu, 29 Aug 2019 at 18:23, Yuval Shaia <yuval.shaia@oracle.com> wrote:
>
> On Wed, Aug 28, 2019 at 07:53:28PM +0530, Sukrit Bhatnagar wrote:
> > vmstate_pvrdma describes the PCI and MSIX states as well as the dma
> > address for dsr and the gid table of device.
> > vmstate_pvrdma_gids describes each gid in the gid table.
> >
> > pvrdma_post_save() does the job of unregistering gid entries from the
> > backend device in the source host.
> >
> > pvrdma_post_load() maps to dsr using the loaded dma address, registers
> > each loaded gid into the backend device, and finally calls load_dsr()
> > to perform other mappings and ring init operations.
>
> I think it worth to mention that the dma address is kept in driver/device
> shared memory (dsr->dma) which is migrated as part of memory migration and
> it is out of the scope of this change and so we do not need to save/load
> the dma address during migration.
>
> Also you should specifically comment that this migration-support does not
> includes QP migration. This means that support for life migration *during*
> traffic is not yet supported.
>
> >
> > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> > Cc: Yuval Shaia <yuval.shaia@oracle.com>
> > Signed-off-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
> > ---
> >  hw/rdma/vmw/pvrdma_main.c | 77 +++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 77 insertions(+)
> >
> > diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> > index 6c90db96f9..6f8b56dea3 100644
> > --- a/hw/rdma/vmw/pvrdma_main.c
> > +++ b/hw/rdma/vmw/pvrdma_main.c
> > @@ -28,6 +28,7 @@
> >  #include "sysemu/sysemu.h"
> >  #include "monitor/monitor.h"
> >  #include "hw/rdma/rdma.h"
> > +#include "migration/register.h"
> >
> >  #include "../rdma_rm.h"
> >  #include "../rdma_backend.h"
> > @@ -593,6 +594,81 @@ static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
> >      pvrdma_fini(pci_dev);
> >  }
> >
> > +static int pvrdma_post_save(void *opaque)
> > +{
> > +    int i, rc;
> > +    PVRDMADev *dev = opaque;
> > +
> > +    for (i = 0; i < MAX_GIDS; i++) {
> > +
>
> Empty line is redundant here.
>
> > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > +            continue;
> > +        }
> > +        rc = rdma_backend_del_gid(&dev->backend_dev,
> > +                                   dev->backend_eth_device_name,
> > +                                   &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > +        if (rc) {
> > +            return -EINVAL;
>
> Some error report will help here i guess.
rdma_backend_del_gid() already generates an error report
when rc isn't 0.
Adding another statement for the same seems redundant.
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int pvrdma_post_load(void *opaque, int version_id)
> > +{
> > +    int i, rc;
> > +    PVRDMADev *dev = opaque;
> > +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> > +    DSRInfo *dsr_info = &dev->dsr_info;
> > +
> > +    dsr_info->dsr = rdma_pci_dma_map(pci_dev, dsr_info->dma,
> > +                                sizeof(struct pvrdma_device_shared_region));
> > +    if (!dsr_info->dsr) {
> > +        rdma_error_report("Failed to map to DSR");
> > +        return -ENOMEM;
> > +    }
> > +
> > +    for (i = 0; i < MAX_GIDS; i++) {
> > +
>
> Empty line is redundant here.
>
> > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > +            continue;
> > +        }
> > +
> > +        rc = rdma_backend_add_gid(&dev->backend_dev,
> > +                                  dev->backend_eth_device_name,
> > +                                  &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > +        if (rc) {
> > +            return -EINVAL;
> > +        }
> > +    }
> > +
> > +    return load_dsr(dev);
Now that I will move load_dsr() before the del_gid loop,
I can use goto jumps on exit/error paths, so that I can
undo load_dsr if any del_gid fails.
> > +}
> > +
> > +static const VMStateDescription vmstate_pvrdma_gids = {
> > +    .name = "pvrdma-gids",
> > +    .fields = (VMStateField[]) {
> > +            VMSTATE_UINT8_ARRAY_V(gid.raw, RdmaRmGid, 16, 0),
> > +            VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> > +static const VMStateDescription vmstate_pvrdma = {
> > +    .name = PVRDMA_HW_NAME,
> > +    .post_save = pvrdma_post_save,
> > +    .post_load = pvrdma_post_load,
> > +    .fields = (VMStateField[]) {
> > +            VMSTATE_PCI_DEVICE(parent_obj, PVRDMADev),
> > +            VMSTATE_MSIX(parent_obj, PVRDMADev),
> > +            VMSTATE_UINT64(dsr_info.dma, PVRDMADev),
> > +            VMSTATE_STRUCT_ARRAY(rdma_dev_res.port.gid_tbl, PVRDMADev,
> > +                                 MAX_PORT_GIDS, 0, vmstate_pvrdma_gids,
> > +                                 RdmaRmGid),
> > +            VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> >  static void pvrdma_realize(PCIDevice *pdev, Error **errp)
> >  {
> >      int rc = 0;
> > @@ -688,6 +764,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data)
> >
> >      dc->desc = "RDMA Device";
> >      dc->props = pvrdma_dev_properties;
> > +    dc->vmsd = &vmstate_pvrdma;
> >      set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
> >
> >      ir->print_statistics = pvrdma_print_statistics;
> > --
> > 2.21.0
> >
> >
^ permalink raw reply	[flat|nested] 14+ messages in thread
- * Re: [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support
  2019-09-03 21:33     ` Sukrit Bhatnagar
@ 2019-09-04  5:04       ` Yuval Shaia
  0 siblings, 0 replies; 14+ messages in thread
From: Yuval Shaia @ 2019-09-04  5:04 UTC (permalink / raw)
  To: Sukrit Bhatnagar; +Cc: qemu-devel
On Wed, Sep 04, 2019 at 03:03:20AM +0530, Sukrit Bhatnagar wrote:
> On Thu, 29 Aug 2019 at 18:23, Yuval Shaia <yuval.shaia@oracle.com> wrote:
> >
> > On Wed, Aug 28, 2019 at 07:53:28PM +0530, Sukrit Bhatnagar wrote:
> > > vmstate_pvrdma describes the PCI and MSIX states as well as the dma
> > > address for dsr and the gid table of device.
> > > vmstate_pvrdma_gids describes each gid in the gid table.
> > >
> > > pvrdma_post_save() does the job of unregistering gid entries from the
> > > backend device in the source host.
> > >
> > > pvrdma_post_load() maps to dsr using the loaded dma address, registers
> > > each loaded gid into the backend device, and finally calls load_dsr()
> > > to perform other mappings and ring init operations.
> >
> > I think it worth to mention that the dma address is kept in driver/device
> > shared memory (dsr->dma) which is migrated as part of memory migration and
> > it is out of the scope of this change and so we do not need to save/load
> > the dma address during migration.
> >
> > Also you should specifically comment that this migration-support does not
> > includes QP migration. This means that support for life migration *during*
> > traffic is not yet supported.
> >
> > >
> > > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> > > Cc: Yuval Shaia <yuval.shaia@oracle.com>
> > > Signed-off-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
> > > ---
> > >  hw/rdma/vmw/pvrdma_main.c | 77 +++++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 77 insertions(+)
> > >
> > > diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> > > index 6c90db96f9..6f8b56dea3 100644
> > > --- a/hw/rdma/vmw/pvrdma_main.c
> > > +++ b/hw/rdma/vmw/pvrdma_main.c
> > > @@ -28,6 +28,7 @@
> > >  #include "sysemu/sysemu.h"
> > >  #include "monitor/monitor.h"
> > >  #include "hw/rdma/rdma.h"
> > > +#include "migration/register.h"
> > >
> > >  #include "../rdma_rm.h"
> > >  #include "../rdma_backend.h"
> > > @@ -593,6 +594,81 @@ static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
> > >      pvrdma_fini(pci_dev);
> > >  }
> > >
> > > +static int pvrdma_post_save(void *opaque)
> > > +{
> > > +    int i, rc;
> > > +    PVRDMADev *dev = opaque;
> > > +
> > > +    for (i = 0; i < MAX_GIDS; i++) {
> > > +
> >
> > Empty line is redundant here.
> >
> > > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > > +            continue;
> > > +        }
> > > +        rc = rdma_backend_del_gid(&dev->backend_dev,
> > > +                                   dev->backend_eth_device_name,
> > > +                                   &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > > +        if (rc) {
> > > +            return -EINVAL;
> >
> > Some error report will help here i guess.
> 
> rdma_backend_del_gid() already generates an error report
> when rc isn't 0.
> 
> Adding another statement for the same seems redundant.
Sure, makes sense.
> 
> > > +        }
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > > +static int pvrdma_post_load(void *opaque, int version_id)
> > > +{
> > > +    int i, rc;
> > > +    PVRDMADev *dev = opaque;
> > > +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> > > +    DSRInfo *dsr_info = &dev->dsr_info;
> > > +
> > > +    dsr_info->dsr = rdma_pci_dma_map(pci_dev, dsr_info->dma,
> > > +                                sizeof(struct pvrdma_device_shared_region));
> > > +    if (!dsr_info->dsr) {
> > > +        rdma_error_report("Failed to map to DSR");
> > > +        return -ENOMEM;
> > > +    }
> > > +
> > > +    for (i = 0; i < MAX_GIDS; i++) {
> > > +
> >
> > Empty line is redundant here.
> >
> > > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > > +            continue;
> > > +        }
> > > +
> > > +        rc = rdma_backend_add_gid(&dev->backend_dev,
> > > +                                  dev->backend_eth_device_name,
> > > +                                  &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > > +        if (rc) {
> > > +            return -EINVAL;
> > > +        }
> > > +    }
> > > +
> > > +    return load_dsr(dev);
> 
> Now that I will move load_dsr() before the del_gid loop,
You probably meant before the add_gid loop.
> I can use goto jumps on exit/error paths, so that I can
> undo load_dsr if any del_gid fails.
Yeah, it will be easier to undo load_dsr than add_gid.
> 
> > > +}
> > > +
> > > +static const VMStateDescription vmstate_pvrdma_gids = {
> > > +    .name = "pvrdma-gids",
> > > +    .fields = (VMStateField[]) {
> > > +            VMSTATE_UINT8_ARRAY_V(gid.raw, RdmaRmGid, 16, 0),
> > > +            VMSTATE_END_OF_LIST()
> > > +    }
> > > +};
> > > +
> > > +static const VMStateDescription vmstate_pvrdma = {
> > > +    .name = PVRDMA_HW_NAME,
> > > +    .post_save = pvrdma_post_save,
> > > +    .post_load = pvrdma_post_load,
> > > +    .fields = (VMStateField[]) {
> > > +            VMSTATE_PCI_DEVICE(parent_obj, PVRDMADev),
> > > +            VMSTATE_MSIX(parent_obj, PVRDMADev),
> > > +            VMSTATE_UINT64(dsr_info.dma, PVRDMADev),
> > > +            VMSTATE_STRUCT_ARRAY(rdma_dev_res.port.gid_tbl, PVRDMADev,
> > > +                                 MAX_PORT_GIDS, 0, vmstate_pvrdma_gids,
> > > +                                 RdmaRmGid),
> > > +            VMSTATE_END_OF_LIST()
> > > +    }
> > > +};
> > > +
> > >  static void pvrdma_realize(PCIDevice *pdev, Error **errp)
> > >  {
> > >      int rc = 0;
> > > @@ -688,6 +764,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data)
> > >
> > >      dc->desc = "RDMA Device";
> > >      dc->props = pvrdma_dev_properties;
> > > +    dc->vmsd = &vmstate_pvrdma;
> > >      set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
> > >
> > >      ir->print_statistics = pvrdma_print_statistics;
> > > --
> > > 2.21.0
> > >
> > >
^ permalink raw reply	[flat|nested] 14+ messages in thread
 
 
- * Re: [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support
  2019-08-28 14:23 ` [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support Sukrit Bhatnagar
  2019-08-29 12:53   ` Yuval Shaia
@ 2019-08-29 12:56   ` Yuval Shaia
  2019-08-31 19:45   ` Marcel Apfelbaum
  2 siblings, 0 replies; 14+ messages in thread
From: Yuval Shaia @ 2019-08-29 12:56 UTC (permalink / raw)
  To: Sukrit Bhatnagar; +Cc: qemu-devel
On Wed, Aug 28, 2019 at 07:53:28PM +0530, Sukrit Bhatnagar wrote:
> vmstate_pvrdma describes the PCI and MSIX states as well as the dma
> address for dsr and the gid table of device.
> vmstate_pvrdma_gids describes each gid in the gid table.
> 
> pvrdma_post_save() does the job of unregistering gid entries from the
> backend device in the source host.
> 
> pvrdma_post_load() maps to dsr using the loaded dma address, registers
> each loaded gid into the backend device, and finally calls load_dsr()
> to perform other mappings and ring init operations.
> 
> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> Cc: Yuval Shaia <yuval.shaia@oracle.com>
> Signed-off-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
> ---
>  hw/rdma/vmw/pvrdma_main.c | 77 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index 6c90db96f9..6f8b56dea3 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -28,6 +28,7 @@
>  #include "sysemu/sysemu.h"
>  #include "monitor/monitor.h"
>  #include "hw/rdma/rdma.h"
> +#include "migration/register.h"
>  
>  #include "../rdma_rm.h"
>  #include "../rdma_backend.h"
> @@ -593,6 +594,81 @@ static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
>      pvrdma_fini(pci_dev);
>  }
>  
> +static int pvrdma_post_save(void *opaque)
> +{
> +    int i, rc;
> +    PVRDMADev *dev = opaque;
> +
> +    for (i = 0; i < MAX_GIDS; i++) {
> +
> +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> +            continue;
> +        }
> +        rc = rdma_backend_del_gid(&dev->backend_dev,
> +                                   dev->backend_eth_device_name,
> +                                   &dev->rdma_dev_res.port.gid_tbl[i].gid);
> +        if (rc) {
> +            return -EINVAL;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int pvrdma_post_load(void *opaque, int version_id)
> +{
> +    int i, rc;
> +    PVRDMADev *dev = opaque;
> +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> +    DSRInfo *dsr_info = &dev->dsr_info;
> +
> +    dsr_info->dsr = rdma_pci_dma_map(pci_dev, dsr_info->dma,
> +                                sizeof(struct pvrdma_device_shared_region));
> +    if (!dsr_info->dsr) {
> +        rdma_error_report("Failed to map to DSR");
> +        return -ENOMEM;
> +    }
> +
> +    for (i = 0; i < MAX_GIDS; i++) {
> +
> +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> +            continue;
> +        }
> +
> +        rc = rdma_backend_add_gid(&dev->backend_dev,
> +                                  dev->backend_eth_device_name,
> +                                  &dev->rdma_dev_res.port.gid_tbl[i].gid);
> +        if (rc) {
> +            return -EINVAL;
> +        }
> +    }
> +
> +    return load_dsr(dev);
This check is better performed before any gid manipulation on the host
because no one will undo it if load_dsr fails.
> +}
> +
> +static const VMStateDescription vmstate_pvrdma_gids = {
> +    .name = "pvrdma-gids",
> +    .fields = (VMStateField[]) {
> +            VMSTATE_UINT8_ARRAY_V(gid.raw, RdmaRmGid, 16, 0),
> +            VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static const VMStateDescription vmstate_pvrdma = {
> +    .name = PVRDMA_HW_NAME,
> +    .post_save = pvrdma_post_save,
> +    .post_load = pvrdma_post_load,
> +    .fields = (VMStateField[]) {
> +            VMSTATE_PCI_DEVICE(parent_obj, PVRDMADev),
> +            VMSTATE_MSIX(parent_obj, PVRDMADev),
> +            VMSTATE_UINT64(dsr_info.dma, PVRDMADev),
> +            VMSTATE_STRUCT_ARRAY(rdma_dev_res.port.gid_tbl, PVRDMADev,
> +                                 MAX_PORT_GIDS, 0, vmstate_pvrdma_gids,
> +                                 RdmaRmGid),
> +            VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  static void pvrdma_realize(PCIDevice *pdev, Error **errp)
>  {
>      int rc = 0;
> @@ -688,6 +764,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data)
>  
>      dc->desc = "RDMA Device";
>      dc->props = pvrdma_dev_properties;
> +    dc->vmsd = &vmstate_pvrdma;
>      set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
>  
>      ir->print_statistics = pvrdma_print_statistics;
> -- 
> 2.21.0
> 
> 
^ permalink raw reply	[flat|nested] 14+ messages in thread
- * Re: [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support
  2019-08-28 14:23 ` [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support Sukrit Bhatnagar
  2019-08-29 12:53   ` Yuval Shaia
  2019-08-29 12:56   ` Yuval Shaia
@ 2019-08-31 19:45   ` Marcel Apfelbaum
  2019-09-01  9:35     ` Yuval Shaia
  2019-09-03 11:05     ` Sukrit Bhatnagar
  2 siblings, 2 replies; 14+ messages in thread
From: Marcel Apfelbaum @ 2019-08-31 19:45 UTC (permalink / raw)
  To: Sukrit Bhatnagar, qemu-devel; +Cc: Yuval Shaia
On 8/28/19 5:23 PM, Sukrit Bhatnagar wrote:
> vmstate_pvrdma describes the PCI and MSIX states as well as the dma
> address for dsr and the gid table of device.
> vmstate_pvrdma_gids describes each gid in the gid table.
>
> pvrdma_post_save() does the job of unregistering gid entries from the
> backend device in the source host.
>
> pvrdma_post_load() maps to dsr using the loaded dma address, registers
> each loaded gid into the backend device, and finally calls load_dsr()
> to perform other mappings and ring init operations.
>
> Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> Cc: Yuval Shaia <yuval.shaia@oracle.com>
> Signed-off-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
> ---
>   hw/rdma/vmw/pvrdma_main.c | 77 +++++++++++++++++++++++++++++++++++++++
>   1 file changed, 77 insertions(+)
>
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index 6c90db96f9..6f8b56dea3 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -28,6 +28,7 @@
>   #include "sysemu/sysemu.h"
>   #include "monitor/monitor.h"
>   #include "hw/rdma/rdma.h"
> +#include "migration/register.h"
>   
>   #include "../rdma_rm.h"
>   #include "../rdma_backend.h"
> @@ -593,6 +594,81 @@ static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
>       pvrdma_fini(pci_dev);
>   }
>   
> +static int pvrdma_post_save(void *opaque)
> +{
> +    int i, rc;
> +    PVRDMADev *dev = opaque;
> +
> +    for (i = 0; i < MAX_GIDS; i++) {
> +
No need for the extra line
> +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> +            continue;
> +        }
> +        rc = rdma_backend_del_gid(&dev->backend_dev,
> +                                   dev->backend_eth_device_name,
> +                                   &dev->rdma_dev_res.port.gid_tbl[i].gid);
> +        if (rc) {
> +            return -EINVAL;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +static int pvrdma_post_load(void *opaque, int version_id)
> +{
> +    int i, rc;
> +    PVRDMADev *dev = opaque;
> +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> +    DSRInfo *dsr_info = &dev->dsr_info;
> +
> +    dsr_info->dsr = rdma_pci_dma_map(pci_dev, dsr_info->dma,
> +                                sizeof(struct pvrdma_device_shared_region));
> +    if (!dsr_info->dsr) {
> +        rdma_error_report("Failed to map to DSR");
> +        return -ENOMEM;
> +    }
> +
> +    for (i = 0; i < MAX_GIDS; i++) {
> +
The same here
> +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> +            continue;
> +        }
> +
> +        rc = rdma_backend_add_gid(&dev->backend_dev,
> +                                  dev->backend_eth_device_name,
> +                                  &dev->rdma_dev_res.port.gid_tbl[i].gid);
> +        if (rc) {
> +            return -EINVAL;
> +        }
> +    }
> +
> +    return load_dsr(dev);
> +}
> +
> +static const VMStateDescription vmstate_pvrdma_gids = {
> +    .name = "pvrdma-gids",
> +    .fields = (VMStateField[]) {
> +            VMSTATE_UINT8_ARRAY_V(gid.raw, RdmaRmGid, 16, 0),
Is 16 the array length? If yes, do we have a macro definition for it?
> +            VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static const VMStateDescription vmstate_pvrdma = {
> +    .name = PVRDMA_HW_NAME,
> +    .post_save = pvrdma_post_save,
> +    .post_load = pvrdma_post_load,
> +    .fields = (VMStateField[]) {
> +            VMSTATE_PCI_DEVICE(parent_obj, PVRDMADev),
> +            VMSTATE_MSIX(parent_obj, PVRDMADev),
> +            VMSTATE_UINT64(dsr_info.dma, PVRDMADev),
> +            VMSTATE_STRUCT_ARRAY(rdma_dev_res.port.gid_tbl, PVRDMADev,
> +                                 MAX_PORT_GIDS, 0, vmstate_pvrdma_gids,
> +                                 RdmaRmGid),
> +            VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>   static void pvrdma_realize(PCIDevice *pdev, Error **errp)
>   {
>       int rc = 0;
> @@ -688,6 +764,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data)
>   
>       dc->desc = "RDMA Device";
>       dc->props = pvrdma_dev_properties;
> +    dc->vmsd = &vmstate_pvrdma;
>       set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
>   
>       ir->print_statistics = pvrdma_print_statistics;
Very simple and elegant.
If I understand correctly the live migration of a pvrdma device with no
active workloads works with this patch, right?
If yes, I think we should consider merging this code already.
Yuval, do you agree?
Thanks,
Marcel
^ permalink raw reply	[flat|nested] 14+ messages in thread
- * Re: [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support
  2019-08-31 19:45   ` Marcel Apfelbaum
@ 2019-09-01  9:35     ` Yuval Shaia
  2019-09-03 11:05     ` Sukrit Bhatnagar
  1 sibling, 0 replies; 14+ messages in thread
From: Yuval Shaia @ 2019-09-01  9:35 UTC (permalink / raw)
  To: Marcel Apfelbaum; +Cc: Sukrit Bhatnagar, qemu-devel
On Sat, Aug 31, 2019 at 10:45:44PM +0300, Marcel Apfelbaum wrote:
> 
> 
> On 8/28/19 5:23 PM, Sukrit Bhatnagar wrote:
> > vmstate_pvrdma describes the PCI and MSIX states as well as the dma
> > address for dsr and the gid table of device.
> > vmstate_pvrdma_gids describes each gid in the gid table.
> > 
> > pvrdma_post_save() does the job of unregistering gid entries from the
> > backend device in the source host.
> > 
> > pvrdma_post_load() maps to dsr using the loaded dma address, registers
> > each loaded gid into the backend device, and finally calls load_dsr()
> > to perform other mappings and ring init operations.
> > 
> > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> > Cc: Yuval Shaia <yuval.shaia@oracle.com>
> > Signed-off-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
> > ---
> >   hw/rdma/vmw/pvrdma_main.c | 77 +++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 77 insertions(+)
> > 
> > diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> > index 6c90db96f9..6f8b56dea3 100644
> > --- a/hw/rdma/vmw/pvrdma_main.c
> > +++ b/hw/rdma/vmw/pvrdma_main.c
> > @@ -28,6 +28,7 @@
> >   #include "sysemu/sysemu.h"
> >   #include "monitor/monitor.h"
> >   #include "hw/rdma/rdma.h"
> > +#include "migration/register.h"
> >   #include "../rdma_rm.h"
> >   #include "../rdma_backend.h"
> > @@ -593,6 +594,81 @@ static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
> >       pvrdma_fini(pci_dev);
> >   }
> > +static int pvrdma_post_save(void *opaque)
> > +{
> > +    int i, rc;
> > +    PVRDMADev *dev = opaque;
> > +
> > +    for (i = 0; i < MAX_GIDS; i++) {
> > +
> 
> No need for the extra line
> > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > +            continue;
> > +        }
> > +        rc = rdma_backend_del_gid(&dev->backend_dev,
> > +                                   dev->backend_eth_device_name,
> > +                                   &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > +        if (rc) {
> > +            return -EINVAL;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int pvrdma_post_load(void *opaque, int version_id)
> > +{
> > +    int i, rc;
> > +    PVRDMADev *dev = opaque;
> > +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> > +    DSRInfo *dsr_info = &dev->dsr_info;
> > +
> > +    dsr_info->dsr = rdma_pci_dma_map(pci_dev, dsr_info->dma,
> > +                                sizeof(struct pvrdma_device_shared_region));
> > +    if (!dsr_info->dsr) {
> > +        rdma_error_report("Failed to map to DSR");
> > +        return -ENOMEM;
> > +    }
> > +
> > +    for (i = 0; i < MAX_GIDS; i++) {
> > +
> 
> The same here
> 
> > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > +            continue;
> > +        }
> > +
> > +        rc = rdma_backend_add_gid(&dev->backend_dev,
> > +                                  dev->backend_eth_device_name,
> > +                                  &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > +        if (rc) {
> > +            return -EINVAL;
> > +        }
> > +    }
> > +
> > +    return load_dsr(dev);
> > +}
> > +
> > +static const VMStateDescription vmstate_pvrdma_gids = {
> > +    .name = "pvrdma-gids",
> > +    .fields = (VMStateField[]) {
> > +            VMSTATE_UINT8_ARRAY_V(gid.raw, RdmaRmGid, 16, 0),
> 
> Is 16 the array length? If yes, do we have same macro definition?
> 
> > +            VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> > +static const VMStateDescription vmstate_pvrdma = {
> > +    .name = PVRDMA_HW_NAME,
> > +    .post_save = pvrdma_post_save,
> > +    .post_load = pvrdma_post_load,
> > +    .fields = (VMStateField[]) {
> > +            VMSTATE_PCI_DEVICE(parent_obj, PVRDMADev),
> > +            VMSTATE_MSIX(parent_obj, PVRDMADev),
> > +            VMSTATE_UINT64(dsr_info.dma, PVRDMADev),
> > +            VMSTATE_STRUCT_ARRAY(rdma_dev_res.port.gid_tbl, PVRDMADev,
> > +                                 MAX_PORT_GIDS, 0, vmstate_pvrdma_gids,
> > +                                 RdmaRmGid),
> > +            VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> >   static void pvrdma_realize(PCIDevice *pdev, Error **errp)
> >   {
> >       int rc = 0;
> > @@ -688,6 +764,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data)
> >       dc->desc = "RDMA Device";
> >       dc->props = pvrdma_dev_properties;
> > +    dc->vmsd = &vmstate_pvrdma;
> >       set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
> >       ir->print_statistics = pvrdma_print_statistics;
> 
> Very simple an elegant.
> If I understand correctly the live migration of a pvrdma device with no
> active workloads works with this patch, right?
And no QPs also.
> If yes, I think we should consider merging this code already.
> Yuval, do you agree?
Sure I do!
Even with the limitation, this is a huge enhancement that can be used right
away.
But first, I suggested some fixes; let's see v2.
> 
> Thanks,
> Marcel
> 
> 
^ permalink raw reply	[flat|nested] 14+ messages in thread
- * Re: [Qemu-devel] [PATCH v1 2/2] hw/pvrdma: add live migration support
  2019-08-31 19:45   ` Marcel Apfelbaum
  2019-09-01  9:35     ` Yuval Shaia
@ 2019-09-03 11:05     ` Sukrit Bhatnagar
  1 sibling, 0 replies; 14+ messages in thread
From: Sukrit Bhatnagar @ 2019-09-03 11:05 UTC (permalink / raw)
  To: Marcel Apfelbaum; +Cc: qemu-devel, Yuval Shaia
On Sun, 1 Sep 2019 at 01:15, Marcel Apfelbaum
<marcel.apfelbaum@gmail.com> wrote:
>
>
>
> On 8/28/19 5:23 PM, Sukrit Bhatnagar wrote:
> > vmstate_pvrdma describes the PCI and MSIX states as well as the dma
> > address for dsr and the gid table of device.
> > vmstate_pvrdma_gids describes each gid in the gid table.
> >
> > pvrdma_post_save() does the job of unregistering gid entries from the
> > backend device in the source host.
> >
> > pvrdma_post_load() maps to dsr using the loaded dma address, registers
> > each loaded gid into the backend device, and finally calls load_dsr()
> > to perform other mappings and ring init operations.
> >
> > Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
> > Cc: Yuval Shaia <yuval.shaia@oracle.com>
> > Signed-off-by: Sukrit Bhatnagar <skrtbhtngr@gmail.com>
> > ---
> >   hw/rdma/vmw/pvrdma_main.c | 77 +++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 77 insertions(+)
> >
> > diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> > index 6c90db96f9..6f8b56dea3 100644
> > --- a/hw/rdma/vmw/pvrdma_main.c
> > +++ b/hw/rdma/vmw/pvrdma_main.c
> > @@ -28,6 +28,7 @@
> >   #include "sysemu/sysemu.h"
> >   #include "monitor/monitor.h"
> >   #include "hw/rdma/rdma.h"
> > +#include "migration/register.h"
> >
> >   #include "../rdma_rm.h"
> >   #include "../rdma_backend.h"
> > @@ -593,6 +594,81 @@ static void pvrdma_shutdown_notifier(Notifier *n, void *opaque)
> >       pvrdma_fini(pci_dev);
> >   }
> >
> > +static int pvrdma_post_save(void *opaque)
> > +{
> > +    int i, rc;
> > +    PVRDMADev *dev = opaque;
> > +
> > +    for (i = 0; i < MAX_GIDS; i++) {
> > +
>
> No need for the extra line
> > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > +            continue;
> > +        }
> > +        rc = rdma_backend_del_gid(&dev->backend_dev,
> > +                                   dev->backend_eth_device_name,
> > +                                   &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > +        if (rc) {
> > +            return -EINVAL;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int pvrdma_post_load(void *opaque, int version_id)
> > +{
> > +    int i, rc;
> > +    PVRDMADev *dev = opaque;
> > +    PCIDevice *pci_dev = PCI_DEVICE(dev);
> > +    DSRInfo *dsr_info = &dev->dsr_info;
> > +
> > +    dsr_info->dsr = rdma_pci_dma_map(pci_dev, dsr_info->dma,
> > +                                sizeof(struct pvrdma_device_shared_region));
> > +    if (!dsr_info->dsr) {
> > +        rdma_error_report("Failed to map to DSR");
> > +        return -ENOMEM;
> > +    }
> > +
> > +    for (i = 0; i < MAX_GIDS; i++) {
> > +
>
> The same here
>
> > +        if (!dev->rdma_dev_res.port.gid_tbl[i].gid.global.interface_id) {
> > +            continue;
> > +        }
> > +
> > +        rc = rdma_backend_add_gid(&dev->backend_dev,
> > +                                  dev->backend_eth_device_name,
> > +                                  &dev->rdma_dev_res.port.gid_tbl[i].gid);
> > +        if (rc) {
> > +            return -EINVAL;
> > +        }
> > +    }
> > +
> > +    return load_dsr(dev);
> > +}
> > +
> > +static const VMStateDescription vmstate_pvrdma_gids = {
> > +    .name = "pvrdma-gids",
> > +    .fields = (VMStateField[]) {
> > +            VMSTATE_UINT8_ARRAY_V(gid.raw, RdmaRmGid, 16, 0),
>
> Is 16 the array length? If yes, do we have same macro definition?
16 here represents the number of bytes in a GID.
This comes from the verbs definition of ibv_gid:
union ibv_gid {
    uint8_t         raw[16];
    struct {
        __be64  subnet_prefix;
        __be64  interface_id;
    } global;
};
I suppose there is no macro for this, but we can declare
our own (something like IBV_GID_SIZE).
> > +            VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> > +static const VMStateDescription vmstate_pvrdma = {
> > +    .name = PVRDMA_HW_NAME,
> > +    .post_save = pvrdma_post_save,
> > +    .post_load = pvrdma_post_load,
> > +    .fields = (VMStateField[]) {
> > +            VMSTATE_PCI_DEVICE(parent_obj, PVRDMADev),
> > +            VMSTATE_MSIX(parent_obj, PVRDMADev),
> > +            VMSTATE_UINT64(dsr_info.dma, PVRDMADev),
> > +            VMSTATE_STRUCT_ARRAY(rdma_dev_res.port.gid_tbl, PVRDMADev,
> > +                                 MAX_PORT_GIDS, 0, vmstate_pvrdma_gids,
> > +                                 RdmaRmGid),
> > +            VMSTATE_END_OF_LIST()
> > +    }
> > +};
> > +
> >   static void pvrdma_realize(PCIDevice *pdev, Error **errp)
> >   {
> >       int rc = 0;
> > @@ -688,6 +764,7 @@ static void pvrdma_class_init(ObjectClass *klass, void *data)
> >
> >       dc->desc = "RDMA Device";
> >       dc->props = pvrdma_dev_properties;
> > +    dc->vmsd = &vmstate_pvrdma;
> >       set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
> >
> >       ir->print_statistics = pvrdma_print_statistics;
>
> Very simple and elegant.
> If I understand correctly the live migration of a pvrdma device with no
> active workloads works with this patch, right?
Yes.
> If yes, I think we should consider merging this code already.
> Yuval, do you agree?
>
> Thanks,
> Marcel
>
>
^ permalink raw reply	[flat|nested] 14+ messages in thread