* Re: [PATCH 1/1] vdpa/mlx5: Support interrupt bypassing
From: Parav Pandit via Virtualization @ 2023-04-03 16:58 UTC
To: Eli Cohen, mst, jasowang, si-wei.liu, eperezma, virtualization
Cc: Saeed Mahameed, parav
On 4/3/2023 12:20 PM, Eli Cohen wrote:
> Add support for generating interrupts from the device directly to the
> VM's vCPU, thus avoiding the overhead on the host CPU.
>
> When supported, the driver will attempt to allocate vectors for each
> data virtqueue. If a vector for a virtqueue cannot be provided it will
> use the QP mode where notifications go through the driver.
>
> In addition, we add a shutdown callback to make sure allocated
> interrupts are released in case of shutdown to allow clean shutdown.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> ---
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> 2 files changed, 144 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 520646ae7fa0..215a46cf8a98 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info {
> u64 driver_addr;
> u16 avail_index;
> u16 used_index;
> + struct msi_map map;
> bool ready;
> bool restore;
> };
> @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
> u16 avail_idx;
> u16 used_idx;
> int fw_state;
> + struct msi_map map;
>
> /* keep last in the struct */
> struct mlx5_vq_restore_info ri;
> @@ -792,6 +794,13 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
> BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> }
>
> +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
> +{
Better to have a const struct. It makes it clear that this is a read-only
API and there is no accidental modification of the HCA cap or other fields.
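Something like this (untested sketch):

static bool msix_mode_supported(const struct mlx5_vdpa_dev *mvdev)
{
        return (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
                (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
                pci_msix_can_alloc_dyn(mvdev->mdev->pdev));
}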
> + return (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
> + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
> + pci_msix_can_alloc_dyn(mvdev->mdev->pdev));
> +}
> +
> static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> @@ -829,9 +838,15 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> if (vq_is_tx(mvq->index))
> MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
>
> - MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> + if (mvq->map.virq) {
> + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
> + } else {
> + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> + }
> +
> MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
> - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
> MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
> !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
> @@ -1174,6 +1189,32 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
> mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
> }
>
> +static void alloc_vector(struct mlx5_vdpa_net *ndev,
> + struct mlx5_vdpa_virtqueue *mvq)
> +{
> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> + int i;
> +
> + for (i = 0; i < irqp->num_ent; i++) {
> + if (!irqp->entries[i].usecount) {
The usage appears to be boolean, so better to rename usecount to used,
with used = true/false.
In the future, if you plan to share the same vector, maybe at that point
usecount makes more sense.
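I.e. something like (untested):

        if (!irqp->entries[i].used) {
                irqp->entries[i].used = true;
                mvq->map = irqp->entries[i].map;
                return;
        }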
> + irqp->entries[i].usecount++;
> + mvq->map = irqp->entries[i].map;
> + return;
> + }
> + }
> +}
> +
> +static void dealloc_vector(struct mlx5_vdpa_net *ndev,
> + struct mlx5_vdpa_virtqueue *mvq)
> +{
> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> + int i;
> +
> + for (i = 0; i < irqp->num_ent; i++)
> + if (mvq->map.virq == irqp->entries[i].map.virq)
> + irqp->entries[i].usecount--;
You should add a return here too, like in alloc, so we don't go over the
rest of the entries once a specific vq has been taken care of.
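I.e. (untested):

        for (i = 0; i < irqp->num_ent; i++) {
                if (mvq->map.virq == irqp->entries[i].map.virq) {
                        irqp->entries[i].usecount--;
                        return;
                }
        }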
> +}
> +
> static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> u16 idx = mvq->index;
> @@ -1203,27 +1244,31 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>
> err = counter_set_alloc(ndev, mvq);
> if (err)
> - goto err_counter;
> + goto err_connect;
>
> + alloc_vector(ndev, mvq);
> err = create_virtqueue(ndev, mvq);
> if (err)
> - goto err_connect;
> + goto err_vq;
>
> if (mvq->ready) {
> err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> if (err) {
> mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
> idx, err);
> - goto err_connect;
> + goto err_modify;
> }
> }
>
> mvq->initialized = true;
> return 0;
>
> -err_connect:
> +err_modify:
> + destroy_virtqueue(ndev, mvq);
> +err_vq:
> + dealloc_vector(ndev, mvq);
> counter_set_dealloc(ndev, mvq);
> -err_counter:
> +err_connect:
> qp_destroy(ndev, &mvq->vqqp);
> err_vqqp:
> qp_destroy(ndev, &mvq->fwqp);
> @@ -1267,6 +1312,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
> return;
>
> suspend_vq(ndev, mvq);
> + dealloc_vector(ndev, mvq);
The vector should be deallocated after destroying the vq, to keep the
cleanup a mirror of the create flow.
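I.e. (untested):

        suspend_vq(ndev, mvq);
        destroy_virtqueue(ndev, mvq);
        dealloc_vector(ndev, mvq);
        counter_set_dealloc(ndev, mvq);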
> destroy_virtqueue(ndev, mvq);
> counter_set_dealloc(ndev, mvq);
> qp_destroy(ndev, &mvq->vqqp);
> @@ -2374,6 +2420,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
> ri->desc_addr = mvq->desc_addr;
> ri->device_addr = mvq->device_addr;
> ri->driver_addr = mvq->driver_addr;
> + ri->map = mvq->map;
> ri->restore = true;
> return 0;
> }
> @@ -2418,6 +2465,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
> mvq->desc_addr = ri->desc_addr;
> mvq->device_addr = ri->device_addr;
> mvq->driver_addr = ri->driver_addr;
> + mvq->map = ri->map;
> }
> }
>
> @@ -2693,6 +2741,22 @@ static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
> return mvdev->vdev.dma_dev;
> }
>
> +static void free_irqs(struct mlx5_vdpa_net *ndev)
> +{
> + struct mlx5_vdpa_irq_pool_entry *ent;
> + int i;
> +
> + if (!msix_mode_supported(&ndev->mvdev))
> + return;
> +
> + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
> + ent = ndev->irqp.entries + i;
> + mlx5_msix_free(ndev->mvdev.mdev, ent->map);
> + ndev->irqp.num_ent--;
No need to reduce num_ent here.
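I.e. (untested):

        for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
                ent = ndev->irqp.entries + i;
                mlx5_msix_free(ndev->mvdev.mdev, ent->map);
        }
        kfree(ndev->irqp.entries);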
* Re: [PATCH 0/1] posted interrtups support for mlx5
From: Parav Pandit via Virtualization @ 2023-04-03 17:00 UTC
To: Eli Cohen, mst, jasowang, si-wei.liu, eperezma, virtualization; +Cc: parav
On 4/3/2023 12:20 PM, Eli Cohen wrote:
> Hi,
>
> the single patch in this series adds support for posted interrupts in
> mlx5.
>
> It depends on the patch series already accepted, which can be seen here:
> https://patchwork.kernel.org/project/netdevbpf/patch/20230324231341.29808-1-saeed@kernel.org/
>
> git pull git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git
> tags/mlx5-updates-2023-03-20
>
> Eli Cohen (1):
> vdpa/mlx5: Support interrupt bypassing
>
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> 2 files changed, 144 insertions(+), 9 deletions(-)
>
In subject
s/interrtups/interrupts/
* Re: [PATCH 0/1] posted interrtups support for mlx5
From: Michael S. Tsirkin @ 2023-04-03 18:09 UTC
To: Parav Pandit; +Cc: parav, virtualization, eperezma, Eli Cohen
On Mon, Apr 03, 2023 at 01:00:16PM -0400, Parav Pandit wrote:
>
>
> On 4/3/2023 12:20 PM, Eli Cohen wrote:
> > Hi,
> >
> > the single patch in this series adds support for posted interrupts in
> > mlx5.
> >
> > It depends on the patch series already accepted, which can be seen here:
> > https://patchwork.kernel.org/project/netdevbpf/patch/20230324231341.29808-1-saeed@kernel.org/
> >
> > git pull git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git
> > tags/mlx5-updates-2023-03-20
> >
> > Eli Cohen (1):
> > vdpa/mlx5: Support interrupt bypassing
> >
> > drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> > drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> > 2 files changed, 144 insertions(+), 9 deletions(-)
> >
> In subject
>
> s/interrtups/interrupts/
In fact, if we are splitting hairs, this is a reduced relative (the full
form would be "support of posted interrupts"), so it should not use a
plural form. Also pls use a prefix in the patch subject. In the end:
mlx5: posted interrupt support
would be better.
--
MST
* Re: [PATCH 0/1] posted interrtups support for mlx5
From: Michael S. Tsirkin @ 2023-04-03 18:10 UTC
To: Parav Pandit; +Cc: parav, virtualization, eperezma, Eli Cohen
On Mon, Apr 03, 2023 at 02:09:46PM -0400, Michael S. Tsirkin wrote:
> On Mon, Apr 03, 2023 at 01:00:16PM -0400, Parav Pandit wrote:
> >
> >
> > On 4/3/2023 12:20 PM, Eli Cohen wrote:
> > > Hi,
> > >
> > > the single patch in this series adds support for posted interrupts in
> > > mlx5.
> > >
> > > It depends on the patch series already accepted, which can be seen here:
> > > https://patchwork.kernel.org/project/netdevbpf/patch/20230324231341.29808-1-saeed@kernel.org/
> > >
> > > git pull git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux.git
> > > tags/mlx5-updates-2023-03-20
> > >
> > > Eli Cohen (1):
> > > vdpa/mlx5: Support interrupt bypassing
> > >
> > > drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> > > drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> > > 2 files changed, 144 insertions(+), 9 deletions(-)
> > >
> > In subject
> >
> > s/interrtups/interrupts/
>
> In fact, if we are splitting hairs, this is a reduced relative (the full
> form would be "support of posted interrupts"), so it should not use a
> plural form. Also pls use a prefix in the patch subject. In the end:
> mlx5: posted interrupt support
> would be better.
Having said that, it's 0/1, so typos do not matter; they are not
in the commit log.
--
MST
* Re: [PATCH 1/1] vdpa/mlx5: Support interrupt bypassing
From: Michael S. Tsirkin @ 2023-04-03 18:11 UTC
To: Eli Cohen; +Cc: parav, virtualization, eperezma, Saeed Mahameed
On Mon, Apr 03, 2023 at 07:20:39PM +0300, Eli Cohen wrote:
> Add support for generating interrupts from the device directly to the
> VM's vCPU, thus avoiding the overhead on the host CPU.
>
> When supported, the driver will attempt to allocate vectors for each
> data virtqueue. If a vector for a virtqueue cannot be provided it will
> use the QP mode where notifications go through the driver.
>
> In addition, we add a shutdown callback to make sure allocated
> interrupts are released in case of shutdown to allow clean shutdown.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> ---
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> 2 files changed, 144 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 520646ae7fa0..215a46cf8a98 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info {
> u64 driver_addr;
> u16 avail_index;
> u16 used_index;
> + struct msi_map map;
> bool ready;
> bool restore;
> };
> @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
> u16 avail_idx;
> u16 used_idx;
> int fw_state;
> + struct msi_map map;
>
> /* keep last in the struct */
> struct mlx5_vq_restore_info ri;
> @@ -792,6 +794,13 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
> BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> }
>
> +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
> +{
> + return (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
> + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
> + pci_msix_can_alloc_dyn(mvdev->mdev->pdev));
Don't add () around the return value. Too many () just obscure the logic.
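I.e. (untested):

        return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
               (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
               pci_msix_can_alloc_dyn(mvdev->mdev->pdev);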
> +}
> +
> static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> @@ -829,9 +838,15 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> if (vq_is_tx(mvq->index))
> MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
>
> - MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> + if (mvq->map.virq) {
> + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
> + } else {
> + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> + }
> +
> MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
> - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
> MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
> !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
> @@ -1174,6 +1189,32 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
> mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
> }
>
> +static void alloc_vector(struct mlx5_vdpa_net *ndev,
> + struct mlx5_vdpa_virtqueue *mvq)
> +{
> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> + int i;
> +
> + for (i = 0; i < irqp->num_ent; i++) {
> + if (!irqp->entries[i].usecount) {
> + irqp->entries[i].usecount++;
> + mvq->map = irqp->entries[i].map;
> + return;
> + }
> + }
> +}
> +
> +static void dealloc_vector(struct mlx5_vdpa_net *ndev,
> + struct mlx5_vdpa_virtqueue *mvq)
> +{
> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> + int i;
> +
> + for (i = 0; i < irqp->num_ent; i++)
> + if (mvq->map.virq == irqp->entries[i].map.virq)
> + irqp->entries[i].usecount--;
> +}
> +
> static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> u16 idx = mvq->index;
> @@ -1203,27 +1244,31 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>
> err = counter_set_alloc(ndev, mvq);
> if (err)
> - goto err_counter;
> + goto err_connect;
>
> + alloc_vector(ndev, mvq);
> err = create_virtqueue(ndev, mvq);
> if (err)
> - goto err_connect;
> + goto err_vq;
>
> if (mvq->ready) {
> err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> if (err) {
> mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
> idx, err);
> - goto err_connect;
> + goto err_modify;
> }
> }
>
> mvq->initialized = true;
> return 0;
>
> -err_connect:
> +err_modify:
> + destroy_virtqueue(ndev, mvq);
> +err_vq:
> + dealloc_vector(ndev, mvq);
> counter_set_dealloc(ndev, mvq);
> -err_counter:
> +err_connect:
> qp_destroy(ndev, &mvq->vqqp);
> err_vqqp:
> qp_destroy(ndev, &mvq->fwqp);
> @@ -1267,6 +1312,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
> return;
>
> suspend_vq(ndev, mvq);
> + dealloc_vector(ndev, mvq);
> destroy_virtqueue(ndev, mvq);
> counter_set_dealloc(ndev, mvq);
> qp_destroy(ndev, &mvq->vqqp);
> @@ -2374,6 +2420,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
> ri->desc_addr = mvq->desc_addr;
> ri->device_addr = mvq->device_addr;
> ri->driver_addr = mvq->driver_addr;
> + ri->map = mvq->map;
> ri->restore = true;
> return 0;
> }
> @@ -2418,6 +2465,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
> mvq->desc_addr = ri->desc_addr;
> mvq->device_addr = ri->device_addr;
> mvq->driver_addr = ri->driver_addr;
> + mvq->map = ri->map;
> }
> }
>
> @@ -2693,6 +2741,22 @@ static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
> return mvdev->vdev.dma_dev;
> }
>
> +static void free_irqs(struct mlx5_vdpa_net *ndev)
> +{
> + struct mlx5_vdpa_irq_pool_entry *ent;
> + int i;
> +
> + if (!msix_mode_supported(&ndev->mvdev))
> + return;
> +
> + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
> + ent = ndev->irqp.entries + i;
> + mlx5_msix_free(ndev->mvdev.mdev, ent->map);
> + ndev->irqp.num_ent--;
> + }
> + kfree(ndev->irqp.entries);
> +}
> +
> static void mlx5_vdpa_free(struct vdpa_device *vdev)
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> @@ -2708,6 +2772,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
> mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
> }
> mlx5_vdpa_free_resources(&ndev->mvdev);
> + free_irqs(ndev);
> kfree(ndev->event_cbs);
> kfree(ndev->vqs);
> }
> @@ -2736,9 +2801,23 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
> return ret;
> }
>
> -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
> +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
> {
> - return -EOPNOTSUPP;
> + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + struct mlx5_vdpa_virtqueue *mvq;
> +
> + if (!is_index_valid(mvdev, idx))
> + return -EINVAL;
> +
> + if (is_ctrl_vq_idx(mvdev, idx))
> + return -EOPNOTSUPP;
> +
> + mvq = &ndev->vqs[idx];
> + if (!mvq->map.virq)
> + return -EOPNOTSUPP;
> +
> + return mvq->map.virq;
> }
>
> static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
> @@ -3095,6 +3174,35 @@ static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
> return err;
> }
>
> +static irqreturn_t int_handler(int irq, void *nh)
> +{
> + return IRQ_HANDLED;
> +}
> +
> +static void allocate_irqs(struct mlx5_vdpa_net *ndev)
> +{
> + struct mlx5_vdpa_irq_pool_entry *ent;
> + int i;
> +
> + if (!msix_mode_supported(&ndev->mvdev))
> + return;
> +
> + ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
> + if (!ndev->irqp.entries)
> + return;
> +
> + for (i = 0; i < ndev->mvdev.max_vqs; i++) {
> + ent = ndev->irqp.entries + i;
> + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
> + dev_name(&ndev->mvdev.vdev.dev), i);
> + ent->map = mlx5_msix_alloc(ndev->mvdev.mdev, int_handler, NULL, ent->name);
> + if (!ent->map.virq)
> + return;
> +
> + ndev->irqp.num_ent++;
> + }
> +}
> +
> static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> const struct vdpa_dev_set_config *add_config)
> {
> @@ -3171,6 +3279,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> }
>
> init_mvqs(ndev);
> + allocate_irqs(ndev);
> init_rwsem(&ndev->reslock);
> config = &ndev->config;
>
> @@ -3358,6 +3467,17 @@ static void mlx5v_remove(struct auxiliary_device *adev)
> kfree(mgtdev);
> }
>
> +static void mlx5v_shutdown(struct auxiliary_device *auxdev)
> +{
> + struct mlx5_vdpa_mgmtdev *mgtdev;
> + struct mlx5_vdpa_net *ndev;
> +
> + mgtdev = auxiliary_get_drvdata(auxdev);
> + ndev = mgtdev->ndev;
> +
> + free_irqs(ndev);
> +}
> +
> static const struct auxiliary_device_id mlx5v_id_table[] = {
> { .name = MLX5_ADEV_NAME ".vnet", },
> {},
> @@ -3369,6 +3489,7 @@ static struct auxiliary_driver mlx5v_driver = {
> .name = "vnet",
> .probe = mlx5v_probe,
> .remove = mlx5v_remove,
> + .shutdown = mlx5v_shutdown,
> .id_table = mlx5v_id_table,
> };
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> index c90a89e1de4d..e5063b310d3c 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> @@ -26,6 +26,19 @@ static inline u16 key2vid(u64 key)
> return (u16)(key >> 48) & 0xfff;
> }
>
> +#define MLX5_VDPA_IRQ_NAME_LEN 32
> +
> +struct mlx5_vdpa_irq_pool_entry {
> + struct msi_map map;
> + int usecount;
> + char name[MLX5_VDPA_IRQ_NAME_LEN];
> +};
> +
> +struct mlx5_vdpa_irq_pool {
> + int num_ent;
> + struct mlx5_vdpa_irq_pool_entry *entries;
> +};
> +
> struct mlx5_vdpa_net {
> struct mlx5_vdpa_dev mvdev;
> struct mlx5_vdpa_net_resources res;
> @@ -49,6 +62,7 @@ struct mlx5_vdpa_net {
> struct vdpa_callback config_cb;
> struct mlx5_vdpa_wq_ent cvq_ent;
> struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
> + struct mlx5_vdpa_irq_pool irqp;
> struct dentry *debugfs;
> };
>
> --
> 2.39.2
* Re: [PATCH 1/1] vdpa/mlx5: Support interrupt bypassing
From: Jason Wang @ 2023-04-04 2:24 UTC
To: Eli Cohen, mst, si-wei.liu, eperezma, virtualization
Cc: Saeed Mahameed, parav
On 2023/4/4 00:20, Eli Cohen wrote:
> Add support for generating interrupts from the device directly to the
> VM's vCPU, thus avoiding the overhead on the host CPU.
>
> When supported, the driver will attempt to allocate vectors for each
> data virtqueue. If a vector for a virtqueue cannot be provided it will
> use the QP mode where notifications go through the driver.
>
> In addition, we add a shutdown callback to make sure allocated
> interrupts are released in case of shutdown to allow clean shutdown.
>
> Signed-off-by: Eli Cohen <elic@nvidia.com>
> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> ---
> drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> 2 files changed, 144 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 520646ae7fa0..215a46cf8a98 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info {
> u64 driver_addr;
> u16 avail_index;
> u16 used_index;
> + struct msi_map map;
> bool ready;
> bool restore;
> };
> @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
> u16 avail_idx;
> u16 used_idx;
> int fw_state;
> + struct msi_map map;
Just spotted that this structure is defined in mlx5_vnet.c but it's not
net specific. I think it's better to move it, together with the interrupt
bypassing logic here, to core/ (we can do it in the future).
>
> /* keep last in the struct */
> struct mlx5_vq_restore_info ri;
> @@ -792,6 +794,13 @@ static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
> BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> }
>
> +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
> +{
> + return (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
> + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
> + pci_msix_can_alloc_dyn(mvdev->mdev->pdev));
> +}
> +
> static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> @@ -829,9 +838,15 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> if (vq_is_tx(mvq->index))
> MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
>
> - MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> + if (mvq->map.virq) {
> + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
> + } else {
> + MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> + }
> +
> MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
> - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
> MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
> !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
> @@ -1174,6 +1189,32 @@ static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
> mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
> }
>
> +static void alloc_vector(struct mlx5_vdpa_net *ndev,
> + struct mlx5_vdpa_virtqueue *mvq)
> +{
> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> + int i;
> +
> + for (i = 0; i < irqp->num_ent; i++) {
> + if (!irqp->entries[i].usecount) {
> + irqp->entries[i].usecount++;
If we can't get a usecount greater than 1, it might be better
to use a boolean instead.
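I.e. (untested) in mlx5_vnet.h:

        struct mlx5_vdpa_irq_pool_entry {
                struct msi_map map;
                bool used;
                char name[MLX5_VDPA_IRQ_NAME_LEN];
        };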
> + mvq->map = irqp->entries[i].map;
> + return;
> + }
> + }
> +}
> +
> +static void dealloc_vector(struct mlx5_vdpa_net *ndev,
> + struct mlx5_vdpa_virtqueue *mvq)
> +{
> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> + int i;
> +
> + for (i = 0; i < irqp->num_ent; i++)
> + if (mvq->map.virq == irqp->entries[i].map.virq)
> + irqp->entries[i].usecount--;
> +}
> +
> static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
> {
> u16 idx = mvq->index;
> @@ -1203,27 +1244,31 @@ static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
>
> err = counter_set_alloc(ndev, mvq);
> if (err)
> - goto err_counter;
> + goto err_connect;
>
> + alloc_vector(ndev, mvq);
> err = create_virtqueue(ndev, mvq);
> if (err)
> - goto err_connect;
> + goto err_vq;
>
> if (mvq->ready) {
> err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> if (err) {
> mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
> idx, err);
> - goto err_connect;
> + goto err_modify;
> }
> }
>
> mvq->initialized = true;
> return 0;
>
> -err_connect:
> +err_modify:
> + destroy_virtqueue(ndev, mvq);
> +err_vq:
> + dealloc_vector(ndev, mvq);
> counter_set_dealloc(ndev, mvq);
> -err_counter:
> +err_connect:
> qp_destroy(ndev, &mvq->vqqp);
> err_vqqp:
> qp_destroy(ndev, &mvq->fwqp);
> @@ -1267,6 +1312,7 @@ static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *
> return;
>
> suspend_vq(ndev, mvq);
> + dealloc_vector(ndev, mvq);
> destroy_virtqueue(ndev, mvq);
> counter_set_dealloc(ndev, mvq);
> qp_destroy(ndev, &mvq->vqqp);
> @@ -2374,6 +2420,7 @@ static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
> ri->desc_addr = mvq->desc_addr;
> ri->device_addr = mvq->device_addr;
> ri->driver_addr = mvq->driver_addr;
> + ri->map = mvq->map;
> ri->restore = true;
> return 0;
> }
> @@ -2418,6 +2465,7 @@ static void restore_channels_info(struct mlx5_vdpa_net *ndev)
> mvq->desc_addr = ri->desc_addr;
> mvq->device_addr = ri->device_addr;
> mvq->driver_addr = ri->driver_addr;
> + mvq->map = ri->map;
> }
> }
>
> @@ -2693,6 +2741,22 @@ static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
> return mvdev->vdev.dma_dev;
> }
>
> +static void free_irqs(struct mlx5_vdpa_net *ndev)
> +{
> + struct mlx5_vdpa_irq_pool_entry *ent;
> + int i;
> +
> + if (!msix_mode_supported(&ndev->mvdev))
> + return;
> +
> + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
> + ent = ndev->irqp.entries + i;
> + mlx5_msix_free(ndev->mvdev.mdev, ent->map);
> + ndev->irqp.num_ent--;
> + }
> + kfree(ndev->irqp.entries);
> +}
> +
> static void mlx5_vdpa_free(struct vdpa_device *vdev)
> {
> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> @@ -2708,6 +2772,7 @@ static void mlx5_vdpa_free(struct vdpa_device *vdev)
> mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
> }
> mlx5_vdpa_free_resources(&ndev->mvdev);
> + free_irqs(ndev);
> kfree(ndev->event_cbs);
> kfree(ndev->vqs);
> }
> @@ -2736,9 +2801,23 @@ static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device
> return ret;
> }
>
> -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
> +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
> {
> - return -EOPNOTSUPP;
> + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> + struct mlx5_vdpa_virtqueue *mvq;
> +
> + if (!is_index_valid(mvdev, idx))
> + return -EINVAL;
> +
> + if (is_ctrl_vq_idx(mvdev, idx))
> + return -EOPNOTSUPP;
> +
> + mvq = &ndev->vqs[idx];
> + if (!mvq->map.virq)
> + return -EOPNOTSUPP;
> +
> + return mvq->map.virq;
> }
>
> static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
> @@ -3095,6 +3174,35 @@ static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
> return err;
> }
>
> +static irqreturn_t int_handler(int irq, void *nh)
> +{
> + return IRQ_HANDLED;
I'm not sure I get the meaning of the irq handler. Note that even if we
support get_vq_irq(), it does not mean the posted interrupt can work for
every interrupt. It works only if several conditions are met (for example,
the vCPU is running).
So if this is the fallback irq handler for when we can't use posted
interrupts, should we trigger the virtqueue callback here?
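E.g. a purely hypothetical sketch (it assumes the vq's vdpa_callback is
passed as the handler's private data; the mlx5_msix_alloc() call in this
patch does not wire one up):

        static irqreturn_t int_handler(int irq, void *priv)
        {
                struct vdpa_callback *cb = priv;

                if (cb && cb->callback)
                        cb->callback(cb->private);
                return IRQ_HANDLED;
        }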
> +}
> +
> +static void allocate_irqs(struct mlx5_vdpa_net *ndev)
> +{
> + struct mlx5_vdpa_irq_pool_entry *ent;
> + int i;
> +
> + if (!msix_mode_supported(&ndev->mvdev))
> + return;
> +
> + ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
> + if (!ndev->irqp.entries)
> + return;
> +
> + for (i = 0; i < ndev->mvdev.max_vqs; i++) {
> + ent = ndev->irqp.entries + i;
> + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
> + dev_name(&ndev->mvdev.vdev.dev), i);
> + ent->map = mlx5_msix_alloc(ndev->mvdev.mdev, int_handler, NULL, ent->name);
> + if (!ent->map.virq)
> + return;
> +
> + ndev->irqp.num_ent++;
> + }
> +}
> +
> static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> const struct vdpa_dev_set_config *add_config)
> {
> @@ -3171,6 +3279,7 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> }
>
> init_mvqs(ndev);
> + allocate_irqs(ndev);
> init_rwsem(&ndev->reslock);
> config = &ndev->config;
>
> @@ -3358,6 +3467,17 @@ static void mlx5v_remove(struct auxiliary_device *adev)
> kfree(mgtdev);
> }
>
> +static void mlx5v_shutdown(struct auxiliary_device *auxdev)
> +{
> + struct mlx5_vdpa_mgmtdev *mgtdev;
> + struct mlx5_vdpa_net *ndev;
> +
> + mgtdev = auxiliary_get_drvdata(auxdev);
> + ndev = mgtdev->ndev;
> +
> + free_irqs(ndev);
> +}
> +
> static const struct auxiliary_device_id mlx5v_id_table[] = {
> { .name = MLX5_ADEV_NAME ".vnet", },
> {},
> @@ -3369,6 +3489,7 @@ static struct auxiliary_driver mlx5v_driver = {
> .name = "vnet",
> .probe = mlx5v_probe,
> .remove = mlx5v_remove,
> + .shutdown = mlx5v_shutdown,
So we allocate the irqs during dev_add; it seems cleaner if we free them
in dev_del.
Note that mlx5v_remove will call vdpa_mgmtdev_unregister(), which will
call dev_del.
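I.e. an untested sketch, with the rest of dev_del elided:

        static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
        {
                struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);

                /* ... existing device teardown ... */
                free_irqs(mgtdev->ndev);
        }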
Thanks
> .id_table = mlx5v_id_table,
> };
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> index c90a89e1de4d..e5063b310d3c 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> @@ -26,6 +26,19 @@ static inline u16 key2vid(u64 key)
> return (u16)(key >> 48) & 0xfff;
> }
>
> +#define MLX5_VDPA_IRQ_NAME_LEN 32
> +
> +struct mlx5_vdpa_irq_pool_entry {
> + struct msi_map map;
> + int usecount;
> + char name[MLX5_VDPA_IRQ_NAME_LEN];
> +};
> +
> +struct mlx5_vdpa_irq_pool {
> + int num_ent;
> + struct mlx5_vdpa_irq_pool_entry *entries;
> +};
> +
> struct mlx5_vdpa_net {
> struct mlx5_vdpa_dev mvdev;
> struct mlx5_vdpa_net_resources res;
> @@ -49,6 +62,7 @@ struct mlx5_vdpa_net {
> struct vdpa_callback config_cb;
> struct mlx5_vdpa_wq_ent cvq_ent;
> struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
> + struct mlx5_vdpa_irq_pool irqp;
> struct dentry *debugfs;
> };
>
* Re: [PATCH 1/1] vdpa/mlx5: Support interrupt bypassing
From: Jason Wang @ 2023-04-10 2:00 UTC
To: Eli Cohen; +Cc: parav, mst, virtualization, eperezma, Saeed Mahameed
On Tue, Apr 4, 2023 at 3:04 PM Eli Cohen <elic@nvidia.com> wrote:
>
>
> On 04/04/2023 5:24, Jason Wang wrote:
> >
> > On 2023/4/4 00:20, Eli Cohen wrote:
> >> Add support for generating interrupts from the device directly to the
> >> VM's vCPU, thus avoiding the overhead on the host CPU.
> >>
> >> When supported, the driver will attempt to allocate vectors for each
> >> data virtqueue. If a vector for a virtqueue cannot be provided it will
> >> use the QP mode where notifications go through the driver.
> >>
> >> In addition, we add a shutdown callback to make sure allocated
> >> interrupts are released in case of shutdown to allow clean shutdown.
> >>
> >> Signed-off-by: Eli Cohen <elic@nvidia.com>
> >> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> >> ---
> >> drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> >> drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> >> 2 files changed, 144 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> index 520646ae7fa0..215a46cf8a98 100644
> >> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >> @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info {
> >> u64 driver_addr;
> >> u16 avail_index;
> >> u16 used_index;
> >> + struct msi_map map;
> >> bool ready;
> >> bool restore;
> >> };
> >> @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
> >> u16 avail_idx;
> >> u16 used_idx;
> >> int fw_state;
> >> + struct msi_map map;
> >
> >
> > Just spotted that this structure is defined in mlx5_vnet.c but it's
> > not net specific. I think it's better to move it, together with the
> > interrupt bypassing logic here, to core/ (we can do it in the future).
> Right.
> >
> >
> >> /* keep last in the struct */
> >> struct mlx5_vq_restore_info ri;
> >> @@ -792,6 +794,13 @@ static bool counters_supported(const struct
> >> mlx5_vdpa_dev *mvdev)
> >> BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> >> }
> >> +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
> >> +{
> >> + return (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
> >> + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
> >> + pci_msix_can_alloc_dyn(mvdev->mdev->pdev));
> >> +}
> >> +
> >> static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct
> >> mlx5_vdpa_virtqueue *mvq)
> >> {
> >> int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> >> @@ -829,9 +838,15 @@ static int create_virtqueue(struct mlx5_vdpa_net
> >> *ndev, struct mlx5_vdpa_virtque
> >> if (vq_is_tx(mvq->index))
> >> MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn,
> >> ndev->res.tisn);
> >> - MLX5_SET(virtio_q, vq_ctx, event_mode,
> >> MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> >> + if (mvq->map.virq) {
> >> + MLX5_SET(virtio_q, vq_ctx, event_mode,
> >> MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
> >> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
> >> + } else {
> >> + MLX5_SET(virtio_q, vq_ctx, event_mode,
> >> MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> >> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix,
> >> mvq->fwqp.mqp.qpn);
> >> + }
> >> +
> >> MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
> >> - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> >> MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
> >> MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
> >> !!(ndev->mvdev.actual_features &
> >> BIT_ULL(VIRTIO_F_VERSION_1)));
> >> @@ -1174,6 +1189,32 @@ static void counter_set_dealloc(struct
> >> mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
> >> mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n",
> >> mvq->counter_set_id);
> >> }
> >> +static void alloc_vector(struct mlx5_vdpa_net *ndev,
> >> + struct mlx5_vdpa_virtqueue *mvq)
> >> +{
> >> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> >> + int i;
> >> +
> >> + for (i = 0; i < irqp->num_ent; i++) {
> >> + if (!irqp->entries[i].usecount) {
> >> + irqp->entries[i].usecount++;
> >
> >
> > If we can't get a usecount greater than 1, it might be
> > better to use a boolean instead.
> Will fix.
> >
> >
> >> + mvq->map = irqp->entries[i].map;
> >> + return;
> >> + }
> >> + }
> >> +}
> >> +
> >> +static void dealloc_vector(struct mlx5_vdpa_net *ndev,
> >> + struct mlx5_vdpa_virtqueue *mvq)
> >> +{
> >> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> >> + int i;
> >> +
> >> + for (i = 0; i < irqp->num_ent; i++)
> >> + if (mvq->map.virq == irqp->entries[i].map.virq)
> >> + irqp->entries[i].usecount--;
> >> +}
> >> +
> >> static int setup_vq(struct mlx5_vdpa_net *ndev, struct
> >> mlx5_vdpa_virtqueue *mvq)
> >> {
> >> u16 idx = mvq->index;
> >> @@ -1203,27 +1244,31 @@ static int setup_vq(struct mlx5_vdpa_net
> >> *ndev, struct mlx5_vdpa_virtqueue *mvq)
> >> err = counter_set_alloc(ndev, mvq);
> >> if (err)
> >> - goto err_counter;
> >> + goto err_connect;
> >> + alloc_vector(ndev, mvq);
> >> err = create_virtqueue(ndev, mvq);
> >> if (err)
> >> - goto err_connect;
> >> + goto err_vq;
> >> if (mvq->ready) {
> >> err = modify_virtqueue(ndev, mvq,
> >> MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> >> if (err) {
> >> mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready
> >> vq idx %d(%d)\n",
> >> idx, err);
> >> - goto err_connect;
> >> + goto err_modify;
> >> }
> >> }
> >> mvq->initialized = true;
> >> return 0;
> >> -err_connect:
> >> +err_modify:
> >> + destroy_virtqueue(ndev, mvq);
> >> +err_vq:
> >> + dealloc_vector(ndev, mvq);
> >> counter_set_dealloc(ndev, mvq);
> >> -err_counter:
> >> +err_connect:
> >> qp_destroy(ndev, &mvq->vqqp);
> >> err_vqqp:
> >> qp_destroy(ndev, &mvq->fwqp);
> >> @@ -1267,6 +1312,7 @@ static void teardown_vq(struct mlx5_vdpa_net
> >> *ndev, struct mlx5_vdpa_virtqueue *
> >> return;
> >> suspend_vq(ndev, mvq);
> >> + dealloc_vector(ndev, mvq);
> >> destroy_virtqueue(ndev, mvq);
> >> counter_set_dealloc(ndev, mvq);
> >> qp_destroy(ndev, &mvq->vqqp);
> >> @@ -2374,6 +2420,7 @@ static int save_channel_info(struct
> >> mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
> >> ri->desc_addr = mvq->desc_addr;
> >> ri->device_addr = mvq->device_addr;
> >> ri->driver_addr = mvq->driver_addr;
> >> + ri->map = mvq->map;
> >> ri->restore = true;
> >> return 0;
> >> }
> >> @@ -2418,6 +2465,7 @@ static void restore_channels_info(struct
> >> mlx5_vdpa_net *ndev)
> >> mvq->desc_addr = ri->desc_addr;
> >> mvq->device_addr = ri->device_addr;
> >> mvq->driver_addr = ri->driver_addr;
> >> + mvq->map = ri->map;
> >> }
> >> }
> >> @@ -2693,6 +2741,22 @@ static struct device
> >> *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
> >> return mvdev->vdev.dma_dev;
> >> }
> >> +static void free_irqs(struct mlx5_vdpa_net *ndev)
> >> +{
> >> + struct mlx5_vdpa_irq_pool_entry *ent;
> >> + int i;
> >> +
> >> + if (!msix_mode_supported(&ndev->mvdev))
> >> + return;
> >> +
> >> + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
> >> + ent = ndev->irqp.entries + i;
> >> + mlx5_msix_free(ndev->mvdev.mdev, ent->map);
> >> + ndev->irqp.num_ent--;
> >> + }
> >> + kfree(ndev->irqp.entries);
> >> +}
> >> +
> >> static void mlx5_vdpa_free(struct vdpa_device *vdev)
> >> {
> >> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >> @@ -2708,6 +2772,7 @@ static void mlx5_vdpa_free(struct vdpa_device
> >> *vdev)
> >> mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
> >> }
> >> mlx5_vdpa_free_resources(&ndev->mvdev);
> >> + free_irqs(ndev);
> >> kfree(ndev->event_cbs);
> >> kfree(ndev->vqs);
> >> }
> >> @@ -2736,9 +2801,23 @@ static struct vdpa_notification_area
> >> mlx5_get_vq_notification(struct vdpa_device
> >> return ret;
> >> }
> >> -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
> >> +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
> >> {
> >> - return -EOPNOTSUPP;
> >> + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> >> + struct mlx5_vdpa_virtqueue *mvq;
> >> +
> >> + if (!is_index_valid(mvdev, idx))
> >> + return -EINVAL;
> >> +
> >> + if (is_ctrl_vq_idx(mvdev, idx))
> >> + return -EOPNOTSUPP;
> >> +
> >> + mvq = &ndev->vqs[idx];
> >> + if (!mvq->map.virq)
> >> + return -EOPNOTSUPP;
> >> +
> >> + return mvq->map.virq;
> >> }
> >> static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
> >> @@ -3095,6 +3174,35 @@ static int config_func_mtu(struct
> >> mlx5_core_dev *mdev, u16 mtu)
> >> return err;
> >> }
> >> +static irqreturn_t int_handler(int irq, void *nh)
> >> +{
> >> + return IRQ_HANDLED;
> >
> >
> > I'm not sure I get the meaning of the irq handler. Note that even if
> > we support get_vq_irq(), it does not mean the posted interrupt can
> > work for every interrupt. It works only if several conditions are met
> > (for example, the vCPU is running).
>
> Not sure I get it. Can you be explicit about what needs to be done to
> make sure the interrupts will go through directly to the vCPU?
Firstly, the code should work in setups without PI; e.g., for VMX there
is a check, vmx_can_use_vtd_pi():
static bool vmx_can_use_vtd_pi(struct kvm *kvm)
{
return irqchip_in_kernel(kvm) && enable_apicv &&
kvm_arch_has_assigned_device(kvm) &&
irq_remapping_cap(IRQ_POSTING_CAP);
}
We can see it might only work when
- irq chip is emulated in the kernel
- apicv is enabled
- IRQ remapping is supported
And what's more, even if we had all of those, it needs to satisfy extra
requirements like virq affinity, but the virq affinity is under the
control of the guest, so we can't guarantee that the posted interrupt
will work every time. See the comments in vmx_pi_update_irte():
/*
* VT-d PI cannot support posting multicast/broadcast
* interrupts to a vCPU, we still use interrupt remapping
* for these kind of interrupts.
*
* For lowest-priority interrupts, we only support
* those with single CPU as the destination, e.g. user
* configures the interrupts via /proc/irq or uses
* irqbalance to make the interrupts single-CPU.
*
* We will support full lowest-priority interrupt later.
*
* In addition, we can only inject generic interrupts using
* the PI mechanism, refuse to route others through it.
*/
So I think what I don't understand is how the code works if PI is not supported.
Thanks
>
> >
> > So if this is the fallback irq handler for when we can't use posted
> > interrupts, should we trigger the virtqueue callback here?
>
> It's not a fallback. I should remove it completely.
>
> >
> >
> >> +}
> >> +
> >> +static void allocate_irqs(struct mlx5_vdpa_net *ndev)
> >> +{
> >> + struct mlx5_vdpa_irq_pool_entry *ent;
> >> + int i;
> >> +
> >> + if (!msix_mode_supported(&ndev->mvdev))
> >> + return;
> >> +
> >> + ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs,
> >> sizeof(*ndev->irqp.entries), GFP_KERNEL);
> >> + if (!ndev->irqp.entries)
> >> + return;
> >> +
> >> + for (i = 0; i < ndev->mvdev.max_vqs; i++) {
> >> + ent = ndev->irqp.entries + i;
> >> + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
> >> + dev_name(&ndev->mvdev.vdev.dev), i);
> >> + ent->map = mlx5_msix_alloc(ndev->mvdev.mdev, int_handler,
> >> NULL, ent->name);
> >> + if (!ent->map.virq)
> >> + return;
> >> +
> >> + ndev->irqp.num_ent++;
> >> + }
> >> +}
> >> +
> >> static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const
> >> char *name,
> >> const struct vdpa_dev_set_config *add_config)
> >> {
> >> @@ -3171,6 +3279,7 @@ static int mlx5_vdpa_dev_add(struct
> >> vdpa_mgmt_dev *v_mdev, const char *name,
> >> }
> >> init_mvqs(ndev);
> >> + allocate_irqs(ndev);
> >> init_rwsem(&ndev->reslock);
> >> config = &ndev->config;
> >> @@ -3358,6 +3467,17 @@ static void mlx5v_remove(struct
> >> auxiliary_device *adev)
> >> kfree(mgtdev);
> >> }
> >> +static void mlx5v_shutdown(struct auxiliary_device *auxdev)
> >> +{
> >> + struct mlx5_vdpa_mgmtdev *mgtdev;
> >> + struct mlx5_vdpa_net *ndev;
> >> +
> >> + mgtdev = auxiliary_get_drvdata(auxdev);
> >> + ndev = mgtdev->ndev;
> >> +
> >> + free_irqs(ndev);
> >> +}
> >> +
> >> static const struct auxiliary_device_id mlx5v_id_table[] = {
> >> { .name = MLX5_ADEV_NAME ".vnet", },
> >> {},
> >> @@ -3369,6 +3489,7 @@ static struct auxiliary_driver mlx5v_driver = {
> >> .name = "vnet",
> >> .probe = mlx5v_probe,
> >> .remove = mlx5v_remove,
> >> + .shutdown = mlx5v_shutdown,
> >
> >
> > So we allocate the irqs during dev_add; it seems cleaner if we free them
> > in dev_del.
> Not sure what you mean here. You're saying I should not call free_irqs()
> in mlx5v_shutdown()?
> >
> > Note that mlx5v_remove will call vdpa_mgmtdev_unregister(), which will
> > call dev_del.
> >
> > Thanks
> >
> >
> >> .id_table = mlx5v_id_table,
> >> };
> >> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >> b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >> index c90a89e1de4d..e5063b310d3c 100644
> >> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >> @@ -26,6 +26,19 @@ static inline u16 key2vid(u64 key)
> >> return (u16)(key >> 48) & 0xfff;
> >> }
> >> +#define MLX5_VDPA_IRQ_NAME_LEN 32
> >> +
> >> +struct mlx5_vdpa_irq_pool_entry {
> >> + struct msi_map map;
> >> + int usecount;
> >> + char name[MLX5_VDPA_IRQ_NAME_LEN];
> >> +};
> >> +
> >> +struct mlx5_vdpa_irq_pool {
> >> + int num_ent;
> >> + struct mlx5_vdpa_irq_pool_entry *entries;
> >> +};
> >> +
> >> struct mlx5_vdpa_net {
> >> struct mlx5_vdpa_dev mvdev;
> >> struct mlx5_vdpa_net_resources res;
> >> @@ -49,6 +62,7 @@ struct mlx5_vdpa_net {
> >> struct vdpa_callback config_cb;
> >> struct mlx5_vdpa_wq_ent cvq_ent;
> >> struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
> >> + struct mlx5_vdpa_irq_pool irqp;
> >> struct dentry *debugfs;
> >> };
> >
>
* Re: [PATCH 1/1] vdpa/mlx5: Support interrupt bypassing
From: Jason Wang @ 2023-04-10 2:02 UTC
To: Eli Cohen; +Cc: parav, mst, virtualization, eperezma, Saeed Mahameed
On Tue, Apr 4, 2023 at 9:30 PM Eli Cohen <elic@nvidia.com> wrote:
>
>
> On 04/04/2023 10:04, Eli Cohen wrote:
> >
> > On 04/04/2023 5:24, Jason Wang wrote:
> >>
> >> On 2023/4/4 00:20, Eli Cohen wrote:
> >>> Add support for generating interrupts from the device directly to the
> >>> VM's vCPU, thus avoiding the overhead on the host CPU.
> >>>
> >>> When supported, the driver will attempt to allocate vectors for each
> >>> data virtqueue. If a vector for a virtqueue cannot be provided it will
> >>> use the QP mode where notifications go through the driver.
> >>>
> >>> In addition, we add a shutdown callback to make sure allocated
> >>> interrupts are released in case of shutdown to allow clean shutdown.
> >>>
> >>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> >>> ---
> >>> drivers/vdpa/mlx5/net/mlx5_vnet.c | 139
> >>> ++++++++++++++++++++++++++++--
> >>> drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> >>> 2 files changed, 144 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>> index 520646ae7fa0..215a46cf8a98 100644
> >>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>> @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info {
> >>> u64 driver_addr;
> >>> u16 avail_index;
> >>> u16 used_index;
> >>> + struct msi_map map;
> >>> bool ready;
> >>> bool restore;
> >>> };
> >>> @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
> >>> u16 avail_idx;
> >>> u16 used_idx;
> >>> int fw_state;
> >>> + struct msi_map map;
> >>
> >>
> >> Just spotted that this structure is defined in mlx5_vnet.c but it's
> >> not net specific. I think it's better to move it, together with the
> >> interrupt bypassing logic here, to core/ (we can do it in the future).
> > Right.
> >>
> >>
> >>> /* keep last in the struct */
> >>> struct mlx5_vq_restore_info ri;
> >>> @@ -792,6 +794,13 @@ static bool counters_supported(const struct
> >>> mlx5_vdpa_dev *mvdev)
> >>> BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> >>> }
> >>> +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
> >>> +{
> >>> + return (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
> >>> + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
> >>> + pci_msix_can_alloc_dyn(mvdev->mdev->pdev));
> >>> +}
> >>> +
> >>> static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct
> >>> mlx5_vdpa_virtqueue *mvq)
> >>> {
> >>> int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> >>> @@ -829,9 +838,15 @@ static int create_virtqueue(struct
> >>> mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
> >>> if (vq_is_tx(mvq->index))
> >>> MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn,
> >>> ndev->res.tisn);
> >>> - MLX5_SET(virtio_q, vq_ctx, event_mode,
> >>> MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> >>> + if (mvq->map.virq) {
> >>> + MLX5_SET(virtio_q, vq_ctx, event_mode,
> >>> MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
> >>> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
> >>> + } else {
> >>> + MLX5_SET(virtio_q, vq_ctx, event_mode,
> >>> MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> >>> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix,
> >>> mvq->fwqp.mqp.qpn);
> >>> + }
> >>> +
> >>> MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
> >>> - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> >>> MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
> >>> MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
> >>> !!(ndev->mvdev.actual_features &
> >>> BIT_ULL(VIRTIO_F_VERSION_1)));
> >>> @@ -1174,6 +1189,32 @@ static void counter_set_dealloc(struct
> >>> mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
> >>> mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n",
> >>> mvq->counter_set_id);
> >>> }
> >>> +static void alloc_vector(struct mlx5_vdpa_net *ndev,
> >>> + struct mlx5_vdpa_virtqueue *mvq)
> >>> +{
> >>> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> >>> + int i;
> >>> +
> >>> + for (i = 0; i < irqp->num_ent; i++) {
> >>> + if (!irqp->entries[i].usecount) {
> >>> + irqp->entries[i].usecount++;
> >>
> >>
> >> If we can't get a usecount greater than 1, it might be
> >> better to use a boolean instead.
> > Will fix.
> >>
> >>
> >>> + mvq->map = irqp->entries[i].map;
> >>> + return;
> >>> + }
> >>> + }
> >>> +}
> >>> +
> >>> +static void dealloc_vector(struct mlx5_vdpa_net *ndev,
> >>> + struct mlx5_vdpa_virtqueue *mvq)
> >>> +{
> >>> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> >>> + int i;
> >>> +
> >>> + for (i = 0; i < irqp->num_ent; i++)
> >>> + if (mvq->map.virq == irqp->entries[i].map.virq)
> >>> + irqp->entries[i].usecount--;
> >>> +}
> >>> +
> >>> static int setup_vq(struct mlx5_vdpa_net *ndev, struct
> >>> mlx5_vdpa_virtqueue *mvq)
> >>> {
> >>> u16 idx = mvq->index;
> >>> @@ -1203,27 +1244,31 @@ static int setup_vq(struct mlx5_vdpa_net
> >>> *ndev, struct mlx5_vdpa_virtqueue *mvq)
> >>> err = counter_set_alloc(ndev, mvq);
> >>> if (err)
> >>> - goto err_counter;
> >>> + goto err_connect;
> >>> + alloc_vector(ndev, mvq);
> >>> err = create_virtqueue(ndev, mvq);
> >>> if (err)
> >>> - goto err_connect;
> >>> + goto err_vq;
> >>> if (mvq->ready) {
> >>> err = modify_virtqueue(ndev, mvq,
> >>> MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> >>> if (err) {
> >>> mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to
> >>> ready vq idx %d(%d)\n",
> >>> idx, err);
> >>> - goto err_connect;
> >>> + goto err_modify;
> >>> }
> >>> }
> >>> mvq->initialized = true;
> >>> return 0;
> >>> -err_connect:
> >>> +err_modify:
> >>> + destroy_virtqueue(ndev, mvq);
> >>> +err_vq:
> >>> + dealloc_vector(ndev, mvq);
> >>> counter_set_dealloc(ndev, mvq);
> >>> -err_counter:
> >>> +err_connect:
> >>> qp_destroy(ndev, &mvq->vqqp);
> >>> err_vqqp:
> >>> qp_destroy(ndev, &mvq->fwqp);
> >>> @@ -1267,6 +1312,7 @@ static void teardown_vq(struct mlx5_vdpa_net
> >>> *ndev, struct mlx5_vdpa_virtqueue *
> >>> return;
> >>> suspend_vq(ndev, mvq);
> >>> + dealloc_vector(ndev, mvq);
> >>> destroy_virtqueue(ndev, mvq);
> >>> counter_set_dealloc(ndev, mvq);
> >>> qp_destroy(ndev, &mvq->vqqp);
> >>> @@ -2374,6 +2420,7 @@ static int save_channel_info(struct
> >>> mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
> >>> ri->desc_addr = mvq->desc_addr;
> >>> ri->device_addr = mvq->device_addr;
> >>> ri->driver_addr = mvq->driver_addr;
> >>> + ri->map = mvq->map;
> >>> ri->restore = true;
> >>> return 0;
> >>> }
> >>> @@ -2418,6 +2465,7 @@ static void restore_channels_info(struct
> >>> mlx5_vdpa_net *ndev)
> >>> mvq->desc_addr = ri->desc_addr;
> >>> mvq->device_addr = ri->device_addr;
> >>> mvq->driver_addr = ri->driver_addr;
> >>> + mvq->map = ri->map;
> >>> }
> >>> }
> >>> @@ -2693,6 +2741,22 @@ static struct device
> >>> *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
> >>> return mvdev->vdev.dma_dev;
> >>> }
> >>> +static void free_irqs(struct mlx5_vdpa_net *ndev)
> >>> +{
> >>> + struct mlx5_vdpa_irq_pool_entry *ent;
> >>> + int i;
> >>> +
> >>> + if (!msix_mode_supported(&ndev->mvdev))
> >>> + return;
> >>> +
> >>> + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
> >>> + ent = ndev->irqp.entries + i;
> >>> + mlx5_msix_free(ndev->mvdev.mdev, ent->map);
> >>> + ndev->irqp.num_ent--;
> >>> + }
> >>> + kfree(ndev->irqp.entries);
> >>> +}
> >>> +
> >>> static void mlx5_vdpa_free(struct vdpa_device *vdev)
> >>> {
> >>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >>> @@ -2708,6 +2772,7 @@ static void mlx5_vdpa_free(struct vdpa_device
> >>> *vdev)
> >>> mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
> >>> }
> >>> mlx5_vdpa_free_resources(&ndev->mvdev);
> >>> + free_irqs(ndev);
> >>> kfree(ndev->event_cbs);
> >>> kfree(ndev->vqs);
> >>> }
> >>> @@ -2736,9 +2801,23 @@ static struct vdpa_notification_area
> >>> mlx5_get_vq_notification(struct vdpa_device
> >>> return ret;
> >>> }
> >>> -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
> >>> +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
> >>> {
> >>> - return -EOPNOTSUPP;
> >>> + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >>> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> >>> + struct mlx5_vdpa_virtqueue *mvq;
> >>> +
> >>> + if (!is_index_valid(mvdev, idx))
> >>> + return -EINVAL;
> >>> +
> >>> + if (is_ctrl_vq_idx(mvdev, idx))
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + mvq = &ndev->vqs[idx];
> >>> + if (!mvq->map.virq)
> >>> + return -EOPNOTSUPP;
> >>> +
> >>> + return mvq->map.virq;
> >>> }
> >>> static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
> >>> @@ -3095,6 +3174,35 @@ static int config_func_mtu(struct
> >>> mlx5_core_dev *mdev, u16 mtu)
> >>> return err;
> >>> }
> >>> +static irqreturn_t int_handler(int irq, void *nh)
> >>> +{
> >>> + return IRQ_HANDLED;
> >>
> >>
> >> I'm not sure I get the meaning of the irq handler. Note that even if
> >> we support get_vq_irq(), it does not mean the posted interrupt can
> >> work for every interrupt. It works only if several conditions are met
> >> (for example, the vCPU is running).
> >
> > Not sure I get it. Can you be explicit about what needs to be done to
> > make sure the interrupts will go directly to the vCPU?
> >
> >>
> >> So if this is the fallback irq handler for when we can't do posted
> >> interrupts, should we trigger the virtqueue callback here?
> >
> > It's not a fallback. I should remove it completely.
>
> Actually it can't be removed. If we do remove it, we will fail
> request_irq() due to this code from request_threaded_irq():
>
> 	if (!handler) {
> 		if (!thread_fn)
> 			return -EINVAL;
>
> 		handler = irq_default_primary_handler;
> 	}
>
> If no errors were encountered, we don't expect the handlers to ever be
> called since the interrupts go directly to the guest vCPU.
So the question is what happens if the setup doesn't support PI. It
looks to me like we can reach this handler in that case.
Thanks
>
> So I am thinking, maybe I should just put a WARN_ONCE in the handler.
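>
> Something along these lines, perhaps (just a sketch, untested):
>
> static irqreturn_t int_handler(int irq, void *nh)
> {
> 	/* With interrupt bypassing the device posts interrupts directly
> 	 * to the guest vCPU, so reaching this handler means the bypass
> 	 * did not take effect for this interrupt.
> 	 */
> 	WARN_ONCE(1, "mlx5_vdpa: interrupt %d delivered to host\n", irq);
> 	return IRQ_HANDLED;
> }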
>
> I am also taking the opportunity to say that I will be on vacation till
> March 13 so I won't be able to respond to emails.
>
> >
> >>
> >>
> >>> +}
> >>> +
> >>> +static void allocate_irqs(struct mlx5_vdpa_net *ndev)
> >>> +{
> >>> + struct mlx5_vdpa_irq_pool_entry *ent;
> >>> + int i;
> >>> +
> >>> + if (!msix_mode_supported(&ndev->mvdev))
> >>> + return;
> >>> +
> >>> + ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs,
> >>> sizeof(*ndev->irqp.entries), GFP_KERNEL);
> >>> + if (!ndev->irqp.entries)
> >>> + return;
> >>> +
> >>> + for (i = 0; i < ndev->mvdev.max_vqs; i++) {
> >>> + ent = ndev->irqp.entries + i;
> >>> + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
> >>> + dev_name(&ndev->mvdev.vdev.dev), i);
> >>> + ent->map = mlx5_msix_alloc(ndev->mvdev.mdev, int_handler,
> >>> NULL, ent->name);
> >>> + if (!ent->map.virq)
> >>> + return;
> >>> +
> >>> + ndev->irqp.num_ent++;
> >>> + }
> >>> +}
> >>> +
> >>> static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const
> >>> char *name,
> >>> const struct vdpa_dev_set_config *add_config)
> >>> {
> >>> @@ -3171,6 +3279,7 @@ static int mlx5_vdpa_dev_add(struct
> >>> vdpa_mgmt_dev *v_mdev, const char *name,
> >>> }
> >>> init_mvqs(ndev);
> >>> + allocate_irqs(ndev);
> >>> init_rwsem(&ndev->reslock);
> >>> config = &ndev->config;
> >>> @@ -3358,6 +3467,17 @@ static void mlx5v_remove(struct
> >>> auxiliary_device *adev)
> >>> kfree(mgtdev);
> >>> }
> >>> +static void mlx5v_shutdown(struct auxiliary_device *auxdev)
> >>> +{
> >>> + struct mlx5_vdpa_mgmtdev *mgtdev;
> >>> + struct mlx5_vdpa_net *ndev;
> >>> +
> >>> + mgtdev = auxiliary_get_drvdata(auxdev);
> >>> + ndev = mgtdev->ndev;
> >>> +
> >>> + free_irqs(ndev);
> >>> +}
> >>> +
> >>> static const struct auxiliary_device_id mlx5v_id_table[] = {
> >>> { .name = MLX5_ADEV_NAME ".vnet", },
> >>> {},
> >>> @@ -3369,6 +3489,7 @@ static struct auxiliary_driver mlx5v_driver = {
> >>> .name = "vnet",
> >>> .probe = mlx5v_probe,
> >>> .remove = mlx5v_remove,
> >>> + .shutdown = mlx5v_shutdown,
> >>
> >>
> >> So we allocate the irqs during dev_add; it seems cleaner if we free
> >> them in dev_del.
> > Not sure what you mean here. Are you saying I should not call
> > free_irqs() in mlx5v_shutdown()?
> >>
> >> Note that mlx5v_remove will call vdpa_mgmtdev_unregister() which will
> >> call dev_del.
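> >>
> >> I.e., move the free_irqs() call out of mlx5_vdpa_free() and into the
> >> dev_del callback, roughly like this (untested sketch; the rest of the
> >> existing dev_del body is elided):
> >>
> >> static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev,
> >> 			      struct vdpa_device *dev)
> >> {
> >> 	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
> >> 	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> >>
> >> 	_vdpa_unregister_device(dev);
> >> 	/* pairs with allocate_irqs() in dev_add */
> >> 	free_irqs(ndev);
> >> }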
> >>
> >> Thanks
> >>
> >>
> >>> .id_table = mlx5v_id_table,
> >>> };
> >>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>> b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>> index c90a89e1de4d..e5063b310d3c 100644
> >>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>> @@ -26,6 +26,19 @@ static inline u16 key2vid(u64 key)
> >>> return (u16)(key >> 48) & 0xfff;
> >>> }
> >>> +#define MLX5_VDPA_IRQ_NAME_LEN 32
> >>> +
> >>> +struct mlx5_vdpa_irq_pool_entry {
> >>> + struct msi_map map;
> >>> + int usecount;
> >>> + char name[MLX5_VDPA_IRQ_NAME_LEN];
> >>> +};
> >>> +
> >>> +struct mlx5_vdpa_irq_pool {
> >>> + int num_ent;
> >>> + struct mlx5_vdpa_irq_pool_entry *entries;
> >>> +};
> >>> +
> >>> struct mlx5_vdpa_net {
> >>> struct mlx5_vdpa_dev mvdev;
> >>> struct mlx5_vdpa_net_resources res;
> >>> @@ -49,6 +62,7 @@ struct mlx5_vdpa_net {
> >>> struct vdpa_callback config_cb;
> >>> struct mlx5_vdpa_wq_ent cvq_ent;
> >>> struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
> >>> + struct mlx5_vdpa_irq_pool irqp;
> >>> struct dentry *debugfs;
> >>> };
> >>
>
* Re: [PATCH 1/1] vdpa/mlx5: Support interrupt bypassing
[not found] ` <9200f3cc-aa57-c1ba-0d45-d6148eaa4533@nvidia.com>
@ 2023-04-19 4:41 ` Jason Wang
0 siblings, 0 replies; 9+ messages in thread
From: Jason Wang @ 2023-04-19 4:41 UTC (permalink / raw)
To: Eli Cohen; +Cc: parav, mst, virtualization, eperezma, Saeed Mahameed
On Thu, Apr 13, 2023 at 10:39 PM Eli Cohen <elic@nvidia.com> wrote:
>
>
> On 10/04/2023 5:00, Jason Wang wrote:
> > On Tue, Apr 4, 2023 at 3:04 PM Eli Cohen <elic@nvidia.com> wrote:
> >>
> >> On 04/04/2023 5:24, Jason Wang wrote:
> >>> 在 2023/4/4 00:20, Eli Cohen 写道:
> >>>> Add support for generation of interrupts from the device directly to the
> >>>> VM to the VCPU thus avoiding the overhead on the host CPU.
> >>>>
> >>>> When supported, the driver will attempt to allocate vectors for each
> >>>> data virtqueue. If a vector for a virtqueue cannot be provided it will
> >>>> use the QP mode where notifications go through the driver.
> >>>>
> >>>> In addition, we add a shutdown callback to make sure allocated
> >>>> interrupts are released in case of shutdown to allow clean shutdown.
> >>>>
> >>>> Signed-off-by: Eli Cohen <elic@nvidia.com>
> >>>> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> >>>> ---
> >>>> drivers/vdpa/mlx5/net/mlx5_vnet.c | 139 ++++++++++++++++++++++++++++--
> >>>> drivers/vdpa/mlx5/net/mlx5_vnet.h | 14 +++
> >>>> 2 files changed, 144 insertions(+), 9 deletions(-)
> >>>>
> >>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>>> index 520646ae7fa0..215a46cf8a98 100644
> >>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> >>>> @@ -83,6 +83,7 @@ struct mlx5_vq_restore_info {
> >>>> u64 driver_addr;
> >>>> u16 avail_index;
> >>>> u16 used_index;
> >>>> + struct msi_map map;
> >>>> bool ready;
> >>>> bool restore;
> >>>> };
> >>>> @@ -118,6 +119,7 @@ struct mlx5_vdpa_virtqueue {
> >>>> u16 avail_idx;
> >>>> u16 used_idx;
> >>>> int fw_state;
> >>>> + struct msi_map map;
> >>>
> >>> Just spotted that this structure is defined in mlx5_vnet.c but it's
> >>> not net specific. I think it's better to move it, together with the
> >>> interrupt bypassing logic, to core/ (we can do it in the future).
> >> Right.
> >>>
> >>>> /* keep last in the struct */
> >>>> struct mlx5_vq_restore_info ri;
> >>>> @@ -792,6 +794,13 @@ static bool counters_supported(const struct
> >>>> mlx5_vdpa_dev *mvdev)
> >>>> BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
> >>>> }
> >>>> +static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
> >>>> +{
> >>>> + return (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
> >>>> + (1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
> >>>> + pci_msix_can_alloc_dyn(mvdev->mdev->pdev));
> >>>> +}
> >>>> +
> >>>> static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct
> >>>> mlx5_vdpa_virtqueue *mvq)
> >>>> {
> >>>> int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
> >>>> @@ -829,9 +838,15 @@ static int create_virtqueue(struct mlx5_vdpa_net
> >>>> *ndev, struct mlx5_vdpa_virtque
> >>>> if (vq_is_tx(mvq->index))
> >>>> MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn,
> >>>> ndev->res.tisn);
> >>>> - MLX5_SET(virtio_q, vq_ctx, event_mode,
> >>>> MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> >>>> + if (mvq->map.virq) {
> >>>> + MLX5_SET(virtio_q, vq_ctx, event_mode,
> >>>> MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
> >>>> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
> >>>> + } else {
> >>>> + MLX5_SET(virtio_q, vq_ctx, event_mode,
> >>>> MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
> >>>> + MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix,
> >>>> mvq->fwqp.mqp.qpn);
> >>>> + }
> >>>> +
> >>>> MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
> >>>> - MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
> >>>> MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
> >>>> MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
> >>>> !!(ndev->mvdev.actual_features &
> >>>> BIT_ULL(VIRTIO_F_VERSION_1)));
> >>>> @@ -1174,6 +1189,32 @@ static void counter_set_dealloc(struct
> >>>> mlx5_vdpa_net *ndev, struct mlx5_vdpa_vir
> >>>> mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n",
> >>>> mvq->counter_set_id);
> >>>> }
> >>>> +static void alloc_vector(struct mlx5_vdpa_net *ndev,
> >>>> + struct mlx5_vdpa_virtqueue *mvq)
> >>>> +{
> >>>> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> >>>> + int i;
> >>>> +
> >>>> + for (i = 0; i < irqp->num_ent; i++) {
> >>>> + if (!irqp->entries[i].usecount) {
> >>>> + irqp->entries[i].usecount++;
> >>>
> >>> If the usecount can never be greater than 1, it might be better to
> >>> use a boolean instead.
> >> Will fix.
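> >> Something like this, I suppose (sketch only; the 'used' field name
> >> replacing usecount is illustrative):
> >>
> >> static void alloc_vector(struct mlx5_vdpa_net *ndev,
> >> 			 struct mlx5_vdpa_virtqueue *mvq)
> >> {
> >> 	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> >> 	int i;
> >>
> >> 	for (i = 0; i < irqp->num_ent; i++) {
> >> 		if (!irqp->entries[i].used) {
> >> 			/* a bool replaces the int usecount */
> >> 			irqp->entries[i].used = true;
> >> 			mvq->map = irqp->entries[i].map;
> >> 			return;
> >> 		}
> >> 	}
> >> }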
> >>>
> >>>> + mvq->map = irqp->entries[i].map;
> >>>> + return;
> >>>> + }
> >>>> + }
> >>>> +}
> >>>> +
> >>>> +static void dealloc_vector(struct mlx5_vdpa_net *ndev,
> >>>> + struct mlx5_vdpa_virtqueue *mvq)
> >>>> +{
> >>>> + struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
> >>>> + int i;
> >>>> +
> >>>> + for (i = 0; i < irqp->num_ent; i++)
> >>>> + if (mvq->map.virq == irqp->entries[i].map.virq)
> >>>> + irqp->entries[i].usecount--;
> >>>> +}
> >>>> +
> >>>> static int setup_vq(struct mlx5_vdpa_net *ndev, struct
> >>>> mlx5_vdpa_virtqueue *mvq)
> >>>> {
> >>>> u16 idx = mvq->index;
> >>>> @@ -1203,27 +1244,31 @@ static int setup_vq(struct mlx5_vdpa_net
> >>>> *ndev, struct mlx5_vdpa_virtqueue *mvq)
> >>>> err = counter_set_alloc(ndev, mvq);
> >>>> if (err)
> >>>> - goto err_counter;
> >>>> + goto err_connect;
> >>>> + alloc_vector(ndev, mvq);
> >>>> err = create_virtqueue(ndev, mvq);
> >>>> if (err)
> >>>> - goto err_connect;
> >>>> + goto err_vq;
> >>>> if (mvq->ready) {
> >>>> err = modify_virtqueue(ndev, mvq,
> >>>> MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
> >>>> if (err) {
> >>>> mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready
> >>>> vq idx %d(%d)\n",
> >>>> idx, err);
> >>>> - goto err_connect;
> >>>> + goto err_modify;
> >>>> }
> >>>> }
> >>>> mvq->initialized = true;
> >>>> return 0;
> >>>> -err_connect:
> >>>> +err_modify:
> >>>> + destroy_virtqueue(ndev, mvq);
> >>>> +err_vq:
> >>>> + dealloc_vector(ndev, mvq);
> >>>> counter_set_dealloc(ndev, mvq);
> >>>> -err_counter:
> >>>> +err_connect:
> >>>> qp_destroy(ndev, &mvq->vqqp);
> >>>> err_vqqp:
> >>>> qp_destroy(ndev, &mvq->fwqp);
> >>>> @@ -1267,6 +1312,7 @@ static void teardown_vq(struct mlx5_vdpa_net
> >>>> *ndev, struct mlx5_vdpa_virtqueue *
> >>>> return;
> >>>> suspend_vq(ndev, mvq);
> >>>> + dealloc_vector(ndev, mvq);
> >>>> destroy_virtqueue(ndev, mvq);
> >>>> counter_set_dealloc(ndev, mvq);
> >>>> qp_destroy(ndev, &mvq->vqqp);
> >>>> @@ -2374,6 +2420,7 @@ static int save_channel_info(struct
> >>>> mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqu
> >>>> ri->desc_addr = mvq->desc_addr;
> >>>> ri->device_addr = mvq->device_addr;
> >>>> ri->driver_addr = mvq->driver_addr;
> >>>> + ri->map = mvq->map;
> >>>> ri->restore = true;
> >>>> return 0;
> >>>> }
> >>>> @@ -2418,6 +2465,7 @@ static void restore_channels_info(struct
> >>>> mlx5_vdpa_net *ndev)
> >>>> mvq->desc_addr = ri->desc_addr;
> >>>> mvq->device_addr = ri->device_addr;
> >>>> mvq->driver_addr = ri->driver_addr;
> >>>> + mvq->map = ri->map;
> >>>> }
> >>>> }
> >>>> @@ -2693,6 +2741,22 @@ static struct device
> >>>> *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
> >>>> return mvdev->vdev.dma_dev;
> >>>> }
> >>>> +static void free_irqs(struct mlx5_vdpa_net *ndev)
> >>>> +{
> >>>> + struct mlx5_vdpa_irq_pool_entry *ent;
> >>>> + int i;
> >>>> +
> >>>> + if (!msix_mode_supported(&ndev->mvdev))
> >>>> + return;
> >>>> +
> >>>> + for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
> >>>> + ent = ndev->irqp.entries + i;
> >>>> + mlx5_msix_free(ndev->mvdev.mdev, ent->map);
> >>>> + ndev->irqp.num_ent--;
> >>>> + }
> >>>> + kfree(ndev->irqp.entries);
> >>>> +}
> >>>> +
> >>>> static void mlx5_vdpa_free(struct vdpa_device *vdev)
> >>>> {
> >>>> struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >>>> @@ -2708,6 +2772,7 @@ static void mlx5_vdpa_free(struct vdpa_device
> >>>> *vdev)
> >>>> mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
> >>>> }
> >>>> mlx5_vdpa_free_resources(&ndev->mvdev);
> >>>> + free_irqs(ndev);
> >>>> kfree(ndev->event_cbs);
> >>>> kfree(ndev->vqs);
> >>>> }
> >>>> @@ -2736,9 +2801,23 @@ static struct vdpa_notification_area
> >>>> mlx5_get_vq_notification(struct vdpa_device
> >>>> return ret;
> >>>> }
> >>>> -static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
> >>>> +static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
> >>>> {
> >>>> - return -EOPNOTSUPP;
> >>>> + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
> >>>> + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
> >>>> + struct mlx5_vdpa_virtqueue *mvq;
> >>>> +
> >>>> + if (!is_index_valid(mvdev, idx))
> >>>> + return -EINVAL;
> >>>> +
> >>>> + if (is_ctrl_vq_idx(mvdev, idx))
> >>>> + return -EOPNOTSUPP;
> >>>> +
> >>>> + mvq = &ndev->vqs[idx];
> >>>> + if (!mvq->map.virq)
> >>>> + return -EOPNOTSUPP;
> >>>> +
> >>>> + return mvq->map.virq;
> >>>> }
> >>>> static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
> >>>> @@ -3095,6 +3174,35 @@ static int config_func_mtu(struct
> >>>> mlx5_core_dev *mdev, u16 mtu)
> >>>> return err;
> >>>> }
> >>>> +static irqreturn_t int_handler(int irq, void *nh)
> >>>> +{
> >>>> + return IRQ_HANDLED;
> >>>
> >>> I'm not sure I get the meaning of the irq handler. Note that even if
> >>> we support get_vq_irq(), it does not mean the posted interrupt can
> >>> work for every interrupt. It works only if several conditions are met
> >>> (for example, the vCPU is running).
> >> Not sure I get it. Can you be explicit about what needs to be done to
> >> make sure the interrupts will go directly to the vCPU?
> > Firstly, the code should work in setups without PI; e.g. for VMX there
> > is a check, vmx_can_use_vtd_pi():
> >
> > static bool vmx_can_use_vtd_pi(struct kvm *kvm)
> > {
> > 	return irqchip_in_kernel(kvm) && enable_apicv &&
> > 		kvm_arch_has_assigned_device(kvm) &&
> > 		irq_remapping_cap(IRQ_POSTING_CAP);
> > }
> >
> > We can see it might only work when:
> >
> > - the irq chip is emulated in the kernel
> > - APICv is enabled
> > - IRQ remapping is supported
>
>
> OK, so we probably need a public API to report that. I am not aware of
> any. Do you think we should introduce one?
We don't need an API for that; we can simply fall back to the slow
path, which triggers the callback in the interrupt handler. Then
everything is fine.
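
Roughly (just a sketch; passing the per-virtqueue callback as the
handler cookie instead of the NULL the patch passes to
mlx5_msix_alloc() is an assumption about the wiring):

static irqreturn_t int_handler(int irq, void *priv)
{
	struct vdpa_callback *cb = priv;

	/* The posted interrupt did not bypass the host this time, so
	 * notify the guest through the regular callback instead.
	 */
	if (cb && cb->callback)
		cb->callback(cb->private);

	return IRQ_HANDLED;
}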
>
> BTW, I didn't see Intel's ifcvf making those checks. Does that mean the
> driver is broken, or is there something I am missing?
>
> >
> > And what's more, even if we had all of those, extra requirements such
> > as virq affinity need to be satisfied; and since virq affinity is under
> > the control of the guest, we can't guarantee that the posted interrupt
> > will work every time. See the comments in vmx_pi_update_irte():
> >
> > /*
> > * VT-d PI cannot support posting multicast/broadcast
> > * interrupts to a vCPU, we still use interrupt remapping
> > * for these kind of interrupts.
> > *
> > * For lowest-priority interrupts, we only support
> > * those with single CPU as the destination, e.g. user
> > * configures the interrupts via /proc/irq or uses
> > * irqbalance to make the interrupts single-CPU.
> > *
> > * We will support full lowest-priority interrupt later.
> > *
> > * In addition, we can only inject generic interrupts using
> > * the PI mechanism, refuse to route others through it.
> > */
> >
> > So I think what I don't understand is how the code works if PI is not
> > supported.
>
> So it means that in the guest we need to make sure the affinity of each
> VQ interrupt targets a single CPU (e.g. only one bit set in the mask).
>
> BTW, on my system the mask is 0xff and I don't have any issue with
> posted interrupts.
Such affinity is determined by the guest driver (e.g. for Linux, users
can tweak it via procfs by writing a CPU mask to
/proc/irq/<virq>/smp_affinity). So we can't make such an assumption.
Thanks
>
> >
> > Thanks
> >
> >>> So if this is the fallback irq handler for when we can't do posted
> >>> interrupts, should we trigger the virtqueue callback here?
> >> It's not a fallback. I should remove it completely.
> >>
> >>>
> >>>> +}
> >>>> +
> >>>> +static void allocate_irqs(struct mlx5_vdpa_net *ndev)
> >>>> +{
> >>>> + struct mlx5_vdpa_irq_pool_entry *ent;
> >>>> + int i;
> >>>> +
> >>>> + if (!msix_mode_supported(&ndev->mvdev))
> >>>> + return;
> >>>> +
> >>>> + ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs,
> >>>> sizeof(*ndev->irqp.entries), GFP_KERNEL);
> >>>> + if (!ndev->irqp.entries)
> >>>> + return;
> >>>> +
> >>>> + for (i = 0; i < ndev->mvdev.max_vqs; i++) {
> >>>> + ent = ndev->irqp.entries + i;
> >>>> + snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
> >>>> + dev_name(&ndev->mvdev.vdev.dev), i);
> >>>> + ent->map = mlx5_msix_alloc(ndev->mvdev.mdev, int_handler,
> >>>> NULL, ent->name);
> >>>> + if (!ent->map.virq)
> >>>> + return;
> >>>> +
> >>>> + ndev->irqp.num_ent++;
> >>>> + }
> >>>> +}
> >>>> +
> >>>> static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const
> >>>> char *name,
> >>>> const struct vdpa_dev_set_config *add_config)
> >>>> {
> >>>> @@ -3171,6 +3279,7 @@ static int mlx5_vdpa_dev_add(struct
> >>>> vdpa_mgmt_dev *v_mdev, const char *name,
> >>>> }
> >>>> init_mvqs(ndev);
> >>>> + allocate_irqs(ndev);
> >>>> init_rwsem(&ndev->reslock);
> >>>> config = &ndev->config;
> >>>> @@ -3358,6 +3467,17 @@ static void mlx5v_remove(struct
> >>>> auxiliary_device *adev)
> >>>> kfree(mgtdev);
> >>>> }
> >>>> +static void mlx5v_shutdown(struct auxiliary_device *auxdev)
> >>>> +{
> >>>> + struct mlx5_vdpa_mgmtdev *mgtdev;
> >>>> + struct mlx5_vdpa_net *ndev;
> >>>> +
> >>>> + mgtdev = auxiliary_get_drvdata(auxdev);
> >>>> + ndev = mgtdev->ndev;
> >>>> +
> >>>> + free_irqs(ndev);
> >>>> +}
> >>>> +
> >>>> static const struct auxiliary_device_id mlx5v_id_table[] = {
> >>>> { .name = MLX5_ADEV_NAME ".vnet", },
> >>>> {},
> >>>> @@ -3369,6 +3489,7 @@ static struct auxiliary_driver mlx5v_driver = {
> >>>> .name = "vnet",
> >>>> .probe = mlx5v_probe,
> >>>> .remove = mlx5v_remove,
> >>>> + .shutdown = mlx5v_shutdown,
> >>>
> >>> So we allocate the irqs during dev_add; it seems cleaner if we free
> >>> them in dev_del.
> >> Not sure what you mean here. Are you saying I should not call
> >> free_irqs() in mlx5v_shutdown()?
> >>> Note that mlx5v_remove will call vdpa_mgmtdev_unregister() which will
> >>> call dev_del.
> >>>
> >>> Thanks
> >>>
> >>>
> >>>> .id_table = mlx5v_id_table,
> >>>> };
> >>>> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>>> b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>>> index c90a89e1de4d..e5063b310d3c 100644
> >>>> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>>> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.h
> >>>> @@ -26,6 +26,19 @@ static inline u16 key2vid(u64 key)
> >>>> return (u16)(key >> 48) & 0xfff;
> >>>> }
> >>>> +#define MLX5_VDPA_IRQ_NAME_LEN 32
> >>>> +
> >>>> +struct mlx5_vdpa_irq_pool_entry {
> >>>> + struct msi_map map;
> >>>> + int usecount;
> >>>> + char name[MLX5_VDPA_IRQ_NAME_LEN];
> >>>> +};
> >>>> +
> >>>> +struct mlx5_vdpa_irq_pool {
> >>>> + int num_ent;
> >>>> + struct mlx5_vdpa_irq_pool_entry *entries;
> >>>> +};
> >>>> +
> >>>> struct mlx5_vdpa_net {
> >>>> struct mlx5_vdpa_dev mvdev;
> >>>> struct mlx5_vdpa_net_resources res;
> >>>> @@ -49,6 +62,7 @@ struct mlx5_vdpa_net {
> >>>> struct vdpa_callback config_cb;
> >>>> struct mlx5_vdpa_wq_ent cvq_ent;
> >>>> struct hlist_head macvlan_hash[MLX5V_MACVLAN_SIZE];
> >>>> + struct mlx5_vdpa_irq_pool irqp;
> >>>> struct dentry *debugfs;
> >>>> };
>
end of thread, other threads:[~2023-04-19 4:42 UTC | newest]
Thread overview: 9+ messages
[not found] <20230403162039.18932-1-elic@nvidia.com>
2023-04-03 17:00 ` [PATCH 0/1] posted interrupts support for mlx5 Parav Pandit via Virtualization
2023-04-03 18:09 ` Michael S. Tsirkin
2023-04-03 18:10 ` Michael S. Tsirkin
[not found] ` <20230403162039.18932-2-elic@nvidia.com>
2023-04-03 16:58 ` [PATCH 1/1] vdpa/mlx5: Support interrupt bypassing Parav Pandit via Virtualization
2023-04-03 18:11 ` Michael S. Tsirkin
2023-04-04 2:24 ` Jason Wang
[not found] ` <c12e67e9-2757-b1e3-93ae-f05df3774d03@nvidia.com>
2023-04-10 2:00 ` Jason Wang
[not found] ` <9200f3cc-aa57-c1ba-0d45-d6148eaa4533@nvidia.com>
2023-04-19 4:41 ` Jason Wang
[not found] ` <fd2c22a3-a9f3-b9a7-3669-a8ef5b0e0f3c@nvidia.com>
2023-04-10 2:02 ` Jason Wang