virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH v2] vduse: Fix race condition between resetting and irq injecting
       [not found] <20210929083050.88-1-xieyongji@bytedance.com>
@ 2021-09-29  8:40 ` Jason Wang
       [not found]   ` <CACycT3vp-kxMGVL8W=ebQgOFt_aWs5Y33ZML-Up8KuwsTfQCwA@mail.gmail.com>
  2021-10-13 11:10 ` Michael S. Tsirkin
  1 sibling, 1 reply; 4+ messages in thread
From: Jason Wang @ 2021-09-29  8:40 UTC (permalink / raw)
  To: Xie Yongji; +Cc: virtualization, mst

On Wed, Sep 29, 2021 at 4:32 PM Xie Yongji <xieyongji@bytedance.com> wrote:
>
> The interrupt might be triggered after a reset since there is
> no synchronization between resetting and irq injecting. And it
> might break something if the interrupt is delayed until a new
> round of device initialization.
>
> Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace")
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
>  drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++------------
>  1 file changed, 25 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index cefb301b2ee4..841667a896dd 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -80,6 +80,7 @@ struct vduse_dev {
>         struct vdpa_callback config_cb;
>         struct work_struct inject;
>         spinlock_t irq_lock;
> +       struct rw_semaphore rwsem;
>         int minor;
>         bool broken;
>         bool connected;
> @@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
>         if (domain->bounce_map)
>                 vduse_domain_reset_bounce_map(domain);
>
> +       down_write(&dev->rwsem);
> +
>         dev->status = 0;
>         dev->driver_features = 0;
>         dev->generation++;
> @@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
>                 flush_work(&vq->inject);
>                 flush_work(&vq->kick);
>         }
> +
> +       up_write(&dev->rwsem);

Thinking about this again: do we need to sync set_status() as well?

Thanks

>  }
>
>  static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
> @@ -885,6 +890,23 @@ static void vduse_vq_irq_inject(struct work_struct *work)
>         spin_unlock_irq(&vq->irq_lock);
>  }
>
> +static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> +                                   struct work_struct *irq_work)
> +{
> +       int ret = -EINVAL;
> +
> +       down_read(&dev->rwsem);
> +       if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> +               goto unlock;
> +
> +       ret = 0;
> +       queue_work(vduse_irq_wq, irq_work);
> +unlock:
> +       up_read(&dev->rwsem);
> +
> +       return ret;
> +}
> +
>  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>                             unsigned long arg)
>  {
> @@ -966,12 +988,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>                 break;
>         }
>         case VDUSE_DEV_INJECT_CONFIG_IRQ:
> -               ret = -EINVAL;
> -               if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> -                       break;
> -
> -               ret = 0;
> -               queue_work(vduse_irq_wq, &dev->inject);
> +               ret = vduse_dev_queue_irq_work(dev, &dev->inject);
>                 break;
>         case VDUSE_VQ_SETUP: {
>                 struct vduse_vq_config config;
> @@ -1049,10 +1066,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>         case VDUSE_VQ_INJECT_IRQ: {
>                 u32 index;
>
> -               ret = -EINVAL;
> -               if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> -                       break;
> -
>                 ret = -EFAULT;
>                 if (get_user(index, (u32 __user *)argp))
>                         break;
> @@ -1061,9 +1074,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>                 if (index >= dev->vq_num)
>                         break;
>
> -               ret = 0;
>                 index = array_index_nospec(index, dev->vq_num);
> -               queue_work(vduse_irq_wq, &dev->vqs[index].inject);
> +               ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
>                 break;
>         }
>         default:
> @@ -1144,6 +1156,7 @@ static struct vduse_dev *vduse_dev_create(void)
>         INIT_LIST_HEAD(&dev->send_list);
>         INIT_LIST_HEAD(&dev->recv_list);
>         spin_lock_init(&dev->irq_lock);
> +       init_rwsem(&dev->rwsem);
>
>         INIT_WORK(&dev->inject, vduse_dev_irq_inject);
>         init_waitqueue_head(&dev->waitq);
> --
> 2.11.0
>

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] vduse: Fix race condition between resetting and irq injecting
       [not found]   ` <CACycT3vp-kxMGVL8W=ebQgOFt_aWs5Y33ZML-Up8KuwsTfQCwA@mail.gmail.com>
@ 2021-10-13 11:06     ` Michael S. Tsirkin
  0 siblings, 0 replies; 4+ messages in thread
From: Michael S. Tsirkin @ 2021-10-13 11:06 UTC (permalink / raw)
  To: Yongji Xie; +Cc: virtualization

On Wed, Sep 29, 2021 at 04:50:24PM +0800, Yongji Xie wrote:
> On Wed, Sep 29, 2021 at 4:41 PM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Wed, Sep 29, 2021 at 4:32 PM Xie Yongji <xieyongji@bytedance.com> wrote:
> > >
> > > The interrupt might be triggered after a reset since there is
> > > no synchronization between resetting and irq injecting. And it
> > > might break something if the interrupt is delayed until a new
> > > round of device initialization.
> > >
> > > Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace")
> > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > ---
> > >  drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++------------
> > >  1 file changed, 25 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > index cefb301b2ee4..841667a896dd 100644
> > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > @@ -80,6 +80,7 @@ struct vduse_dev {
> > >         struct vdpa_callback config_cb;
> > >         struct work_struct inject;
> > >         spinlock_t irq_lock;
> > > +       struct rw_semaphore rwsem;
> > >         int minor;
> > >         bool broken;
> > >         bool connected;
> > > @@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> > >         if (domain->bounce_map)
> > >                 vduse_domain_reset_bounce_map(domain);
> > >
> > > +       down_write(&dev->rwsem);
> > > +
> > >         dev->status = 0;
> > >         dev->driver_features = 0;
> > >         dev->generation++;
> > > @@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> > >                 flush_work(&vq->inject);
> > >                 flush_work(&vq->kick);
> > >         }
> > > +
> > > +       up_write(&dev->rwsem);
> >
> > Rethink about this, do we need to sync set_status() as well?
> >
> 
> I'm not sure. But I don't see any case that needs synchronization.
> 
> Thanks,
> Yongji

Well, you are testing the status under a lock, but it can change at any time...

-- 
MST

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] vduse: Fix race condition between resetting and irq injecting
       [not found] <20210929083050.88-1-xieyongji@bytedance.com>
  2021-09-29  8:40 ` [PATCH v2] vduse: Fix race condition between resetting and irq injecting Jason Wang
@ 2021-10-13 11:10 ` Michael S. Tsirkin
       [not found]   ` <CACycT3tSP-Vxt_u+ow71ZzxBjKuGycZ1LqUrbjQ6Ew3ehX7kqw@mail.gmail.com>
  1 sibling, 1 reply; 4+ messages in thread
From: Michael S. Tsirkin @ 2021-10-13 11:10 UTC (permalink / raw)
  To: Xie Yongji; +Cc: virtualization

On Wed, Sep 29, 2021 at 04:30:50PM +0800, Xie Yongji wrote:
> The interrupt might be triggered after a reset since there is
> no synchronization between resetting and irq injecting.

In fact, irq_lock is already used to synchronize with
irqs. Why isn't taking and releasing it enough?

> And it
> might break something if the interrupt is delayed until a new
> round of device initialization.
> 
> Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace")
> Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> ---
>  drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++------------
>  1 file changed, 25 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index cefb301b2ee4..841667a896dd 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -80,6 +80,7 @@ struct vduse_dev {
>  	struct vdpa_callback config_cb;
>  	struct work_struct inject;
>  	spinlock_t irq_lock;
> +	struct rw_semaphore rwsem;
>  	int minor;
>  	bool broken;
>  	bool connected;

What does this lock protect? Use a more descriptive name pls,
and maybe add a comment.


> @@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
>  	if (domain->bounce_map)
>  		vduse_domain_reset_bounce_map(domain);
>  
> +	down_write(&dev->rwsem);
> +
>  	dev->status = 0;
>  	dev->driver_features = 0;
>  	dev->generation++;
> @@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
>  		flush_work(&vq->inject);
>  		flush_work(&vq->kick);
>  	}
> +
> +	up_write(&dev->rwsem);
>  }
>  
>  static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
> @@ -885,6 +890,23 @@ static void vduse_vq_irq_inject(struct work_struct *work)
>  	spin_unlock_irq(&vq->irq_lock);
>  }
>  
> +static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> +				    struct work_struct *irq_work)
> +{
> +	int ret = -EINVAL;
> +
> +	down_read(&dev->rwsem);
> +	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> +		goto unlock;
> +
> +	ret = 0;
> +	queue_work(vduse_irq_wq, irq_work);
> +unlock:
> +	up_read(&dev->rwsem);
> +
> +	return ret;
> +}
> +
>  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>  			    unsigned long arg)
>  {


so that's a lot of overhead for an irq.
Normally the way to address races like this is to add
flushing to the reset path, not locking to irq path.


> @@ -966,12 +988,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>  		break;
>  	}
>  	case VDUSE_DEV_INJECT_CONFIG_IRQ:
> -		ret = -EINVAL;
> -		if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> -			break;
> -
> -		ret = 0;
> -		queue_work(vduse_irq_wq, &dev->inject);
> +		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
>  		break;
>  	case VDUSE_VQ_SETUP: {
>  		struct vduse_vq_config config;
> @@ -1049,10 +1066,6 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>  	case VDUSE_VQ_INJECT_IRQ: {
>  		u32 index;
>  
> -		ret = -EINVAL;
> -		if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> -			break;
> -
>  		ret = -EFAULT;
>  		if (get_user(index, (u32 __user *)argp))
>  			break;
> @@ -1061,9 +1074,8 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
>  		if (index >= dev->vq_num)
>  			break;
>  
> -		ret = 0;
>  		index = array_index_nospec(index, dev->vq_num);
> -		queue_work(vduse_irq_wq, &dev->vqs[index].inject);
> +		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
>  		break;
>  	}
>  	default:
> @@ -1144,6 +1156,7 @@ static struct vduse_dev *vduse_dev_create(void)
>  	INIT_LIST_HEAD(&dev->send_list);
>  	INIT_LIST_HEAD(&dev->recv_list);
>  	spin_lock_init(&dev->irq_lock);
> +	init_rwsem(&dev->rwsem);
>  
>  	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
>  	init_waitqueue_head(&dev->waitq);
> -- 
> 2.11.0

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] vduse: Fix race condition between resetting and irq injecting
       [not found]   ` <CACycT3tSP-Vxt_u+ow71ZzxBjKuGycZ1LqUrbjQ6Ew3ehX7kqw@mail.gmail.com>
@ 2021-10-13 12:34     ` Michael S. Tsirkin
  0 siblings, 0 replies; 4+ messages in thread
From: Michael S. Tsirkin @ 2021-10-13 12:34 UTC (permalink / raw)
  To: Yongji Xie; +Cc: virtualization

On Wed, Oct 13, 2021 at 08:30:40PM +0800, Yongji Xie wrote:
> On Wed, Oct 13, 2021 at 7:10 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Wed, Sep 29, 2021 at 04:30:50PM +0800, Xie Yongji wrote:
> > > The interrupt might be triggered after a reset since there is
> > > no synchronization between resetting and irq injecting.
> >
> > In fact, irq_lock is already used to synchronize with
> > irqs. Why isn't taking and releasing it enough?
> >
> 
> For example:
> 
> CPU 0                                       CPU 1
> ---------                                   ---------
> vduse_dev_ioctl()
>   check DRIVER_OK
> 
>                                             vduse_dev_reset()
> 
>                                               flush_work(&vq->inject);
>     queue_work(vduse_irq_wq, &vq->inject);
> 
>                                             virtio_vdpa_probe()
> 
>                                               virtio_vdpa_find_vqs()
>    vduse_vq_irq_inject()
>      vq->cb.callback(vq->cb.private);
> 
>                                             set DRIVER_OK
> 
> In the above case, the irq callback is still triggered before DRIVER_OK is set.
> 
> But now I think it would be better to just check DRIVER_OK again in
> vduse_vq_irq_inject().

And then presumably make sure each time we set status
it's done under the irq lock?

> > > And it
> > > might break something if the interrupt is delayed until a new
> > > round of device initialization.
> > >
> > > Fixes: c8a6153b6c59 ("vduse: Introduce VDUSE - vDPA Device in Userspace")
> > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com>
> > > ---
> > >  drivers/vdpa/vdpa_user/vduse_dev.c | 37 +++++++++++++++++++++++++------------
> > >  1 file changed, 25 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > index cefb301b2ee4..841667a896dd 100644
> > > --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> > > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> > > @@ -80,6 +80,7 @@ struct vduse_dev {
> > >       struct vdpa_callback config_cb;
> > >       struct work_struct inject;
> > >       spinlock_t irq_lock;
> > > +     struct rw_semaphore rwsem;
> > >       int minor;
> > >       bool broken;
> > >       bool connected;
> >
> > What does this lock protect? Use a more descriptive name pls,
> > and maybe add a comment.
> >
> 
> This lock is used to ensure there is no more inflight irq kwork after reset.
> 
> >
> > > @@ -410,6 +411,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> > >       if (domain->bounce_map)
> > >               vduse_domain_reset_bounce_map(domain);
> > >
> > > +     down_write(&dev->rwsem);
> > > +
> > >       dev->status = 0;
> > >       dev->driver_features = 0;
> > >       dev->generation++;
> > > @@ -443,6 +446,8 @@ static void vduse_dev_reset(struct vduse_dev *dev)
> > >               flush_work(&vq->inject);
> > >               flush_work(&vq->kick);
> > >       }
> > > +
> > > +     up_write(&dev->rwsem);
> > >  }
> > >
> > >  static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
> > > @@ -885,6 +890,23 @@ static void vduse_vq_irq_inject(struct work_struct *work)
> > >       spin_unlock_irq(&vq->irq_lock);
> > >  }
> > >
> > > +static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
> > > +                                 struct work_struct *irq_work)
> > > +{
> > > +     int ret = -EINVAL;
> > > +
> > > +     down_read(&dev->rwsem);
> > > +     if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
> > > +             goto unlock;
> > > +
> > > +     ret = 0;
> > > +     queue_work(vduse_irq_wq, irq_work);
> > > +unlock:
> > > +     up_read(&dev->rwsem);
> > > +
> > > +     return ret;
> > > +}
> > > +
> > >  static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
> > >                           unsigned long arg)
> > >  {
> >
> >
> > so that's a lot of overhead for an irq.
> > Normally the way to address races like this is to add
> > flushing to the reset path, not locking to irq path.
> >
> 
> Yes, we already call flush_work() in the reset path.
> 
> Thanks,
> Yongji

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-10-13 12:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20210929083050.88-1-xieyongji@bytedance.com>
2021-09-29  8:40 ` [PATCH v2] vduse: Fix race condition between resetting and irq injecting Jason Wang
     [not found]   ` <CACycT3vp-kxMGVL8W=ebQgOFt_aWs5Y33ZML-Up8KuwsTfQCwA@mail.gmail.com>
2021-10-13 11:06     ` Michael S. Tsirkin
2021-10-13 11:10 ` Michael S. Tsirkin
     [not found]   ` <CACycT3tSP-Vxt_u+ow71ZzxBjKuGycZ1LqUrbjQ6Ew3ehX7kqw@mail.gmail.com>
2021-10-13 12:34     ` Michael S. Tsirkin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).