* Re: [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup [not found] ` <20221114070233.248-4-xieyongji@bytedance.com> @ 2022-11-14 7:58 ` Jason Wang [not found] ` <CACycT3s8CbO1YD3AAzN-iXEkf6MKM7ihK+=NFik+33HDjanBJQ@mail.gmail.com> 0 siblings, 1 reply; 9+ messages in thread From: Jason Wang @ 2022-11-14 7:58 UTC (permalink / raw) To: Xie Yongji; +Cc: virtualization, mst On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > Add sysfs interface for each vduse virtqueue to setup > irq affinity. This would be useful for performance > tuning, e.g., mitigate the virtqueue lock contention > in virtio block driver. Do we have any perforamnce numbers for this? Btw, I wonder if irq is the best for the name since we actually don't use IRQ at all. I guess using "callback" might be better? Thanks > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > --- > drivers/vdpa/vdpa_user/vduse_dev.c | 113 ++++++++++++++++++++++++++--- > 1 file changed, 102 insertions(+), 11 deletions(-) > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c > index 9303942c2e64..3a0922fa7eb2 100644 > --- a/drivers/vdpa/vdpa_user/vduse_dev.c > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c > @@ -57,6 +57,7 @@ struct vduse_virtqueue { > struct vdpa_callback cb; > struct work_struct inject; > struct work_struct kick; > + struct kobject kobj; > int irq_affinity; > }; > > @@ -1347,6 +1348,88 @@ static const struct file_operations vduse_dev_fops = { > .llseek = noop_llseek, > }; > > +static ssize_t irq_affinity_show(struct vduse_virtqueue *vq, char *buf) > +{ > + return sprintf(buf, "%d\n", vq->irq_affinity); > +} > + > +static ssize_t irq_affinity_store(struct vduse_virtqueue *vq, > + const char *buf, size_t count) > +{ > + int val; > + > + if (kstrtoint(buf, 0, &val) < 0) > + return -EINVAL; > + > + if (!(val == -1 || (val <= nr_cpu_ids && val >= 0 && cpu_online(val)))) > + return -EINVAL; > + > + vq->irq_affinity = val; > + > + return 
count; > +} > + > +struct vq_sysfs_entry { > + struct attribute attr; > + ssize_t (*show)(struct vduse_virtqueue *vq, char *buf); > + ssize_t (*store)(struct vduse_virtqueue *vq, const char *buf, > + size_t count); > +}; > + > +static struct vq_sysfs_entry irq_affinity_attr = __ATTR_RW(irq_affinity); > + > +static struct attribute *vq_attrs[] = { > + &irq_affinity_attr.attr, > + NULL, > +}; > +ATTRIBUTE_GROUPS(vq); > + > +static ssize_t vq_attr_show(struct kobject *kobj, struct attribute *attr, > + char *buf) > +{ > + struct vduse_virtqueue *vq = container_of(kobj, > + struct vduse_virtqueue, kobj); > + struct vq_sysfs_entry *entry = container_of(attr, > + struct vq_sysfs_entry, attr); > + > + if (!entry->show) > + return -EIO; > + > + return entry->show(vq, buf); > +} > + > +static ssize_t vq_attr_store(struct kobject *kobj, struct attribute *attr, > + const char *buf, size_t count) > +{ > + struct vduse_virtqueue *vq = container_of(kobj, > + struct vduse_virtqueue, kobj); > + struct vq_sysfs_entry *entry = container_of(attr, > + struct vq_sysfs_entry, attr); > + > + if (!entry->store) > + return -EIO; > + > + return entry->store(vq, buf, count); > +} > + > +static const struct sysfs_ops vq_sysfs_ops = { > + .show = vq_attr_show, > + .store = vq_attr_store, > +}; > + > +static void vq_release(struct kobject *kobj) > +{ > + struct vduse_virtqueue *vq = container_of(kobj, > + struct vduse_virtqueue, kobj); > + kfree(vq); > +} > + > +static struct kobj_type vq_type = { > + .release = vq_release, > + .sysfs_ops = &vq_sysfs_ops, > + .default_groups = vq_groups, > +}; > + > static void vduse_dev_deinit_vqs(struct vduse_dev *dev) > { > int i; > @@ -1355,13 +1438,13 @@ static void vduse_dev_deinit_vqs(struct vduse_dev *dev) > return; > > for (i = 0; i < dev->vq_num; i++) > - kfree(dev->vqs[i]); > + kobject_put(&dev->vqs[i]->kobj); > kfree(dev->vqs); > } > > static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num) > { > - int i; > + int ret, i; > > 
dev->vq_align = vq_align; > dev->vq_num = vq_num; > @@ -1371,8 +1454,10 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num) > > for (i = 0; i < vq_num; i++) { > dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL); > - if (!dev->vqs[i]) > + if (!dev->vqs[i]) { > + ret = -ENOMEM; > goto err; > + } > > dev->vqs[i]->index = i; > dev->vqs[i]->irq_affinity = -1; > @@ -1380,15 +1465,20 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num) > INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work); > spin_lock_init(&dev->vqs[i]->kick_lock); > spin_lock_init(&dev->vqs[i]->irq_lock); > + kobject_init(&dev->vqs[i]->kobj, &vq_type); > + ret = kobject_add(&dev->vqs[i]->kobj, > + &dev->dev->kobj, "vq%d", i); > + if (ret) > + goto err; > } > > return 0; > err: > while (i--) > - kfree(dev->vqs[i]); > + kobject_put(&dev->vqs[i]->kobj); > kfree(dev->vqs); > dev->vqs = NULL; > - return -ENOMEM; > + return ret; > } > > static struct vduse_dev *vduse_dev_create(void) > @@ -1563,10 +1653,6 @@ static int vduse_create_dev(struct vduse_dev_config *config, > dev->config = config_buf; > dev->config_size = config->config_size; > > - ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num); > - if (ret) > - goto err_vqs; > - > ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL); > if (ret < 0) > goto err_idr; > @@ -1580,14 +1666,19 @@ static int vduse_create_dev(struct vduse_dev_config *config, > ret = PTR_ERR(dev->dev); > goto err_dev; > } > + > + ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num); > + if (ret) > + goto err_vqs; > + > __module_get(THIS_MODULE); > > return 0; > +err_vqs: > + device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); > err_dev: > idr_remove(&vduse_idr, dev->minor); > err_idr: > - vduse_dev_deinit_vqs(dev); > -err_vqs: > vduse_domain_destroy(dev->domain); > err_domain: > kfree(dev->name); > -- > 2.20.1 > _______________________________________________ Virtualization 
mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <CACycT3s8CbO1YD3AAzN-iXEkf6MKM7ihK+=NFik+33HDjanBJQ@mail.gmail.com>]
* Re: [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup [not found] ` <CACycT3s8CbO1YD3AAzN-iXEkf6MKM7ihK+=NFik+33HDjanBJQ@mail.gmail.com> @ 2022-11-14 8:55 ` Jason Wang [not found] ` <CACycT3uwqNb=+9P=Ta7pw5qUCRfJXveMUX==CYPrtE=+OQBCrg@mail.gmail.com> 0 siblings, 1 reply; 9+ messages in thread From: Jason Wang @ 2022-11-14 8:55 UTC (permalink / raw) To: Yongji Xie; +Cc: virtualization, Michael S. Tsirkin On Mon, Nov 14, 2022 at 4:20 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > On Mon, Nov 14, 2022 at 3:58 PM Jason Wang <jasowang@redhat.com> wrote: > > > > On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > > > > > Add sysfs interface for each vduse virtqueue to setup > > > irq affinity. This would be useful for performance > > > tuning, e.g., mitigate the virtqueue lock contention > > > in virtio block driver. > > > > Do we have any perforamnce numbers for this? > > > > Almost 50% improvement (600k iops -> 900k iops) in the high iops > workloads. I have mentioned it in the cover-letter. For some reason, I miss that. I also wonder if we can do this automatically, then there's no need to play with sysfs which is kind of a burden for the management layer. Thanks > > > Btw, I wonder if irq is the best for the name since we actually don't > > use IRQ at all. I guess using "callback" might be better? > > > > Looks good to me. > > Thanks, > Yongji > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <CACycT3uwqNb=+9P=Ta7pw5qUCRfJXveMUX==CYPrtE=+OQBCrg@mail.gmail.com>]
* Re: [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup [not found] ` <CACycT3uwqNb=+9P=Ta7pw5qUCRfJXveMUX==CYPrtE=+OQBCrg@mail.gmail.com> @ 2022-11-16 7:11 ` Jason Wang 0 siblings, 0 replies; 9+ messages in thread From: Jason Wang @ 2022-11-16 7:11 UTC (permalink / raw) To: Yongji Xie; +Cc: virtualization, Michael S. Tsirkin On Tue, Nov 15, 2022 at 10:49 AM Yongji Xie <xieyongji@bytedance.com> wrote: > > On Mon, Nov 14, 2022 at 4:55 PM Jason Wang <jasowang@redhat.com> wrote: > > > > On Mon, Nov 14, 2022 at 4:20 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > On Mon, Nov 14, 2022 at 3:58 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > > > > > > > > > Add sysfs interface for each vduse virtqueue to setup > > > > > irq affinity. This would be useful for performance > > > > > tuning, e.g., mitigate the virtqueue lock contention > > > > > in virtio block driver. > > > > > > > > Do we have any perforamnce numbers for this? > > > > > > > > > > Almost 50% improvement (600k iops -> 900k iops) in the high iops > > > workloads. I have mentioned it in the cover-letter. > > > > For some reason, I miss that. > > > > I also wonder if we can do this automatically, then there's no need to > > play with sysfs which is kind of a burden for the management layer. > > > > This is hard to do since vduse doesn't know which cpu should be bound > for a certain virtqueue. Probably via the kick_vq()? It probably won't work when notification is disabled. But we need to think a little bit more about this. Requiring management software to do ad-hoc running just for VDUSE seems not easy. Thanks > > Thanks, > Yongji > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20221114070233.248-2-xieyongji@bytedance.com>]
* Re: [PATCH 1/6] vduse: Refactor allocation for vduse virtqueues [not found] ` <20221114070233.248-2-xieyongji@bytedance.com> @ 2022-11-17 5:17 ` Jason Wang 0 siblings, 0 replies; 9+ messages in thread From: Jason Wang @ 2022-11-17 5:17 UTC (permalink / raw) To: Xie Yongji; +Cc: virtualization, mst On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > Allocate memory for vduse virtqueues one by one instead of > doing one allocation for all of them. > > This is a preparation for adding sysfs interface for virtqueues. The code looks but I think it's not a must since each kobject could be allocated indepdpently? Thanks > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > --- > drivers/vdpa/vdpa_user/vduse_dev.c | 98 ++++++++++++++++++++---------- > 1 file changed, 66 insertions(+), 32 deletions(-) > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c > index 35dceee3ed56..37809bfcb7ef 100644 > --- a/drivers/vdpa/vdpa_user/vduse_dev.c > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c > @@ -76,7 +76,7 @@ struct vduse_umem { > struct vduse_dev { > struct vduse_vdpa *vdev; > struct device *dev; > - struct vduse_virtqueue *vqs; > + struct vduse_virtqueue **vqs; > struct vduse_iova_domain *domain; > char *name; > struct mutex lock; > @@ -434,7 +434,7 @@ static void vduse_dev_reset(struct vduse_dev *dev) > flush_work(&dev->inject); > > for (i = 0; i < dev->vq_num; i++) { > - struct vduse_virtqueue *vq = &dev->vqs[i]; > + struct vduse_virtqueue *vq = dev->vqs[i]; > > vq->ready = false; > vq->desc_addr = 0; > @@ -466,7 +466,7 @@ static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx, > u64 device_area) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > vq->desc_addr = desc_area; > vq->driver_addr = driver_area; > @@ -500,7 +500,7 @@ static void vduse_vq_kick_work(struct work_struct *work) > static void 
vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > if (!eventfd_signal_allowed()) { > schedule_work(&vq->kick); > @@ -513,7 +513,7 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx, > struct vdpa_callback *cb) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > spin_lock(&vq->irq_lock); > vq->cb.callback = cb->callback; > @@ -524,7 +524,7 @@ static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx, > static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > vq->num = num; > } > @@ -533,7 +533,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa, > u16 idx, bool ready) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > vq->ready = ready; > } > @@ -541,7 +541,7 @@ static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa, > static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > return vq->ready; > } > @@ -550,7 +550,7 @@ static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx, > const struct vdpa_vq_state *state) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) { > vq->state.packed.last_avail_counter = > @@ -569,7 +569,7 @@ static int vduse_vdpa_get_vq_state(struct 
vdpa_device *vdpa, u16 idx, > struct vdpa_vq_state *state) > { > struct vduse_dev *dev = vdpa_to_vduse(vdpa); > - struct vduse_virtqueue *vq = &dev->vqs[idx]; > + struct vduse_virtqueue *vq = dev->vqs[idx]; > > if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) > return vduse_dev_get_vq_state_packed(dev, vq, &state->packed); > @@ -624,8 +624,8 @@ static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa) > int i; > > for (i = 0; i < dev->vq_num; i++) > - if (num_max < dev->vqs[i].num_max) > - num_max = dev->vqs[i].num_max; > + if (num_max < dev->vqs[i]->num_max) > + num_max = dev->vqs[i]->num_max; > > return num_max; > } > @@ -863,7 +863,7 @@ static int vduse_kickfd_setup(struct vduse_dev *dev, > return -EINVAL; > > index = array_index_nospec(eventfd->index, dev->vq_num); > - vq = &dev->vqs[index]; > + vq = dev->vqs[index]; > if (eventfd->fd >= 0) { > ctx = eventfd_ctx_fdget(eventfd->fd); > if (IS_ERR(ctx)) > @@ -889,7 +889,7 @@ static bool vduse_dev_is_ready(struct vduse_dev *dev) > int i; > > for (i = 0; i < dev->vq_num; i++) > - if (!dev->vqs[i].num_max) > + if (!dev->vqs[i]->num_max) > return false; > > return true; > @@ -1130,7 +1130,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, > break; > > index = array_index_nospec(config.index, dev->vq_num); > - dev->vqs[index].num_max = config.max_size; > + dev->vqs[index]->num_max = config.max_size; > ret = 0; > break; > } > @@ -1148,7 +1148,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, > break; > > index = array_index_nospec(vq_info.index, dev->vq_num); > - vq = &dev->vqs[index]; > + vq = dev->vqs[index]; > vq_info.desc_addr = vq->desc_addr; > vq_info.driver_addr = vq->driver_addr; > vq_info.device_addr = vq->device_addr; > @@ -1198,7 +1198,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd, > break; > > index = array_index_nospec(index, dev->vq_num); > - ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject); > + ret = 
vduse_dev_queue_irq_work(dev, &dev->vqs[index]->inject); > break; > } > case VDUSE_IOTLB_REG_UMEM: { > @@ -1339,6 +1339,49 @@ static const struct file_operations vduse_dev_fops = { > .llseek = noop_llseek, > }; > > +static void vduse_dev_deinit_vqs(struct vduse_dev *dev) > +{ > + int i; > + > + if (!dev->vqs) > + return; > + > + for (i = 0; i < dev->vq_num; i++) > + kfree(dev->vqs[i]); > + kfree(dev->vqs); > +} > + > +static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num) > +{ > + int i; > + > + dev->vq_align = vq_align; > + dev->vq_num = vq_num; > + dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); > + if (!dev->vqs) > + return -ENOMEM; > + > + for (i = 0; i < vq_num; i++) { > + dev->vqs[i] = kzalloc(sizeof(*dev->vqs[i]), GFP_KERNEL); > + if (!dev->vqs[i]) > + goto err; > + > + dev->vqs[i]->index = i; > + INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject); > + INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work); > + spin_lock_init(&dev->vqs[i]->kick_lock); > + spin_lock_init(&dev->vqs[i]->irq_lock); > + } > + > + return 0; > +err: > + while (i--) > + kfree(dev->vqs[i]); > + kfree(dev->vqs); > + dev->vqs = NULL; > + return -ENOMEM; > +} > + > static struct vduse_dev *vduse_dev_create(void) > { > struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL); > @@ -1396,7 +1439,7 @@ static int vduse_destroy_dev(char *name) > device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor)); > idr_remove(&vduse_idr, dev->minor); > kvfree(dev->config); > - kfree(dev->vqs); > + vduse_dev_deinit_vqs(dev); > vduse_domain_destroy(dev->domain); > kfree(dev->name); > vduse_dev_destroy(dev); > @@ -1483,7 +1526,7 @@ ATTRIBUTE_GROUPS(vduse_dev); > static int vduse_create_dev(struct vduse_dev_config *config, > void *config_buf, u64 api_version) > { > - int i, ret; > + int ret; > struct vduse_dev *dev; > > ret = -EEXIST; > @@ -1510,19 +1553,10 @@ static int vduse_create_dev(struct vduse_dev_config *config, > > dev->config = config_buf; > 
dev->config_size = config->config_size; > - dev->vq_align = config->vq_align; > - dev->vq_num = config->vq_num; > - dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL); > - if (!dev->vqs) > - goto err_vqs; > > - for (i = 0; i < dev->vq_num; i++) { > - dev->vqs[i].index = i; > - INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject); > - INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work); > - spin_lock_init(&dev->vqs[i].kick_lock); > - spin_lock_init(&dev->vqs[i].irq_lock); > - } > + ret = vduse_dev_init_vqs(dev, config->vq_align, config->vq_num); > + if (ret) > + goto err_vqs; > > ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL); > if (ret < 0) > @@ -1543,7 +1577,7 @@ static int vduse_create_dev(struct vduse_dev_config *config, > err_dev: > idr_remove(&vduse_idr, dev->minor); > err_idr: > - kfree(dev->vqs); > + vduse_dev_deinit_vqs(dev); > err_vqs: > vduse_domain_destroy(dev->domain); > err_domain: > -- > 2.20.1 > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <20221114070233.248-7-xieyongji@bytedance.com>]
* Re: [PATCH 6/6] vduse: Support specifying bounce buffer size via sysfs [not found] ` <20221114070233.248-7-xieyongji@bytedance.com> @ 2022-11-17 5:26 ` Jason Wang [not found] ` <CACycT3vSbwhsrM6R8Sd23e=AfkYh18rx-4FscnWEVGiWiu4rQQ@mail.gmail.com> 0 siblings, 1 reply; 9+ messages in thread From: Jason Wang @ 2022-11-17 5:26 UTC (permalink / raw) To: Xie Yongji; +Cc: virtualization, mst On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > Add sysfs interface to support specifying bounce > buffer size in virtio-vdpa case. This is a performance > tuning parameter for high throughput workloads. I wonder what's the reason for not having this in VDUSE_CREATE_DEV? Thanks > > Signed-off-by: Xie Yongji <xieyongji@bytedance.com> > --- > drivers/vdpa/vdpa_user/vduse_dev.c | 45 +++++++++++++++++++++++++++++- > 1 file changed, 44 insertions(+), 1 deletion(-) > > diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c > index 428615093c28..3f97e2d7f7d7 100644 > --- a/drivers/vdpa/vdpa_user/vduse_dev.c > +++ b/drivers/vdpa/vdpa_user/vduse_dev.c > @@ -37,8 +37,11 @@ > #define DRV_LICENSE "GPL v2" > > #define VDUSE_DEV_MAX (1U << MINORBITS) > +#define VDUSE_MAX_BOUNCE_SIZE (1024 * 1024 * 1024) > +#define VDUSE_MIN_BOUNCE_SIZE (1024 * 1024) > #define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024) > -#define VDUSE_IOVA_SIZE (128 * 1024 * 1024) > +/* 128 MB reserved for virtqueue creation */ > +#define VDUSE_IOVA_SIZE (VDUSE_MAX_BOUNCE_SIZE + 128 * 1024 * 1024) > #define VDUSE_MSG_DEFAULT_TIMEOUT 30 > > struct vduse_virtqueue { > @@ -1687,8 +1690,48 @@ static ssize_t msg_timeout_store(struct device *device, > > static DEVICE_ATTR_RW(msg_timeout); > > +static ssize_t bounce_size_show(struct device *device, > + struct device_attribute *attr, char *buf) > +{ > + struct vduse_dev *dev = dev_get_drvdata(device); > + > + return sysfs_emit(buf, "%u\n", dev->bounce_size); > +} > + > +static ssize_t bounce_size_store(struct device *device, > + 
struct device_attribute *attr, > + const char *buf, size_t count) > +{ > + struct vduse_dev *dev = dev_get_drvdata(device); > + unsigned int bounce_size; > + int ret; > + > + ret = -EPERM; > + mutex_lock(&dev->domain_lock); > + if (dev->domain) > + goto unlock; > + > + ret = kstrtouint(buf, 10, &bounce_size); > + if (ret < 0) > + goto unlock; > + > + ret = -EINVAL; > + if (bounce_size > VDUSE_MAX_BOUNCE_SIZE || > + bounce_size < VDUSE_MIN_BOUNCE_SIZE) > + goto unlock; > + > + dev->bounce_size = bounce_size; > + ret = count; > +unlock: > + mutex_unlock(&dev->domain_lock); > + return ret; > +} > + > +static DEVICE_ATTR_RW(bounce_size); > + > static struct attribute *vduse_dev_attrs[] = { > &dev_attr_msg_timeout.attr, > + &dev_attr_bounce_size.attr, > NULL > }; > > -- > 2.20.1 > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <CACycT3vSbwhsrM6R8Sd23e=AfkYh18rx-4FscnWEVGiWiu4rQQ@mail.gmail.com>]
* Re: [PATCH 6/6] vduse: Support specifying bounce buffer size via sysfs [not found] ` <CACycT3vSbwhsrM6R8Sd23e=AfkYh18rx-4FscnWEVGiWiu4rQQ@mail.gmail.com> @ 2022-11-17 6:12 ` Jason Wang 0 siblings, 0 replies; 9+ messages in thread From: Jason Wang @ 2022-11-17 6:12 UTC (permalink / raw) To: Yongji Xie; +Cc: virtualization, Michael S. Tsirkin On Thu, Nov 17, 2022 at 2:08 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > On Thu, Nov 17, 2022 at 1:26 PM Jason Wang <jasowang@redhat.com> wrote: > > > > On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > > > > > Add sysfs interface to support specifying bounce > > > buffer size in virtio-vdpa case. This is a performance > > > tuning parameter for high throughput workloads. > > > > I wonder what's the reason for not having this in VDUSE_CREATE_DEV? > > > > This was discussed [1] before. > > [1] https://lore.kernel.org/netdev/20210713084656.232-3-xieyongji@bytedance.com/T/#m48ccd5bb514c40345d476bac80a59a99eeb9e9a7 > > And the device's sysfs interface should be better than module parameters. Right, so let's document this somewhere (probably the changelog). Thanks > > Thanks, > Yongji > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <CACycT3siOCZv+u+-Xcto9BEdY1c8t_ivw-DF73bmuLqBRxF7=A@mail.gmail.com>]
* Re: [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup [not found] <CACycT3siOCZv+u+-Xcto9BEdY1c8t_ivw-DF73bmuLqBRxF7=A@mail.gmail.com> @ 2022-11-17 3:36 ` Jason Wang [not found] ` <CACycT3vPyEuQcJEEPJE2Dv-49f=w8xLoOXsoLb5Fjtx9uqQoyQ@mail.gmail.com> 0 siblings, 1 reply; 9+ messages in thread From: Jason Wang @ 2022-11-17 3:36 UTC (permalink / raw) To: Yongji Xie; +Cc: virtualization, Michael S. Tsirkin On Wed, Nov 16, 2022 at 3:46 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > On Wed, Nov 16, 2022 at 3:11 PM Jason Wang <jasowang@redhat.com> wrote: > > > > On Tue, Nov 15, 2022 at 10:49 AM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > On Mon, Nov 14, 2022 at 4:55 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > On Mon, Nov 14, 2022 at 4:20 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > > > > > On Mon, Nov 14, 2022 at 3:58 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > > > > > > > > > > > > > Add sysfs interface for each vduse virtqueue to setup > > > > > > > irq affinity. This would be useful for performance > > > > > > > tuning, e.g., mitigate the virtqueue lock contention > > > > > > > in virtio block driver. > > > > > > > > > > > > Do we have any perforamnce numbers for this? > > > > > > > > > > > > > > > > Almost 50% improvement (600k iops -> 900k iops) in the high iops > > > > > workloads. I have mentioned it in the cover-letter. > > > > > > > > For some reason, I miss that. > > > > > > > > I also wonder if we can do this automatically, then there's no need to > > > > play with sysfs which is kind of a burden for the management layer. > > > > > > > > > > This is hard to do since vduse doesn't know which cpu should be bound > > > for a certain virtqueue. > > > > Probably via the kick_vq()? It probably won't work when notification > > is disabled. But we need to think a little bit more about this. 
> > Yes, another problem is that this way can only work when the cpu and > virtqueue are 1:1 mapping. It's still hard to decide which cpu to bind > in the N:1 mapping case. This is the same situation as what you propose here. I think it would be better to use cpumask instead of cpu id here. > > So I think it could be an optimization, but the sysfs interface is still needed. > > > Requiring management software to do ad-hoc running just for VDUSE > > seems not easy. > > > > I'm not sure. In the kubernetes environment, something like a CSI/CNI > plugin can do it. Only works when the process is bound to a specific cpu. If a process is migrated to another CPU, it would be hard to track. Thanks > > Thanks, > Yongji > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <CACycT3vPyEuQcJEEPJE2Dv-49f=w8xLoOXsoLb5Fjtx9uqQoyQ@mail.gmail.com>]
* Re: [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup [not found] ` <CACycT3vPyEuQcJEEPJE2Dv-49f=w8xLoOXsoLb5Fjtx9uqQoyQ@mail.gmail.com> @ 2022-11-17 6:07 ` Jason Wang [not found] ` <CACycT3uH-OPZ9BUR1OnXmY8Lx0vhYTq=Gb3S9i6thGApa5GHFQ@mail.gmail.com> 0 siblings, 1 reply; 9+ messages in thread From: Jason Wang @ 2022-11-17 6:07 UTC (permalink / raw) To: Yongji Xie; +Cc: virtualization, Michael S. Tsirkin On Thu, Nov 17, 2022 at 1:48 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > On Thu, Nov 17, 2022 at 11:37 AM Jason Wang <jasowang@redhat.com> wrote: > > > > On Wed, Nov 16, 2022 at 3:46 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > On Wed, Nov 16, 2022 at 3:11 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > On Tue, Nov 15, 2022 at 10:49 AM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > > > > > On Mon, Nov 14, 2022 at 4:55 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > On Mon, Nov 14, 2022 at 4:20 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > > > > > > > > > On Mon, Nov 14, 2022 at 3:58 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > > > > > On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > > > > > > > > > > > > > > > > > Add sysfs interface for each vduse virtqueue to setup > > > > > > > > > irq affinity. This would be useful for performance > > > > > > > > > tuning, e.g., mitigate the virtqueue lock contention > > > > > > > > > in virtio block driver. > > > > > > > > > > > > > > > > Do we have any perforamnce numbers for this? > > > > > > > > > > > > > > > > > > > > > > Almost 50% improvement (600k iops -> 900k iops) in the high iops > > > > > > > workloads. I have mentioned it in the cover-letter. > > > > > > > > > > > > For some reason, I miss that. > > > > > > > > > > > > I also wonder if we can do this automatically, then there's no need to > > > > > > play with sysfs which is kind of a burden for the management layer. 
> > > > > > > > > > > > > > > > This is hard to do since vduse doesn't know which cpu should be bound > > > > > for a certain virtqueue. > > > > > > > > Probably via the kick_vq()? It probably won't work when notification > > > > is disabled. But we need to think a little bit more about this. > > > > > > Yes, another problem is that this way can only work when the cpu and > > > virtqueue are 1:1 mapping. It's still hard to decide which cpu to bind > > > in the N:1 mapping case. > > > > This is the same situation as what you propose here. I think it would > > be better to use cpumask instead of cpu id here. > > > > If so, we need to know which cpu to bind for one virtqueue. Do you > mean using the cpu who kicks the virtqueue? I meant you're using: int irq_affinity; This seems to assume that the callback can only be delivered to a specific cpu. It would make more sense to use cpumask_t. This may have broader use cases. > > > > > > > So I think it could be an optimization, but the sysfs interface is still needed. > > > > > > > Requiring management software to do ad-hoc running just for VDUSE > > > > seems not easy. > > > > > > > > > > I'm not sure. In the kubernetes environment, something like a CSI/CNI > > > plugin can do it. > > > > Only works when the process is bound to a specific cpu. If a process > > is migrated to another CPU, it would be hard to track. > > > > OK, I see. Seems like there's no good way to handle this case. Yes, using cpumask_t might improve things a little bit. > Maybe > it's better to leave it as it is. It would be better to think of an automatic method to do this as affinity managed irq used by virtio-pci (not sure how hard it is though). Thanks > > Thanks, > Yongji > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
[parent not found: <CACycT3uH-OPZ9BUR1OnXmY8Lx0vhYTq=Gb3S9i6thGApa5GHFQ@mail.gmail.com>]
* Re: [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup [not found] ` <CACycT3uH-OPZ9BUR1OnXmY8Lx0vhYTq=Gb3S9i6thGApa5GHFQ@mail.gmail.com> @ 2022-11-18 7:23 ` Jason Wang 0 siblings, 0 replies; 9+ messages in thread From: Jason Wang @ 2022-11-18 7:23 UTC (permalink / raw) To: Yongji Xie; +Cc: virtualization, Michael S. Tsirkin On Thu, Nov 17, 2022 at 4:54 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > On Thu, Nov 17, 2022 at 2:07 PM Jason Wang <jasowang@redhat.com> wrote: > > > > On Thu, Nov 17, 2022 at 1:48 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > On Thu, Nov 17, 2022 at 11:37 AM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > On Wed, Nov 16, 2022 at 3:46 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > > > > > On Wed, Nov 16, 2022 at 3:11 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > On Tue, Nov 15, 2022 at 10:49 AM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > > > > > > > > > On Mon, Nov 14, 2022 at 4:55 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > > > > > On Mon, Nov 14, 2022 at 4:20 PM Yongji Xie <xieyongji@bytedance.com> wrote: > > > > > > > > > > > > > > > > > > On Mon, Nov 14, 2022 at 3:58 PM Jason Wang <jasowang@redhat.com> wrote: > > > > > > > > > > > > > > > > > > > > On Mon, Nov 14, 2022 at 3:16 PM Xie Yongji <xieyongji@bytedance.com> wrote: > > > > > > > > > > > > > > > > > > > > > > Add sysfs interface for each vduse virtqueue to setup > > > > > > > > > > > irq affinity. This would be useful for performance > > > > > > > > > > > tuning, e.g., mitigate the virtqueue lock contention > > > > > > > > > > > in virtio block driver. > > > > > > > > > > > > > > > > > > > > Do we have any perforamnce numbers for this? > > > > > > > > > > > > > > > > > > > > > > > > > > > > Almost 50% improvement (600k iops -> 900k iops) in the high iops > > > > > > > > > workloads. I have mentioned it in the cover-letter. 
> > > > > > > > > > > > > > > > For some reason, I miss that. > > > > > > > > > > > > > > > > I also wonder if we can do this automatically, then there's no need to > > > > > > > > play with sysfs which is kind of a burden for the management layer. > > > > > > > > > > > > > > > > > > > > > > This is hard to do since vduse doesn't know which cpu should be bound > > > > > > > for a certain virtqueue. > > > > > > > > > > > > Probably via the kick_vq()? It probably won't work when notification > > > > > > is disabled. But we need to think a little bit more about this. > > > > > > > > > > Yes, another problem is that this way can only work when the cpu and > > > > > virtqueue are 1:1 mapping. It's still hard to decide which cpu to bind > > > > > in the N:1 mapping case. > > > > > > > > This is the same situation as what you propose here. I think it would > > > > be better to use cpumask instead of cpu id here. > > > > > > > > > > If so, we need to know which cpu to bind for one virtqueue. Do you > > > mean using the cpu who kicks the virtqueue? > > > > I meant you're using: > > > > int irq_affinity; > > > > This seems to assume that the callback can only be delivered to a > > specific cpu. It would make more sense to use cpumask_t. This may have > > broader use cases. > > > > Yes, I see. I meant we need to know how to choose the cpu to run the > irq callback if we use cpumask_t, e.g., round-robin or choosing the > cpu who kicked the virtqueue before. > > > > > > > > > > > > > > So I think it could be an optimization, but the sysfs interface is still needed. > > > > > > > > > > > Requiring management software to do ad-hoc running just for VDUSE > > > > > > seems not easy. > > > > > > > > > > > > > > > > I'm not sure. In the kubernetes environment, something like a CSI/CNI > > > > > plugin can do it. > > > > > > > > Only works when the process is bound to a specific cpu. If a process > > > > is migrated to another CPU, it would be hard to track. 
> > > > > > > > > > OK, I see. Seems like there's no good way to handle this case. > > > > Yes, using cpumask_t might improve things a little bit. > > > > > Maybe > > > it's better to leave it as it is. > > > > It would be better to think of an automatic method to do this as > > affinity managed irq used by virtio-pci (not sure how hard it is > > though). > > > > Do you mean making use of .set_vq_affinity and .get_vq_affinity callbacks? This works for net but not block. I know little about block but looks like block is using affinity descriptor to allow blk mq to do proper irq steering. Maybe we can do something the same. Thanks > > Thanks, > Yongji > _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2022-11-18 7:23 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20221114070233.248-1-xieyongji@bytedance.com>
[not found] ` <20221114070233.248-4-xieyongji@bytedance.com>
2022-11-14 7:58 ` [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup Jason Wang
[not found] ` <CACycT3s8CbO1YD3AAzN-iXEkf6MKM7ihK+=NFik+33HDjanBJQ@mail.gmail.com>
2022-11-14 8:55 ` Jason Wang
[not found] ` <CACycT3uwqNb=+9P=Ta7pw5qUCRfJXveMUX==CYPrtE=+OQBCrg@mail.gmail.com>
2022-11-16 7:11 ` Jason Wang
[not found] ` <20221114070233.248-2-xieyongji@bytedance.com>
2022-11-17 5:17 ` [PATCH 1/6] vduse: Refactor allocation for vduse virtqueues Jason Wang
[not found] ` <20221114070233.248-7-xieyongji@bytedance.com>
2022-11-17 5:26 ` [PATCH 6/6] vduse: Support specifying bounce buffer size via sysfs Jason Wang
[not found] ` <CACycT3vSbwhsrM6R8Sd23e=AfkYh18rx-4FscnWEVGiWiu4rQQ@mail.gmail.com>
2022-11-17 6:12 ` Jason Wang
[not found] <CACycT3siOCZv+u+-Xcto9BEdY1c8t_ivw-DF73bmuLqBRxF7=A@mail.gmail.com>
2022-11-17 3:36 ` [PATCH 3/6] vduse: Add sysfs interface for irq affinity setup Jason Wang
[not found] ` <CACycT3vPyEuQcJEEPJE2Dv-49f=w8xLoOXsoLb5Fjtx9uqQoyQ@mail.gmail.com>
2022-11-17 6:07 ` Jason Wang
[not found] ` <CACycT3uH-OPZ9BUR1OnXmY8Lx0vhYTq=Gb3S9i6thGApa5GHFQ@mail.gmail.com>
2022-11-18 7:23 ` Jason Wang
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).