[PATCH 1/2] vhost: simplify work flushing

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2] vhost: simplify work flushing
@ 2016-04-26  2:14 Jason Wang
  2016-04-26  2:14 ` [PATCH 2/2] vhost: lockless enqueuing Jason Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2016-04-26  2:14 UTC (permalink / raw)
  To: mst; +Cc: netdev, linux-kernel, kvm, virtualization

We used to implement the work flushing through tracking queued seq,
done seq, and the number of flushing. This patch simplify this by just
implement work flushing through another kind of vhost work with
completion. This will be used by lockless enqueuing patch.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 53 ++++++++++++++++++++-------------------------------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 669fef1..73dd16d 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -131,6 +131,19 @@ static void vhost_reset_is_le(struct vhost_virtqueue *vq)
 	vq->is_le = virtio_legacy_is_little_endian();
 }
 
+struct vhost_flush_struct {
+	struct vhost_work work;
+	struct completion wait_event;
+};
+
+static void vhost_flush_work(struct vhost_work *work)
+{
+	struct vhost_flush_struct *s;
+
+	s = container_of(work, struct vhost_flush_struct, work);
+	complete(&s->wait_event);
+}
+
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -158,8 +171,6 @@ void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
 	INIT_LIST_HEAD(&work->node);
 	work->fn = fn;
 	init_waitqueue_head(&work->done);
-	work->flushing = 0;
-	work->queue_seq = work->done_seq = 0;
 }
 EXPORT_SYMBOL_GPL(vhost_work_init);
 
@@ -211,31 +222,17 @@ void vhost_poll_stop(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_stop);
 
-static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
-				unsigned seq)
-{
-	int left;
-
-	spin_lock_irq(&dev->work_lock);
-	left = seq - work->done_seq;
-	spin_unlock_irq(&dev->work_lock);
-	return left <= 0;
-}
-
 void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
 {
-	unsigned seq;
-	int flushing;
+	struct vhost_flush_struct flush;
+
+	if (dev->worker) {
+		init_completion(&flush.wait_event);
+		vhost_work_init(&flush.work, vhost_flush_work);
 
-	spin_lock_irq(&dev->work_lock);
-	seq = work->queue_seq;
-	work->flushing++;
-	spin_unlock_irq(&dev->work_lock);
-	wait_event(work->done, vhost_work_seq_done(dev, work, seq));
-	spin_lock_irq(&dev->work_lock);
-	flushing = --work->flushing;
-	spin_unlock_irq(&dev->work_lock);
-	BUG_ON(flushing < 0);
+		vhost_work_queue(dev, &flush.work);
+		wait_for_completion(&flush.wait_event);
+	}
 }
 EXPORT_SYMBOL_GPL(vhost_work_flush);
 
@@ -254,7 +251,6 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 	spin_lock_irqsave(&dev->work_lock, flags);
 	if (list_empty(&work->node)) {
 		list_add_tail(&work->node, &dev->work_list);
-		work->queue_seq++;
 		spin_unlock_irqrestore(&dev->work_lock, flags);
 		wake_up_process(dev->worker);
 	} else {
@@ -310,7 +306,6 @@ static int vhost_worker(void *data)
 {
 	struct vhost_dev *dev = data;
 	struct vhost_work *work = NULL;
-	unsigned uninitialized_var(seq);
 	mm_segment_t oldfs = get_fs();
 
 	set_fs(USER_DS);
@@ -321,11 +316,6 @@ static int vhost_worker(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		spin_lock_irq(&dev->work_lock);
-		if (work) {
-			work->done_seq = seq;
-			if (work->flushing)
-				wake_up_all(&work->done);
-		}
 
 		if (kthread_should_stop()) {
 			spin_unlock_irq(&dev->work_lock);
@@ -336,7 +326,6 @@ static int vhost_worker(void *data)
 			work = list_first_entry(&dev->work_list,
 						struct vhost_work, node);
 			list_del_init(&work->node);
-			seq = work->queue_seq;
 		} else
 			work = NULL;
 		spin_unlock_irq(&dev->work_lock);
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] vhost: lockless enqueuing
  2016-04-26  2:14 [PATCH 1/2] vhost: simplify work flushing Jason Wang
@ 2016-04-26  2:14 ` Jason Wang
  2016-04-26  6:24   ` Pankaj Gupta
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2016-04-26  2:14 UTC (permalink / raw)
  To: mst; +Cc: kvm, virtualization, netdev, linux-kernel, Jason Wang

We use spinlock to synchronize the work list now which may cause
unnecessary contentions. So this patch switch to use llist to remove
this contention. Pktgen tests shows about 5% improvement:

Before:
~1300000 pps
After:
~1370000 pps

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vhost/vhost.c | 52 +++++++++++++++++++++++++--------------------------
 drivers/vhost/vhost.h |  7 ++++---
 2 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 73dd16d..0061a7b 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -168,7 +168,7 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
 
 void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
 {
-	INIT_LIST_HEAD(&work->node);
+	clear_bit(VHOST_WORK_QUEUED, &work->flags);
 	work->fn = fn;
 	init_waitqueue_head(&work->done);
 }
@@ -246,15 +246,16 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-	unsigned long flags;
+	if (!dev->worker)
+		return;
 
-	spin_lock_irqsave(&dev->work_lock, flags);
-	if (list_empty(&work->node)) {
-		list_add_tail(&work->node, &dev->work_list);
-		spin_unlock_irqrestore(&dev->work_lock, flags);
+	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
+		/* We can only add the work to the list after we're
+		 * sure it was not in the list.
+		 */
+		smp_mb();
+		llist_add(&work->node, &dev->work_list);
 		wake_up_process(dev->worker);
-	} else {
-		spin_unlock_irqrestore(&dev->work_lock, flags);
 	}
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -262,7 +263,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-	return !list_empty(&dev->work_list);
+	return !llist_empty(&dev->work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
 
@@ -305,7 +306,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 static int vhost_worker(void *data)
 {
 	struct vhost_dev *dev = data;
-	struct vhost_work *work = NULL;
+	struct vhost_work *work, *work_next;
+	struct llist_node *node;
 	mm_segment_t oldfs = get_fs();
 
 	set_fs(USER_DS);
@@ -315,29 +317,25 @@ static int vhost_worker(void *data)
 		/* mb paired w/ kthread_stop */
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		spin_lock_irq(&dev->work_lock);
-
 		if (kthread_should_stop()) {
-			spin_unlock_irq(&dev->work_lock);
 			__set_current_state(TASK_RUNNING);
 			break;
 		}
-		if (!list_empty(&dev->work_list)) {
-			work = list_first_entry(&dev->work_list,
-						struct vhost_work, node);
-			list_del_init(&work->node);
-		} else
-			work = NULL;
-		spin_unlock_irq(&dev->work_lock);
 
-		if (work) {
+		node = llist_del_all(&dev->work_list);
+		if (!node)
+			schedule();
+
+		node = llist_reverse_order(node);
+		/* make sure flag is seen after deletion */
+		smp_wmb();
+		llist_for_each_entry_safe(work, work_next, node, node) {
+			clear_bit(VHOST_WORK_QUEUED, &work->flags);
 			__set_current_state(TASK_RUNNING);
 			work->fn(work);
 			if (need_resched())
 				schedule();
-		} else
-			schedule();
-
+		}
 	}
 	unuse_mm(dev->mm);
 	set_fs(oldfs);
@@ -398,9 +396,9 @@ void vhost_dev_init(struct vhost_dev *dev,
 	dev->log_file = NULL;
 	dev->memory = NULL;
 	dev->mm = NULL;
-	spin_lock_init(&dev->work_lock);
-	INIT_LIST_HEAD(&dev->work_list);
 	dev->worker = NULL;
+	init_llist_head(&dev->work_list);
+
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		vq = dev->vqs[i];
@@ -566,7 +564,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
 	/* No one will access memory at this point */
 	kvfree(dev->memory);
 	dev->memory = NULL;
-	WARN_ON(!list_empty(&dev->work_list));
+	WARN_ON(!llist_empty(&dev->work_list));
 	if (dev->worker) {
 		kthread_stop(dev->worker);
 		dev->worker = NULL;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index d36d8be..6690e64 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -15,13 +15,15 @@
 struct vhost_work;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);
 
+#define VHOST_WORK_QUEUED 1
 struct vhost_work {
-	struct list_head	  node;
+	struct llist_node	  node;
 	vhost_work_fn_t		  fn;
 	wait_queue_head_t	  done;
 	int			  flushing;
 	unsigned		  queue_seq;
 	unsigned		  done_seq;
+	unsigned long		  flags;
 };
 
 /* Poll a file (eventfd or socket) */
@@ -126,8 +128,7 @@ struct vhost_dev {
 	int nvqs;
 	struct file *log_file;
 	struct eventfd_ctx *log_ctx;
-	spinlock_t work_lock;
-	struct list_head work_list;
+	struct llist_head work_list;
 	struct task_struct *worker;
 };
 
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] vhost: lockless enqueuing
  2016-04-26  2:14 ` [PATCH 2/2] vhost: lockless enqueuing Jason Wang
@ 2016-04-26  6:24   ` Pankaj Gupta
  2016-04-26  7:05     ` Jason Wang
  0 siblings, 1 reply; 5+ messages in thread
From: Pankaj Gupta @ 2016-04-26  6:24 UTC (permalink / raw)
  To: Jason Wang; +Cc: mst, kvm, virtualization, netdev, linux-kernel

Hi Jason,

Overall patches look good. Just one doubt I have is below:
> 
> We use spinlock to synchronize the work list now which may cause
> unnecessary contentions. So this patch switch to use llist to remove
> this contention. Pktgen tests shows about 5% improvement:
> 
> Before:
> ~1300000 pps
> After:
> ~1370000 pps
> 
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
>  drivers/vhost/vhost.c | 52
>  +++++++++++++++++++++++++--------------------------
>  drivers/vhost/vhost.h |  7 ++++---
>  2 files changed, 29 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 73dd16d..0061a7b 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -168,7 +168,7 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned
> mode, int sync,
>  
>  void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
>  {
> -	INIT_LIST_HEAD(&work->node);
> +	clear_bit(VHOST_WORK_QUEUED, &work->flags);
>  	work->fn = fn;
>  	init_waitqueue_head(&work->done);
>  }
> @@ -246,15 +246,16 @@ EXPORT_SYMBOL_GPL(vhost_poll_flush);
>  
>  void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
>  {
> -	unsigned long flags;
> +	if (!dev->worker)
> +		return;
>  
> -	spin_lock_irqsave(&dev->work_lock, flags);
> -	if (list_empty(&work->node)) {
> -		list_add_tail(&work->node, &dev->work_list);
> -		spin_unlock_irqrestore(&dev->work_lock, flags);
> +	if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
> +		/* We can only add the work to the list after we're
> +		 * sure it was not in the list.
> +		 */
> +		smp_mb();
> +		llist_add(&work->node, &dev->work_list);
>  		wake_up_process(dev->worker);
> -	} else {
> -		spin_unlock_irqrestore(&dev->work_lock, flags);
>  	}
>  }
>  EXPORT_SYMBOL_GPL(vhost_work_queue);
> @@ -262,7 +263,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
>  /* A lockless hint for busy polling code to exit the loop */
>  bool vhost_has_work(struct vhost_dev *dev)
>  {
> -	return !list_empty(&dev->work_list);
> +	return !llist_empty(&dev->work_list);
>  }
>  EXPORT_SYMBOL_GPL(vhost_has_work);
>  
> @@ -305,7 +306,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
>  static int vhost_worker(void *data)
>  {
>  	struct vhost_dev *dev = data;
> -	struct vhost_work *work = NULL;
> +	struct vhost_work *work, *work_next;
> +	struct llist_node *node;
>  	mm_segment_t oldfs = get_fs();
>  
>  	set_fs(USER_DS);
> @@ -315,29 +317,25 @@ static int vhost_worker(void *data)
>  		/* mb paired w/ kthread_stop */
>  		set_current_state(TASK_INTERRUPTIBLE);
>  
> -		spin_lock_irq(&dev->work_lock);
> -
>  		if (kthread_should_stop()) {
> -			spin_unlock_irq(&dev->work_lock);
>  			__set_current_state(TASK_RUNNING);
>  			break;
>  		}
> -		if (!list_empty(&dev->work_list)) {
> -			work = list_first_entry(&dev->work_list,
> -						struct vhost_work, node);
> -			list_del_init(&work->node);
> -		} else
> -			work = NULL;
> -		spin_unlock_irq(&dev->work_lock);
>  
> -		if (work) {
> +		node = llist_del_all(&dev->work_list);
> +		if (!node)
> +			schedule();
> +
> +		node = llist_reverse_order(node);

Can we avoid llist reverse here?

> +		/* make sure flag is seen after deletion */
> +		smp_wmb();
> +		llist_for_each_entry_safe(work, work_next, node, node) {
> +			clear_bit(VHOST_WORK_QUEUED, &work->flags);
>  			__set_current_state(TASK_RUNNING);
>  			work->fn(work);
>  			if (need_resched())
>  				schedule();
> -		} else
> -			schedule();
> -
> +		}
>  	}
>  	unuse_mm(dev->mm);
>  	set_fs(oldfs);
> @@ -398,9 +396,9 @@ void vhost_dev_init(struct vhost_dev *dev,
>  	dev->log_file = NULL;
>  	dev->memory = NULL;
>  	dev->mm = NULL;
> -	spin_lock_init(&dev->work_lock);
> -	INIT_LIST_HEAD(&dev->work_list);
>  	dev->worker = NULL;
> +	init_llist_head(&dev->work_list);
> +
>  
>  	for (i = 0; i < dev->nvqs; ++i) {
>  		vq = dev->vqs[i];
> @@ -566,7 +564,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool
> locked)
>  	/* No one will access memory at this point */
>  	kvfree(dev->memory);
>  	dev->memory = NULL;
> -	WARN_ON(!list_empty(&dev->work_list));
> +	WARN_ON(!llist_empty(&dev->work_list));
>  	if (dev->worker) {
>  		kthread_stop(dev->worker);
>  		dev->worker = NULL;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index d36d8be..6690e64 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -15,13 +15,15 @@
>  struct vhost_work;
>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
>  
> +#define VHOST_WORK_QUEUED 1
>  struct vhost_work {
> -	struct list_head	  node;
> +	struct llist_node	  node;
>  	vhost_work_fn_t		  fn;
>  	wait_queue_head_t	  done;
>  	int			  flushing;
>  	unsigned		  queue_seq;
>  	unsigned		  done_seq;
> +	unsigned long		  flags;
>  };
>  
>  /* Poll a file (eventfd or socket) */
> @@ -126,8 +128,7 @@ struct vhost_dev {
>  	int nvqs;
>  	struct file *log_file;
>  	struct eventfd_ctx *log_ctx;
> -	spinlock_t work_lock;
> -	struct list_head work_list;
> +	struct llist_head work_list;
>  	struct task_struct *worker;
>  };
>  
> --
> 1.8.3.1
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] vhost: lockless enqueuing
  2016-04-26  6:24   ` Pankaj Gupta
@ 2016-04-26  7:05     ` Jason Wang
  2016-04-26  7:57       ` Pankaj Gupta
  0 siblings, 1 reply; 5+ messages in thread
From: Jason Wang @ 2016-04-26  7:05 UTC (permalink / raw)
  To: Pankaj Gupta; +Cc: mst, kvm, virtualization, netdev, linux-kernel



On 04/26/2016 02:24 PM, Pankaj Gupta wrote:
> Hi Jason,
>
> Overall patches look good. Just one doubt I have is below:
>> We use spinlock to synchronize the work list now which may cause
>> unnecessary contentions. So this patch switch to use llist to remove
>> this contention. Pktgen tests shows about 5% improvement:
>>
>> Before:
>> ~1300000 pps
>> After:
>> ~1370000 pps
>>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>> ---
>>  drivers/vhost/vhost.c | 52
>>  +++++++++++++++++++++++++--------------------------
>>  drivers/vhost/vhost.h |  7 ++++---
>>  2 files changed, 29 insertions(+), 30 deletions(-)

[...]

>> -		if (work) {
>> +		node = llist_del_all(&dev->work_list);
>> +		if (!node)
>> +			schedule();
>> +
>> +		node = llist_reverse_order(node);
> Can we avoid llist reverse here?
>

Probably not, this is because:

- we should process the work exactly the same order as they were queued,
otherwise flush won't work
- llist can only add a node to the head of list.

Thanks

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] vhost: lockless enqueuing
  2016-04-26  7:05     ` Jason Wang
@ 2016-04-26  7:57       ` Pankaj Gupta
  0 siblings, 0 replies; 5+ messages in thread
From: Pankaj Gupta @ 2016-04-26  7:57 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, virtualization, linux-kernel, kvm, mst


> 
> 
> 
> On 04/26/2016 02:24 PM, Pankaj Gupta wrote:
> > Hi Jason,
> >
> > Overall patches look good. Just one doubt I have is below:
> >> We use spinlock to synchronize the work list now which may cause
> >> unnecessary contentions. So this patch switch to use llist to remove
> >> this contention. Pktgen tests shows about 5% improvement:
> >>
> >> Before:
> >> ~1300000 pps
> >> After:
> >> ~1370000 pps
> >>
> >> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >> ---
> >>  drivers/vhost/vhost.c | 52
> >>  +++++++++++++++++++++++++--------------------------
> >>  drivers/vhost/vhost.h |  7 ++++---
> >>  2 files changed, 29 insertions(+), 30 deletions(-)
> 
> [...]
> 
> >> -		if (work) {
> >> +		node = llist_del_all(&dev->work_list);
> >> +		if (!node)
> >> +			schedule();
> >> +
> >> +		node = llist_reverse_order(node);
> > Can we avoid llist reverse here?
> >
> 
> Probably not, this is because:
> 
> - we should process the work exactly the same order as they were queued,
> otherwise flush won't work
> - llist can only add a node to the head of list.

Got it.

Thanks,
> 
> Thanks
> 

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-04-26  7:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-04-26  2:14 [PATCH 1/2] vhost: simplify work flushing Jason Wang
2016-04-26  2:14 ` [PATCH 2/2] vhost: lockless enqueuing Jason Wang
2016-04-26  6:24   ` Pankaj Gupta
2016-04-26  7:05     ` Jason Wang
2016-04-26  7:57       ` Pankaj Gupta

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).