* [PATCH V2 1/2] vhost_net: stop polling socket during rx processing [not found] <1464590874-39539-1-git-send-email-jasowang@redhat.com> @ 2016-05-30 6:47 ` Jason Wang 2016-05-30 15:47 ` Michael S. Tsirkin 2016-05-30 6:47 ` [PATCH V2 2/2] vhost_net: conditionally enable tx polling Jason Wang 1 sibling, 1 reply; 6+ messages in thread From: Jason Wang @ 2016-05-30 6:47 UTC (permalink / raw) To: mst, kvm, virtualization, netdev, linux-kernel We don't stop polling the socket during rx processing; this leads to unnecessary wakeups from the underlying net devices (e.g. sock_def_readable() from tun). Rx will be slowed down in this way. This patch avoids this by stopping socket polling during rx processing. A small drawback is that this introduces some overhead in the light load case because of the extra start/stop of polling, but a single netperf TCP_RR test does not show any change. In a super heavy load case, e.g. using pktgen to inject packets into the guest, we get about 8.8% improvement in pps: before: ~1240000 pkt/s after: ~1350000 pkt/s Signed-off-by: Jason Wang <jasowang@redhat.com> --- drivers/vhost/net.c | 56 +++++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 10ff494..e91603b 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -301,6 +301,32 @@ static bool vhost_can_busy_poll(struct vhost_dev *dev, !vhost_has_work(dev); } +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + if (!vq->private_data) + return; + vhost_poll_stop(poll); +} + +static int vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); + struct socket *sock; + + sock 
= vq->private_data; + if (!sock) + return 0; + + return vhost_poll_start(poll, sock->file); +} + static int vhost_net_tx_get_vq_desc(struct vhost_net *net, struct vhost_virtqueue *vq, struct iovec iov[], unsigned int iov_size, @@ -627,6 +653,7 @@ static void handle_rx(struct vhost_net *net) if (!sock) goto out; vhost_disable_notify(&net->dev, vq); + vhost_net_disable_vq(net, vq); vhost_hlen = nvq->vhost_hlen; sock_hlen = nvq->sock_hlen; @@ -715,9 +742,10 @@ static void handle_rx(struct vhost_net *net) total_len += vhost_len; if (unlikely(total_len >= VHOST_NET_WEIGHT)) { vhost_poll_queue(&vq->poll); - break; + goto out; } } + vhost_net_enable_vq(net, vq); out: mutex_unlock(&vq->mutex); } @@ -796,32 +824,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) return 0; } -static void vhost_net_disable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq = - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - if (!vq->private_data) - return; - vhost_poll_stop(poll); -} - -static int vhost_net_enable_vq(struct vhost_net *n, - struct vhost_virtqueue *vq) -{ - struct vhost_net_virtqueue *nvq = - container_of(vq, struct vhost_net_virtqueue, vq); - struct vhost_poll *poll = n->poll + (nvq - n->vqs); - struct socket *sock; - - sock = vq->private_data; - if (!sock) - return 0; - - return vhost_poll_start(poll, sock->file); -} - static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH V2 1/2] vhost_net: stop polling socket during rx processing 2016-05-30 6:47 ` [PATCH V2 1/2] vhost_net: stop polling socket during rx processing Jason Wang @ 2016-05-30 15:47 ` Michael S. Tsirkin 2016-05-31 3:14 ` Jason Wang 0 siblings, 1 reply; 6+ messages in thread From: Michael S. Tsirkin @ 2016-05-30 15:47 UTC (permalink / raw) To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization On Mon, May 30, 2016 at 02:47:53AM -0400, Jason Wang wrote: > We don't stop rx polling socket during rx processing, this will lead > unnecessary wakeups from under layer net devices (E.g > sock_def_readable() form tun). Rx will be slowed down in this > way. This patch avoids this by stop polling socket during rx > processing. A small drawback is that this introduces some overheads in > light load case because of the extra start/stop polling, but single > netperf TCP_RR does not notice any change. In a super heavy load case, > e.g using pktgen to inject packet to guest, we get about ~8.8% > improvement on pps: > > before: ~1240000 pkt/s > after: ~1350000 pkt/s > > Signed-off-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vhost/net.c | 56 +++++++++++++++++++++++++++-------------------------- > 1 file changed, 29 insertions(+), 27 deletions(-) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index 10ff494..e91603b 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -301,6 +301,32 @@ static bool vhost_can_busy_poll(struct vhost_dev *dev, > !vhost_has_work(dev); > } > > +static void vhost_net_disable_vq(struct vhost_net *n, > + struct vhost_virtqueue *vq) > +{ > + struct vhost_net_virtqueue *nvq = > + container_of(vq, struct vhost_net_virtqueue, vq); > + struct vhost_poll *poll = n->poll + (nvq - n->vqs); > + if (!vq->private_data) > + return; > + vhost_poll_stop(poll); > +} > + > +static int vhost_net_enable_vq(struct vhost_net *n, > + struct vhost_virtqueue *vq) > +{ > + struct vhost_net_virtqueue *nvq = > + container_of(vq, struct 
vhost_net_virtqueue, vq); > + struct vhost_poll *poll = n->poll + (nvq - n->vqs); > + struct socket *sock; > + > + sock = vq->private_data; > + if (!sock) > + return 0; > + > + return vhost_poll_start(poll, sock->file); > +} > + > static int vhost_net_tx_get_vq_desc(struct vhost_net *net, > struct vhost_virtqueue *vq, > struct iovec iov[], unsigned int iov_size, BTW we might want to rename these functions; the name no longer reflects the function ... > @@ -627,6 +653,7 @@ static void handle_rx(struct vhost_net *net) > if (!sock) > goto out; > vhost_disable_notify(&net->dev, vq); > + vhost_net_disable_vq(net, vq); > > vhost_hlen = nvq->vhost_hlen; > sock_hlen = nvq->sock_hlen; > @@ -715,9 +742,10 @@ static void handle_rx(struct vhost_net *net) > total_len += vhost_len; > if (unlikely(total_len >= VHOST_NET_WEIGHT)) { > vhost_poll_queue(&vq->poll); > - break; > + goto out; > } > } > + vhost_net_enable_vq(net, vq); OK, so if the sock is readable but the RX VQ is empty, this will immediately schedule another round of handle_rx, and so on ad infinitum. Looks like a bug. 
> out: > mutex_unlock(&vq->mutex); > } > @@ -796,32 +824,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) > return 0; > } > > -static void vhost_net_disable_vq(struct vhost_net *n, > - struct vhost_virtqueue *vq) > -{ > - struct vhost_net_virtqueue *nvq = > - container_of(vq, struct vhost_net_virtqueue, vq); > - struct vhost_poll *poll = n->poll + (nvq - n->vqs); > - if (!vq->private_data) > - return; > - vhost_poll_stop(poll); > -} > - > -static int vhost_net_enable_vq(struct vhost_net *n, > - struct vhost_virtqueue *vq) > -{ > - struct vhost_net_virtqueue *nvq = > - container_of(vq, struct vhost_net_virtqueue, vq); > - struct vhost_poll *poll = n->poll + (nvq - n->vqs); > - struct socket *sock; > - > - sock = vq->private_data; > - if (!sock) > - return 0; > - > - return vhost_poll_start(poll, sock->file); > -} > - > static struct socket *vhost_net_stop_vq(struct vhost_net *n, > struct vhost_virtqueue *vq) > { > -- > 1.8.3.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH V2 1/2] vhost_net: stop polling socket during rx processing 2016-05-30 15:47 ` Michael S. Tsirkin @ 2016-05-31 3:14 ` Jason Wang 0 siblings, 0 replies; 6+ messages in thread From: Jason Wang @ 2016-05-31 3:14 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization On 2016年05月30日 23:47, Michael S. Tsirkin wrote: > On Mon, May 30, 2016 at 02:47:53AM -0400, Jason Wang wrote: >> We don't stop rx polling socket during rx processing, this will lead >> unnecessary wakeups from under layer net devices (E.g >> sock_def_readable() form tun). Rx will be slowed down in this >> way. This patch avoids this by stop polling socket during rx >> processing. A small drawback is that this introduces some overheads in >> light load case because of the extra start/stop polling, but single >> netperf TCP_RR does not notice any change. In a super heavy load case, >> e.g using pktgen to inject packet to guest, we get about ~8.8% >> improvement on pps: >> >> before: ~1240000 pkt/s >> after: ~1350000 pkt/s >> >> Signed-off-by: Jason Wang <jasowang@redhat.com> >> --- >> drivers/vhost/net.c | 56 +++++++++++++++++++++++++++-------------------------- >> 1 file changed, 29 insertions(+), 27 deletions(-) >> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c >> index 10ff494..e91603b 100644 >> --- a/drivers/vhost/net.c >> +++ b/drivers/vhost/net.c >> @@ -301,6 +301,32 @@ static bool vhost_can_busy_poll(struct vhost_dev *dev, >> !vhost_has_work(dev); >> } >> >> +static void vhost_net_disable_vq(struct vhost_net *n, >> + struct vhost_virtqueue *vq) >> +{ >> + struct vhost_net_virtqueue *nvq = >> + container_of(vq, struct vhost_net_virtqueue, vq); >> + struct vhost_poll *poll = n->poll + (nvq - n->vqs); >> + if (!vq->private_data) >> + return; >> + vhost_poll_stop(poll); >> +} >> + >> +static int vhost_net_enable_vq(struct vhost_net *n, >> + struct vhost_virtqueue *vq) >> +{ >> + struct vhost_net_virtqueue *nvq = >> + container_of(vq, struct 
vhost_net_virtqueue, vq); >> + struct vhost_poll *poll = n->poll + (nvq - n->vqs); >> + struct socket *sock; >> + >> + sock = vq->private_data; >> + if (!sock) >> + return 0; >> + >> + return vhost_poll_start(poll, sock->file); >> +} >> + >> static int vhost_net_tx_get_vq_desc(struct vhost_net *net, >> struct vhost_virtqueue *vq, >> struct iovec iov[], unsigned int iov_size, > BTW we might want to rename these functions, name no longer > reflects function ... Do you mean adding something that reflects busy polling to the name? Then the name may become too long. Or do you have a suggestion for the name? > > >> @@ -627,6 +653,7 @@ static void handle_rx(struct vhost_net *net) >> if (!sock) >> goto out; >> vhost_disable_notify(&net->dev, vq); >> + vhost_net_disable_vq(net, vq); >> >> vhost_hlen = nvq->vhost_hlen; >> sock_hlen = nvq->sock_hlen; >> @@ -715,9 +742,10 @@ static void handle_rx(struct vhost_net *net) >> total_len += vhost_len; >> if (unlikely(total_len >= VHOST_NET_WEIGHT)) { >> vhost_poll_queue(&vq->poll); >> - break; >> + goto out; >> } >> } >> + vhost_net_enable_vq(net, vq); > OK so if sock is readable but RX VQ is empty, this will > immediately schedule another round of handle_rx and so ad > infinitum, > > Looks like a bug. Yes, it is. I will change the above headcount check to: /* OK, now we need to know about added descriptors. */ if (!headcount) { if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us * they refilled. 
*/ goto out; } > > >> out: >> mutex_unlock(&vq->mutex); >> } >> @@ -796,32 +824,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) >> return 0; >> } >> >> -static void vhost_net_disable_vq(struct vhost_net *n, >> - struct vhost_virtqueue *vq) >> -{ >> - struct vhost_net_virtqueue *nvq = >> - container_of(vq, struct vhost_net_virtqueue, vq); >> - struct vhost_poll *poll = n->poll + (nvq - n->vqs); >> - if (!vq->private_data) >> - return; >> - vhost_poll_stop(poll); >> -} >> - >> -static int vhost_net_enable_vq(struct vhost_net *n, >> - struct vhost_virtqueue *vq) >> -{ >> - struct vhost_net_virtqueue *nvq = >> - container_of(vq, struct vhost_net_virtqueue, vq); >> - struct vhost_poll *poll = n->poll + (nvq - n->vqs); >> - struct socket *sock; >> - >> - sock = vq->private_data; >> - if (!sock) >> - return 0; >> - >> - return vhost_poll_start(poll, sock->file); >> -} >> - >> static struct socket *vhost_net_stop_vq(struct vhost_net *n, >> struct vhost_virtqueue *vq) >> { >> -- >> 1.8.3.1 > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH V2 2/2] vhost_net: conditionally enable tx polling [not found] <1464590874-39539-1-git-send-email-jasowang@redhat.com> 2016-05-30 6:47 ` [PATCH V2 1/2] vhost_net: stop polling socket during rx processing Jason Wang @ 2016-05-30 6:47 ` Jason Wang 2016-05-30 15:55 ` Michael S. Tsirkin [not found] ` <20160530155521.GA5427@redhat.com> 1 sibling, 2 replies; 6+ messages in thread From: Jason Wang @ 2016-05-30 6:47 UTC (permalink / raw) To: mst, kvm, virtualization, netdev, linux-kernel We always poll tx for the socket; this is suboptimal since: - it will only be used when we exceed the sndbuf of the socket. - since we use two independent polls for tx and vq, this will slightly increase the waitqueue traversal time and, more importantly, vhost cannot benefit from commit 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 ("net-tun: restructure tun_do_read for better sleep/wakeup efficiency") even if we've stopped rx polling during handle_rx, since the tx poll is still left in the waitqueue. Fix this by conditionally enabling tx polling only when -EAGAIN is returned. Test shows about 8% improvement on guest rx pps. Before: ~1350000 After: ~1460000 Signed-off-by: Jason Wang <jasowang@redhat.com> --- drivers/vhost/net.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e91603b..5a05fa0 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -378,6 +378,7 @@ static void handle_tx(struct vhost_net *net) goto out; vhost_disable_notify(&net->dev, vq); + vhost_net_disable_vq(net, vq); hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; @@ -459,6 +460,8 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); + if (err == -EAGAIN) + vhost_net_enable_vq(net, vq); break; } if (err != len) -- 1.8.3.1 ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH V2 2/2] vhost_net: conditionally enable tx polling 2016-05-30 6:47 ` [PATCH V2 2/2] vhost_net: conditionally enable tx polling Jason Wang @ 2016-05-30 15:55 ` Michael S. Tsirkin [not found] ` <20160530155521.GA5427@redhat.com> 1 sibling, 0 replies; 6+ messages in thread From: Michael S. Tsirkin @ 2016-05-30 15:55 UTC (permalink / raw) To: Jason Wang; +Cc: netdev, linux-kernel, kvm, virtualization On Mon, May 30, 2016 at 02:47:54AM -0400, Jason Wang wrote: > We always poll tx for socket, this is sub optimal since: > > - it will be only used when we exceed the sndbuf of the socket. > - since we use two independent polls for tx and vq, this will slightly > increase the waitqueue traversing time and more important, vhost > could not benefit from commit > 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 ("net-tun: restructure > tun_do_read for better sleep/wakeup efficiency") even if we've > stopped rx polling during handle_rx since tx poll were still left in > the waitqueue. Why is this an issue? sock_def_write_space only wakes up when queue is half empty, not on each packet. if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) I suspect the issue is with your previous patch, it now pokes at the spinlock on data path where it used not to. Is that right? > > Fix this by conditionally enable tx polling only when -EAGAIN were > met. > > Test shows about 8% improvement on guest rx pps. 
> > Before: ~1350000 > After: ~1460000 > > Signed-off-by: Jason Wang <jasowang@redhat.com> > --- > drivers/vhost/net.c | 3 +++ > 1 file changed, 3 insertions(+) > > diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c > index e91603b..5a05fa0 100644 > --- a/drivers/vhost/net.c > +++ b/drivers/vhost/net.c > @@ -378,6 +378,7 @@ static void handle_tx(struct vhost_net *net) > goto out; > > vhost_disable_notify(&net->dev, vq); > + vhost_net_disable_vq(net, vq); > > hdr_size = nvq->vhost_hlen; > zcopy = nvq->ubufs; > @@ -459,6 +460,8 @@ static void handle_tx(struct vhost_net *net) > % UIO_MAXIOV; > } > vhost_discard_vq_desc(vq, 1); > + if (err == -EAGAIN) > + vhost_net_enable_vq(net, vq); > break; > } > if (err != len) > -- > 1.8.3.1 ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <20160530155521.GA5427@redhat.com>]
* Re: [PATCH V2 2/2] vhost_net: conditionally enable tx polling [not found] ` <20160530155521.GA5427@redhat.com> @ 2016-05-31 3:23 ` Jason Wang 0 siblings, 0 replies; 6+ messages in thread From: Jason Wang @ 2016-05-31 3:23 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization On 2016年05月30日 23:55, Michael S. Tsirkin wrote: > On Mon, May 30, 2016 at 02:47:54AM -0400, Jason Wang wrote: >> We always poll tx for socket, this is sub optimal since: >> >> - it will be only used when we exceed the sndbuf of the socket. >> - since we use two independent polls for tx and vq, this will slightly >> increase the waitqueue traversing time and more important, vhost >> could not benefit from commit >> 9e641bdcfa4ef4d6e2fbaa59c1be0ad5d1551fd5 ("net-tun: restructure >> tun_do_read for better sleep/wakeup efficiency") even if we've >> stopped rx polling during handle_rx since tx poll were still left in >> the waitqueue. > Why is this an issue? > sock_def_write_space only wakes up when queue is half empty, > not on each packet. > if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) > > I suspect the issue is with your previous patch, > it now pokes at the spinlock on data path > where it used not to. > > Is that right? The problem is not the tx wakeup but still the rx wakeup. Patch 1 removes the rx poll, but still leaves the tx poll. So in sock_def_readable(), skwq_has_sleeper() returns true, and we still need to traverse the waitqueue and touch spinlocks. With this patch, unless there is a heavy tx load, the tx poll is disabled, so sock_def_readable() can return very quickly. > > >> Fix this by conditionally enable tx polling only when -EAGAIN were >> met. >> >> Test shows about 8% improvement on guest rx pps. 
>> >> Before: ~1350000 >> After: ~1460000 >> >> Signed-off-by: Jason Wang <jasowang@redhat.com> >> --- >> drivers/vhost/net.c | 3 +++ >> 1 file changed, 3 insertions(+) >> >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c >> index e91603b..5a05fa0 100644 >> --- a/drivers/vhost/net.c >> +++ b/drivers/vhost/net.c >> @@ -378,6 +378,7 @@ static void handle_tx(struct vhost_net *net) >> goto out; >> >> vhost_disable_notify(&net->dev, vq); >> + vhost_net_disable_vq(net, vq); >> >> hdr_size = nvq->vhost_hlen; >> zcopy = nvq->ubufs; >> @@ -459,6 +460,8 @@ static void handle_tx(struct vhost_net *net) >> % UIO_MAXIOV; >> } >> vhost_discard_vq_desc(vq, 1); >> + if (err == -EAGAIN) >> + vhost_net_enable_vq(net, vq); >> break; >> } >> if (err != len) >> -- >> 1.8.3.1 _______________________________________________ Virtualization mailing list Virtualization@lists.linux-foundation.org https://lists.linuxfoundation.org/mailman/listinfo/virtualization ^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2016-05-31 3:23 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- [not found] <1464590874-39539-1-git-send-email-jasowang@redhat.com> 2016-05-30 6:47 ` [PATCH V2 1/2] vhost_net: stop polling socket during rx processing Jason Wang 2016-05-30 15:47 ` Michael S. Tsirkin 2016-05-31 3:14 ` Jason Wang 2016-05-30 6:47 ` [PATCH V2 2/2] vhost_net: conditionally enable tx polling Jason Wang 2016-05-30 15:55 ` Michael S. Tsirkin [not found] ` <20160530155521.GA5427@redhat.com> 2016-05-31 3:23 ` Jason Wang
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).