* [PATCH 1/4] [RFC rev2] Change virtqueue structure
2011-04-05 15:08 [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Krishna Kumar
@ 2011-04-05 15:08 ` Krishna Kumar
2011-04-05 15:08 ` [PATCH 2/4] [RFC rev2] virtio-net changes Krishna Kumar
` (3 subsequent siblings)
4 siblings, 0 replies; 9+ messages in thread
From: Krishna Kumar @ 2011-04-05 15:08 UTC (permalink / raw)
To: rusty, davem, mst
Cc: eric.dumazet, arnd, netdev, horms, avi, anthony, kvm,
Krishna Kumar
Move queue_index from virtio_pci_vq_info to virtqueue. This
allows callback handlers to figure out the queue number for
the vq that needs attention.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
drivers/virtio/virtio_pci.c | 10 +++-------
include/linux/virtio.h | 1 +
2 files changed, 4 insertions(+), 7 deletions(-)
diff -ruNp org/include/linux/virtio.h new/include/linux/virtio.h
--- org/include/linux/virtio.h 2011-04-05 14:15:18.000000000 +0530
+++ new/include/linux/virtio.h 2011-04-05 14:15:18.000000000 +0530
@@ -22,6 +22,7 @@ struct virtqueue {
void (*callback)(struct virtqueue *vq);
const char *name;
struct virtio_device *vdev;
+ int queue_index; /* the index of the queue */
void *priv;
};
diff -ruNp org/drivers/virtio/virtio_pci.c new/drivers/virtio/virtio_pci.c
--- org/drivers/virtio/virtio_pci.c 2011-04-05 14:15:18.000000000 +0530
+++ new/drivers/virtio/virtio_pci.c 2011-04-05 14:15:18.000000000 +0530
@@ -75,9 +75,6 @@ struct virtio_pci_vq_info
/* the number of entries in the queue */
int num;
- /* the index of the queue */
- int queue_index;
-
/* the virtual address of the ring queue */
void *queue;
@@ -180,11 +177,10 @@ static void vp_reset(struct virtio_devic
static void vp_notify(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
- struct virtio_pci_vq_info *info = vq->priv;
/* we write the queue's selector into the notification register to
* signal the other end */
- iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
+ iowrite16(vq->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
}
/* Handle a configuration change: Tell driver if it wants to know. */
@@ -380,7 +376,6 @@ static struct virtqueue *setup_vq(struct
if (!info)
return ERR_PTR(-ENOMEM);
- info->queue_index = index;
info->num = num;
info->msix_vector = msix_vec;
@@ -403,6 +398,7 @@ static struct virtqueue *setup_vq(struct
goto out_activate_queue;
}
+ vq->queue_index = index;
vq->priv = info;
info->vq = vq;
@@ -441,7 +437,7 @@ static void vp_del_vq(struct virtqueue *
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
- iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
+ iowrite16(vq->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR,
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 2/4] [RFC rev2] virtio-net changes
2011-04-05 15:08 [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Krishna Kumar
2011-04-05 15:08 ` [PATCH 1/4] [RFC rev2] Change virtqueue structure Krishna Kumar
@ 2011-04-05 15:08 ` Krishna Kumar
2011-04-13 1:28 ` Rusty Russell
2011-04-05 15:09 ` [PATCH 3/4] [RFC rev2] vhost changes Krishna Kumar
` (2 subsequent siblings)
4 siblings, 1 reply; 9+ messages in thread
From: Krishna Kumar @ 2011-04-05 15:08 UTC (permalink / raw)
To: rusty, davem, mst
Cc: eric.dumazet, arnd, netdev, horms, avi, anthony, kvm,
Krishna Kumar
Implement mq virtio-net driver.
Though struct virtio_net_config changes, it works with the old
qemu since the last element is not accessed unless qemu sets
VIRTIO_NET_F_MULTIQUEUE.
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
drivers/net/virtio_net.c | 573 ++++++++++++++++++++++++-----------
include/linux/virtio_net.h | 3
2 files changed, 408 insertions(+), 168 deletions(-)
diff -ruNp org/include/linux/virtio_net.h new/include/linux/virtio_net.h
--- org/include/linux/virtio_net.h 2011-04-05 14:15:18.000000000 +0530
+++ new/include/linux/virtio_net.h 2011-04-05 14:15:18.000000000 +0530
@@ -26,6 +26,7 @@
#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
+#define VIRTIO_NET_F_MULTIQUEUE 21 /* Device supports multiple TXQ/RXQ */
#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
@@ -34,6 +35,8 @@ struct virtio_net_config {
__u8 mac[6];
/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
__u16 status;
+ /* total number of RX/TX queues */
+ __u16 num_queue_pairs;
} __attribute__((packed));
/* This is the first element of the scatter-gather list. If you don't
diff -ruNp org/drivers/net/virtio_net.c new/drivers/net/virtio_net.c
--- org/drivers/net/virtio_net.c 2011-04-05 20:30:23.000000000 +0530
+++ new/drivers/net/virtio_net.c 2011-04-05 20:30:53.000000000 +0530
@@ -40,31 +40,53 @@ module_param(gso, bool, 0444);
#define VIRTNET_SEND_COMMAND_SG_MAX 2
-struct virtnet_info {
- struct virtio_device *vdev;
- struct virtqueue *rvq, *svq, *cvq;
- struct net_device *dev;
+/* Internal representation of a send virtqueue */
+struct send_queue {
+ /* Virtqueue associated with this send _queue */
+ struct virtqueue *svq;
+
+ /* TX: fragments + linear part + virtio header */
+ struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
+};
+
+/* Internal representation of a receive virtqueue */
+struct receive_queue {
+ /* Virtqueue associated with this receive_queue */
+ struct virtqueue *rvq;
+
+ /* Back pointer to the virtnet_info */
+ struct virtnet_info *vi;
+
struct napi_struct napi;
- unsigned int status;
/* Number of input buffers, and max we've ever had. */
unsigned int num, max;
- /* I like... big packets and I cannot lie! */
- bool big_packets;
-
- /* Host will merge rx buffers for big packets (shake it! shake it!) */
- bool mergeable_rx_bufs;
-
/* Work struct for refilling if we run low on memory. */
struct delayed_work refill;
/* Chain pages by the private ptr. */
struct page *pages;
- /* fragments + linear part + virtio header */
+ /* RX: fragments + linear part + virtio header */
struct scatterlist rx_sg[MAX_SKB_FRAGS + 2];
- struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
+};
+
+struct virtnet_info {
+ struct send_queue **sq;
+ struct receive_queue **rq;
+
+ int numtxqs; /* # of rxqs/txqs */
+ struct virtio_device *vdev;
+ struct virtqueue *cvq;
+ struct net_device *dev;
+ unsigned int status;
+
+ /* I like... big packets and I cannot lie! */
+ bool big_packets;
+
+ /* Host will merge rx buffers for big packets (shake it! shake it!) */
+ bool mergeable_rx_bufs;
};
struct skb_vnet_hdr {
@@ -94,22 +116,22 @@ static inline struct skb_vnet_hdr *skb_v
* private is used to chain pages for big packets, put the whole
* most recent used list in the beginning for reuse
*/
-static void give_pages(struct virtnet_info *vi, struct page *page)
+static void give_pages(struct receive_queue *rq, struct page *page)
{
struct page *end;
/* Find end of list, sew whole thing into vi->pages. */
for (end = page; end->private; end = (struct page *)end->private);
- end->private = (unsigned long)vi->pages;
- vi->pages = page;
+ end->private = (unsigned long)rq->pages;
+ rq->pages = page;
}
-static struct page *get_a_page(struct virtnet_info *vi, gfp_t gfp_mask)
+static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
- struct page *p = vi->pages;
+ struct page *p = rq->pages;
if (p) {
- vi->pages = (struct page *)p->private;
+ rq->pages = (struct page *)p->private;
/* clear private here, it is used to chain pages */
p->private = 0;
} else
@@ -120,12 +142,13 @@ static struct page *get_a_page(struct vi
static void skb_xmit_done(struct virtqueue *svq)
{
struct virtnet_info *vi = svq->vdev->priv;
+ int qnum = svq->queue_index / 2; /* RX/TX vqs are allocated in pairs */
/* Suppress further interrupts. */
virtqueue_disable_cb(svq);
/* We were probably waiting for more output buffers. */
- netif_wake_queue(vi->dev);
+ netif_wake_subqueue(vi->dev, qnum);
}
static void set_skb_frag(struct sk_buff *skb, struct page *page,
@@ -145,9 +168,10 @@ static void set_skb_frag(struct sk_buff
*len -= f->size;
}
-static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+static struct sk_buff *page_to_skb(struct receive_queue *rq,
struct page *page, unsigned int len)
{
+ struct virtnet_info *vi = rq->vi;
struct sk_buff *skb;
struct skb_vnet_hdr *hdr;
unsigned int copy, hdr_len, offset;
@@ -190,12 +214,12 @@ static struct sk_buff *page_to_skb(struc
}
if (page)
- give_pages(vi, page);
+ give_pages(rq, page);
return skb;
}
-static int receive_mergeable(struct virtnet_info *vi, struct sk_buff *skb)
+static int receive_mergeable(struct receive_queue *rq, struct sk_buff *skb)
{
struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
struct page *page;
@@ -210,7 +234,7 @@ static int receive_mergeable(struct virt
return -EINVAL;
}
- page = virtqueue_get_buf(vi->rvq, &len);
+ page = virtqueue_get_buf(rq->rvq, &len);
if (!page) {
pr_debug("%s: rx error: %d buffers missing\n",
skb->dev->name, hdr->mhdr.num_buffers);
@@ -222,13 +246,14 @@ static int receive_mergeable(struct virt
set_skb_frag(skb, page, 0, &len);
- --vi->num;
+ --rq->num;
}
return 0;
}
-static void receive_buf(struct net_device *dev, void *buf, unsigned int len)
+static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
{
+ struct net_device *dev = rq->vi->dev;
struct virtnet_info *vi = netdev_priv(dev);
struct sk_buff *skb;
struct page *page;
@@ -238,7 +263,7 @@ static void receive_buf(struct net_devic
pr_debug("%s: short packet %i\n", dev->name, len);
dev->stats.rx_length_errors++;
if (vi->mergeable_rx_bufs || vi->big_packets)
- give_pages(vi, buf);
+ give_pages(rq, buf);
else
dev_kfree_skb(buf);
return;
@@ -250,14 +275,14 @@ static void receive_buf(struct net_devic
skb_trim(skb, len);
} else {
page = buf;
- skb = page_to_skb(vi, page, len);
+ skb = page_to_skb(rq, page, len);
if (unlikely(!skb)) {
dev->stats.rx_dropped++;
- give_pages(vi, page);
+ give_pages(rq, page);
return;
}
if (vi->mergeable_rx_bufs)
- if (receive_mergeable(vi, skb)) {
+ if (receive_mergeable(rq, skb)) {
dev_kfree_skb(skb);
return;
}
@@ -323,184 +348,200 @@ frame_err:
dev_kfree_skb(skb);
}
-static int add_recvbuf_small(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
{
struct sk_buff *skb;
struct skb_vnet_hdr *hdr;
int err;
- skb = netdev_alloc_skb_ip_align(vi->dev, MAX_PACKET_LEN);
+ skb = netdev_alloc_skb_ip_align(rq->vi->dev, MAX_PACKET_LEN);
if (unlikely(!skb))
return -ENOMEM;
skb_put(skb, MAX_PACKET_LEN);
hdr = skb_vnet_hdr(skb);
- sg_set_buf(vi->rx_sg, &hdr->hdr, sizeof hdr->hdr);
+ sg_set_buf(rq->rx_sg, &hdr->hdr, sizeof hdr->hdr);
- skb_to_sgvec(skb, vi->rx_sg + 1, 0, skb->len);
+ skb_to_sgvec(skb, rq->rx_sg + 1, 0, skb->len);
- err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 2, skb, gfp);
+ err = virtqueue_add_buf_gfp(rq->rvq, rq->rx_sg, 0, 2, skb, gfp);
if (err < 0)
dev_kfree_skb(skb);
return err;
}
-static int add_recvbuf_big(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
{
struct page *first, *list = NULL;
char *p;
int i, err, offset;
- /* page in vi->rx_sg[MAX_SKB_FRAGS + 1] is list tail */
+ /* page in rq->rx_sg[MAX_SKB_FRAGS + 1] is list tail */
for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
- first = get_a_page(vi, gfp);
+ first = get_a_page(rq, gfp);
if (!first) {
if (list)
- give_pages(vi, list);
+ give_pages(rq, list);
return -ENOMEM;
}
- sg_set_buf(&vi->rx_sg[i], page_address(first), PAGE_SIZE);
+ sg_set_buf(&rq->rx_sg[i], page_address(first), PAGE_SIZE);
/* chain new page in list head to match sg */
first->private = (unsigned long)list;
list = first;
}
- first = get_a_page(vi, gfp);
+ first = get_a_page(rq, gfp);
if (!first) {
- give_pages(vi, list);
+ give_pages(rq, list);
return -ENOMEM;
}
p = page_address(first);
- /* vi->rx_sg[0], vi->rx_sg[1] share the same page */
- /* a separated vi->rx_sg[0] for virtio_net_hdr only due to QEMU bug */
- sg_set_buf(&vi->rx_sg[0], p, sizeof(struct virtio_net_hdr));
+ /* rq->rx_sg[0], rq->rx_sg[1] share the same page */
+ /* a separated rq->rx_sg[0] for virtio_net_hdr only due to QEMU bug */
+ sg_set_buf(&rq->rx_sg[0], p, sizeof(struct virtio_net_hdr));
- /* vi->rx_sg[1] for data packet, from offset */
+ /* rq->rx_sg[1] for data packet, from offset */
offset = sizeof(struct padded_vnet_hdr);
- sg_set_buf(&vi->rx_sg[1], p + offset, PAGE_SIZE - offset);
+ sg_set_buf(&rq->rx_sg[1], p + offset, PAGE_SIZE - offset);
/* chain first in list head */
first->private = (unsigned long)list;
- err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, MAX_SKB_FRAGS + 2,
+ err = virtqueue_add_buf_gfp(rq->rvq, rq->rx_sg, 0, MAX_SKB_FRAGS + 2,
first, gfp);
if (err < 0)
- give_pages(vi, first);
+ give_pages(rq, first);
return err;
}
-static int add_recvbuf_mergeable(struct virtnet_info *vi, gfp_t gfp)
+static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
{
struct page *page;
int err;
- page = get_a_page(vi, gfp);
+ page = get_a_page(rq, gfp);
if (!page)
return -ENOMEM;
- sg_init_one(vi->rx_sg, page_address(page), PAGE_SIZE);
+ sg_init_one(rq->rx_sg, page_address(page), PAGE_SIZE);
- err = virtqueue_add_buf_gfp(vi->rvq, vi->rx_sg, 0, 1, page, gfp);
+ err = virtqueue_add_buf_gfp(rq->rvq, rq->rx_sg, 0, 1, page, gfp);
if (err < 0)
- give_pages(vi, page);
+ give_pages(rq, page);
return err;
}
/* Returns false if we couldn't fill entirely (OOM). */
-static bool try_fill_recv(struct virtnet_info *vi, gfp_t gfp)
+static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
{
+ struct virtnet_info *vi = rq->vi;
int err;
bool oom;
do {
if (vi->mergeable_rx_bufs)
- err = add_recvbuf_mergeable(vi, gfp);
+ err = add_recvbuf_mergeable(rq, gfp);
else if (vi->big_packets)
- err = add_recvbuf_big(vi, gfp);
+ err = add_recvbuf_big(rq, gfp);
else
- err = add_recvbuf_small(vi, gfp);
+ err = add_recvbuf_small(rq, gfp);
oom = err == -ENOMEM;
if (err < 0)
break;
- ++vi->num;
+ ++rq->num;
} while (err > 0);
- if (unlikely(vi->num > vi->max))
- vi->max = vi->num;
- virtqueue_kick(vi->rvq);
+ if (unlikely(rq->num > rq->max))
+ rq->max = rq->num;
+ virtqueue_kick(rq->rvq);
return !oom;
}
static void skb_recv_done(struct virtqueue *rvq)
{
+ int qnum = rvq->queue_index / 2; /* RX/TX vqs are allocated in pairs */
struct virtnet_info *vi = rvq->vdev->priv;
+ struct napi_struct *napi = &vi->rq[qnum]->napi;
+
/* Schedule NAPI, Suppress further interrupts if successful. */
- if (napi_schedule_prep(&vi->napi)) {
+ if (napi_schedule_prep(napi)) {
virtqueue_disable_cb(rvq);
- __napi_schedule(&vi->napi);
+ __napi_schedule(napi);
}
}
-static void virtnet_napi_enable(struct virtnet_info *vi)
+static void virtnet_napi_enable(struct receive_queue *rq)
{
- napi_enable(&vi->napi);
+ napi_enable(&rq->napi);
/* If all buffers were filled by other side before we napi_enabled, we
* won't get another interrupt, so process any outstanding packets
* now. virtnet_poll wants re-enable the queue, so we disable here.
* We synchronize against interrupts via NAPI_STATE_SCHED */
- if (napi_schedule_prep(&vi->napi)) {
- virtqueue_disable_cb(vi->rvq);
- __napi_schedule(&vi->napi);
+ if (napi_schedule_prep(&rq->napi)) {
+ virtqueue_disable_cb(rq->rvq);
+ __napi_schedule(&rq->napi);
}
}
+static void virtnet_napi_enable_all_queues(struct virtnet_info *vi)
+{
+ int i;
+
+ for (i = 0; i < vi->numtxqs; i++)
+ virtnet_napi_enable(vi->rq[i]);
+}
+
static void refill_work(struct work_struct *work)
{
- struct virtnet_info *vi;
+ struct napi_struct *napi;
+ struct receive_queue *rq;
bool still_empty;
- vi = container_of(work, struct virtnet_info, refill.work);
- napi_disable(&vi->napi);
- still_empty = !try_fill_recv(vi, GFP_KERNEL);
- virtnet_napi_enable(vi);
+ rq = container_of(work, struct receive_queue, refill.work);
+ napi = &rq->napi;
+
+ napi_disable(napi);
+ still_empty = !try_fill_recv(rq, GFP_KERNEL);
+ virtnet_napi_enable(rq);
/* In theory, this can happen: if we don't get any buffers in
* we will *never* try to fill again. */
if (still_empty)
- schedule_delayed_work(&vi->refill, HZ/2);
+ schedule_delayed_work(&rq->refill, HZ/2);
}
static int virtnet_poll(struct napi_struct *napi, int budget)
{
- struct virtnet_info *vi = container_of(napi, struct virtnet_info, napi);
+ struct receive_queue *rq = container_of(napi, struct receive_queue,
+ napi);
void *buf;
unsigned int len, received = 0;
again:
while (received < budget &&
- (buf = virtqueue_get_buf(vi->rvq, &len)) != NULL) {
- receive_buf(vi->dev, buf, len);
- --vi->num;
+ (buf = virtqueue_get_buf(rq->rvq, &len)) != NULL) {
+ receive_buf(rq, buf, len);
+ --rq->num;
received++;
}
- if (vi->num < vi->max / 2) {
- if (!try_fill_recv(vi, GFP_ATOMIC))
- schedule_delayed_work(&vi->refill, 0);
+ if (rq->num < rq->max / 2) {
+ if (!try_fill_recv(rq, GFP_ATOMIC))
+ schedule_delayed_work(&rq->refill, 0);
}
/* Out of packets? */
if (received < budget) {
napi_complete(napi);
- if (unlikely(!virtqueue_enable_cb(vi->rvq)) &&
+ if (unlikely(!virtqueue_enable_cb(rq->rvq)) &&
napi_schedule_prep(napi)) {
- virtqueue_disable_cb(vi->rvq);
+ virtqueue_disable_cb(rq->rvq);
__napi_schedule(napi);
goto again;
}
@@ -509,12 +550,13 @@ again:
return received;
}
-static unsigned int free_old_xmit_skbs(struct virtnet_info *vi)
+static unsigned int free_old_xmit_skbs(struct virtnet_info *vi,
+ struct virtqueue *svq)
{
struct sk_buff *skb;
unsigned int len, tot_sgs = 0;
- while ((skb = virtqueue_get_buf(vi->svq, &len)) != NULL) {
+ while ((skb = virtqueue_get_buf(svq, &len)) != NULL) {
pr_debug("Sent skb %p\n", skb);
vi->dev->stats.tx_bytes += skb->len;
vi->dev->stats.tx_packets++;
@@ -524,7 +566,8 @@ static unsigned int free_old_xmit_skbs(s
return tot_sgs;
}
-static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb)
+static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb,
+ struct virtqueue *svq, struct scatterlist *tx_sg)
{
struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
@@ -562,12 +605,12 @@ static int xmit_skb(struct virtnet_info
/* Encode metadata header at front. */
if (vi->mergeable_rx_bufs)
- sg_set_buf(vi->tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
+ sg_set_buf(tx_sg, &hdr->mhdr, sizeof hdr->mhdr);
else
- sg_set_buf(vi->tx_sg, &hdr->hdr, sizeof hdr->hdr);
+ sg_set_buf(tx_sg, &hdr->hdr, sizeof hdr->hdr);
- hdr->num_sg = skb_to_sgvec(skb, vi->tx_sg + 1, 0, skb->len) + 1;
- return virtqueue_add_buf(vi->svq, vi->tx_sg, hdr->num_sg,
+ hdr->num_sg = skb_to_sgvec(skb, tx_sg + 1, 0, skb->len) + 1;
+ return virtqueue_add_buf(svq, tx_sg, hdr->num_sg,
0, skb);
}
@@ -575,31 +618,34 @@ static netdev_tx_t start_xmit(struct sk_
{
struct virtnet_info *vi = netdev_priv(dev);
int capacity;
+ int qnum = skb_get_queue_mapping(skb);
+ struct virtqueue *svq = vi->sq[qnum]->svq;
/* Free up any pending old buffers before queueing new ones. */
- free_old_xmit_skbs(vi);
+ free_old_xmit_skbs(vi, svq);
/* Try to transmit */
- capacity = xmit_skb(vi, skb);
+ capacity = xmit_skb(vi, skb, svq, vi->sq[qnum]->tx_sg);
/* This can happen with OOM and indirect buffers. */
if (unlikely(capacity < 0)) {
if (net_ratelimit()) {
if (likely(capacity == -ENOMEM)) {
dev_warn(&dev->dev,
- "TX queue failure: out of memory\n");
+ "TXQ (%d) failure: out of memory\n",
+ qnum);
} else {
dev->stats.tx_fifo_errors++;
dev_warn(&dev->dev,
- "Unexpected TX queue failure: %d\n",
- capacity);
+ "Unexpected TXQ (%d) failure: %d\n",
+ qnum, capacity);
}
}
dev->stats.tx_dropped++;
kfree_skb(skb);
return NETDEV_TX_OK;
}
- virtqueue_kick(vi->svq);
+ virtqueue_kick(svq);
/* Don't wait up for transmitted skbs to be freed. */
skb_orphan(skb);
@@ -608,13 +654,13 @@ static netdev_tx_t start_xmit(struct sk_
/* Apparently nice girls don't return TX_BUSY; stop the queue
* before it gets out of hand. Naturally, this wastes entries. */
if (capacity < 2+MAX_SKB_FRAGS) {
- netif_stop_queue(dev);
- if (unlikely(!virtqueue_enable_cb(vi->svq))) {
+ netif_stop_subqueue(dev, qnum);
+ if (unlikely(!virtqueue_enable_cb(svq))) {
/* More just got used, free them then recheck. */
- capacity += free_old_xmit_skbs(vi);
+ capacity += free_old_xmit_skbs(vi, svq);
if (capacity >= 2+MAX_SKB_FRAGS) {
- netif_start_queue(dev);
- virtqueue_disable_cb(vi->svq);
+ netif_start_subqueue(dev, qnum);
+ virtqueue_disable_cb(svq);
}
}
}
@@ -643,8 +689,10 @@ static int virtnet_set_mac_address(struc
static void virtnet_netpoll(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+ int i;
- napi_schedule(&vi->napi);
+ for (i = 0; i < vi->numtxqs; i++)
+ napi_schedule(&vi->rq[i]->napi);
}
#endif
@@ -652,7 +700,7 @@ static int virtnet_open(struct net_devic
{
struct virtnet_info *vi = netdev_priv(dev);
- virtnet_napi_enable(vi);
+ virtnet_napi_enable_all_queues(vi);
return 0;
}
@@ -704,8 +752,10 @@ static bool virtnet_send_command(struct
static int virtnet_close(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
+ int i;
- napi_disable(&vi->napi);
+ for (i = 0; i < vi->numtxqs; i++)
+ napi_disable(&vi->rq[i]->napi);
return 0;
}
@@ -861,10 +911,10 @@ static void virtnet_update_status(struct
if (vi->status & VIRTIO_NET_S_LINK_UP) {
netif_carrier_on(vi->dev);
- netif_wake_queue(vi->dev);
+ netif_tx_wake_all_queues(vi->dev);
} else {
netif_carrier_off(vi->dev);
- netif_stop_queue(vi->dev);
+ netif_tx_stop_all_queues(vi->dev);
}
}
@@ -875,18 +925,222 @@ static void virtnet_config_changed(struc
virtnet_update_status(vi);
}
+static void free_receive_bufs(struct virtnet_info *vi)
+{
+ int i;
+
+ for (i = 0; i < vi->numtxqs; i++) {
+ BUG_ON(vi->rq[i] == NULL);
+ while (vi->rq[i]->pages)
+ __free_pages(get_a_page(vi->rq[i], GFP_KERNEL), 0);
+ }
+}
+
+/* Free memory allocated for send and receive queues */
+static void free_rq_sq(struct virtnet_info *vi)
+{
+ int i;
+
+ if (vi->rq) {
+ for (i = 0; i < vi->numtxqs; i++)
+ kfree(vi->rq[i]);
+ kfree(vi->rq);
+ }
+
+ if (vi->sq) {
+ for (i = 0; i < vi->numtxqs; i++)
+ kfree(vi->sq[i]);
+ kfree(vi->sq);
+ }
+}
+
+static void free_unused_bufs(struct virtnet_info *vi)
+{
+ void *buf;
+ int i;
+
+ for (i = 0; i < vi->numtxqs; i++) {
+ struct virtqueue *svq = vi->sq[i]->svq;
+
+ while (1) {
+ buf = virtqueue_detach_unused_buf(svq);
+ if (!buf)
+ break;
+ dev_kfree_skb(buf);
+ }
+ }
+
+ for (i = 0; i < vi->numtxqs; i++) {
+ struct virtqueue *rvq = vi->rq[i]->rvq;
+
+ while (1) {
+ buf = virtqueue_detach_unused_buf(rvq);
+ if (!buf)
+ break;
+ if (vi->mergeable_rx_bufs || vi->big_packets)
+ give_pages(vi->rq[i], buf);
+ else
+ dev_kfree_skb(buf);
+ --vi->rq[i]->num;
+ }
+ BUG_ON(vi->rq[i]->num != 0);
+ }
+}
+
+#define MAX_DEVICE_NAME 16
+static int initialize_vqs(struct virtnet_info *vi, int numtxqs)
+{
+ vq_callback_t **callbacks;
+ struct virtqueue **vqs;
+ int i, err = -ENOMEM;
+ int totalvqs;
+ char **names;
+
+ /* Allocate receive queues */
+ vi->rq = kcalloc(numtxqs, sizeof(*vi->rq), GFP_KERNEL);
+ if (!vi->rq)
+ goto out;
+ for (i = 0; i < numtxqs; i++) {
+ vi->rq[i] = kzalloc(sizeof(*vi->rq[i]), GFP_KERNEL);
+ if (!vi->rq[i])
+ goto out;
+ }
+
+ /* Allocate send queues */
+ vi->sq = kcalloc(numtxqs, sizeof(*vi->sq), GFP_KERNEL);
+ if (!vi->sq)
+ goto out;
+ for (i = 0; i < numtxqs; i++) {
+ vi->sq[i] = kzalloc(sizeof(*vi->sq[i]), GFP_KERNEL);
+ if (!vi->sq[i])
+ goto out;
+ }
+
+ /* setup initial receive and send queue parameters */
+ for (i = 0; i < numtxqs; i++) {
+ vi->rq[i]->vi = vi;
+ vi->rq[i]->pages = NULL;
+ INIT_DELAYED_WORK(&vi->rq[i]->refill, refill_work);
+ netif_napi_add(vi->dev, &vi->rq[i]->napi, virtnet_poll,
+ napi_weight);
+
+ sg_init_table(vi->rq[i]->rx_sg, ARRAY_SIZE(vi->rq[i]->rx_sg));
+ sg_init_table(vi->sq[i]->tx_sg, ARRAY_SIZE(vi->sq[i]->tx_sg));
+ }
+
+ /*
+ * We expect 1 RX virtqueue followed by 1 TX virtqueues, followed
+ * by the same 'numtxqs-1' times, and optionally one control virtqueue.
+ */
+ totalvqs = numtxqs * 2 +
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
+
+ /* Allocate space for find_vqs parameters */
+ vqs = kmalloc(totalvqs * sizeof(*vqs), GFP_KERNEL);
+ callbacks = kmalloc(totalvqs * sizeof(*callbacks), GFP_KERNEL);
+ names = kzalloc(totalvqs * sizeof(*names), GFP_KERNEL);
+ if (!vqs || !callbacks || !names)
+ goto free_params;
+
+#if 1
+ /* Allocate/initialize parameters for recv/send virtqueues */
+ for (i = 0; i < numtxqs * 2; i++) {
+ names[i] = kmalloc(MAX_DEVICE_NAME * sizeof(*names[i]),
+ GFP_KERNEL);
+ if (!names[i])
+ goto free_params;
+
+ if (!(i & 1)) { /* RX */
+ callbacks[i] = skb_recv_done;
+ sprintf(names[i], "input.%d", i / 2);
+ } else {
+ callbacks[i] = skb_xmit_done;
+ sprintf(names[i], "output.%d", i / 2);
+ }
+ }
+
+ /* Parameters for control virtqueue, if any */
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+ callbacks[i] = NULL;
+ names[i] = "control";
+ }
+#else
+ /* Allocate/initialize parameters for recv virtqueues */
+ for (i = 0; i < numtxqs * 2; i += 2) {
+ callbacks[i] = skb_recv_done;
+ names[i] = kmalloc(MAX_DEVICE_NAME * sizeof(*names[i]),
+ GFP_KERNEL);
+ if (!names[i])
+ goto free_params;
+ sprintf(names[i], "input.%d", i / 2);
+ }
+
+ /* Allocate/initialize parameters for send virtqueues */
+ for (i = 1; i < numtxqs * 2; i += 2) {
+ callbacks[i] = skb_xmit_done;
+ names[i] = kmalloc(MAX_DEVICE_NAME * sizeof(*names[i]),
+ GFP_KERNEL);
+ if (!names[i])
+ goto free_params;
+ sprintf(names[i], "output.%d", i / 2);
+ }
+
+ /* Parameters for control virtqueue, if any */
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
+ callbacks[i - 1] = NULL;
+ names[i - 1] = "control";
+ }
+#endif
+
+ err = vi->vdev->config->find_vqs(vi->vdev, totalvqs, vqs, callbacks,
+ (const char **)names);
+ if (err)
+ goto free_params;
+
+ /* Assign the allocated vqs alternatively for RX/TX */
+ for (i = 0; i < numtxqs * 2; i += 2) {
+ vi->rq[i/2]->rvq = vqs[i];
+ vi->sq[i/2]->svq = vqs[i + 1];
+ }
+
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ))
+ vi->cvq = vqs[i];
+
+free_params:
+ if (names) {
+ for (i = 0; i < numtxqs * 2; i++)
+ kfree(names[i]);
+ kfree(names);
+ }
+
+ kfree(callbacks);
+ kfree(vqs);
+
+out:
+ if (err)
+ free_rq_sq(vi);
+
+ return err;
+}
+
static int virtnet_probe(struct virtio_device *vdev)
{
- int err;
+ int i, err;
+ u16 numtxqs;
+ u16 num_queue_pairs = 2;
struct net_device *dev;
struct virtnet_info *vi;
- struct virtqueue *vqs[3];
- vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
- const char *names[] = { "input", "output", "control" };
- int nvqs;
+
+ /* Find if host supports MULTIQUEUE */
+ err = virtio_config_val(vdev, VIRTIO_NET_F_MULTIQUEUE,
+ offsetof(struct virtio_net_config,
+ num_queue_pairs), &num_queue_pairs);
+ numtxqs = num_queue_pairs / 2;
+ if (!numtxqs)
+ numtxqs = 1;
/* Allocate ourselves a network device with room for our info */
- dev = alloc_etherdev(sizeof(struct virtnet_info));
+ dev = alloc_etherdev_mq(sizeof(struct virtnet_info), numtxqs);
if (!dev)
return -ENOMEM;
@@ -932,14 +1186,10 @@ static int virtnet_probe(struct virtio_d
/* Set up our device-specific information */
vi = netdev_priv(dev);
- netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
vi->dev = dev;
vi->vdev = vdev;
vdev->priv = vi;
- vi->pages = NULL;
- INIT_DELAYED_WORK(&vi->refill, refill_work);
- sg_init_table(vi->rx_sg, ARRAY_SIZE(vi->rx_sg));
- sg_init_table(vi->tx_sg, ARRAY_SIZE(vi->tx_sg));
+ vi->numtxqs = numtxqs;
/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
@@ -950,23 +1200,14 @@ static int virtnet_probe(struct virtio_d
if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
vi->mergeable_rx_bufs = true;
- /* We expect two virtqueues, receive then send,
- * and optionally control. */
- nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
-
- err = vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names);
+ /* Initialize our rx/tx queue parameters, and invoke find_vqs */
+ err = initialize_vqs(vi, numtxqs);
if (err)
- goto free;
+ goto free_netdev;
- vi->rvq = vqs[0];
- vi->svq = vqs[1];
-
- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
- vi->cvq = vqs[2];
-
- if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
- dev->features |= NETIF_F_HW_VLAN_FILTER;
- }
+ if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) &&
+ virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
+ dev->features |= NETIF_F_HW_VLAN_FILTER;
err = register_netdev(dev);
if (err) {
@@ -975,14 +1216,21 @@ static int virtnet_probe(struct virtio_d
}
/* Last of all, set up some receive buffers. */
- try_fill_recv(vi, GFP_KERNEL);
+ for (i = 0; i < numtxqs; i++) {
+ try_fill_recv(vi->rq[i], GFP_KERNEL);
- /* If we didn't even get one input buffer, we're useless. */
- if (vi->num == 0) {
- err = -ENOMEM;
- goto unregister;
+ /* If we didn't even get one input buffer, we're useless. */
+ if (vi->rq[i]->num == 0) {
+ if (i)
+ free_unused_bufs(vi);
+ err = -ENOMEM;
+ goto free_recv_bufs;
+ }
}
+ dev_info(&dev->dev, "(virtio-net) Allocated %d RX & TX vq's\n",
+ numtxqs);
+
/* Assume link up if device can't report link status,
otherwise get link status from config. */
if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -993,59 +1241,48 @@ static int virtnet_probe(struct virtio_d
netif_carrier_on(dev);
}
- pr_debug("virtnet: registered device %s\n", dev->name);
+ pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
+ dev->name, numtxqs);
return 0;
-unregister:
+free_recv_bufs:
+ free_receive_bufs(vi);
unregister_netdev(dev);
- cancel_delayed_work_sync(&vi->refill);
+
free_vqs:
+ for (i = 0; i < numtxqs; i++)
+ cancel_delayed_work_sync(&vi->rq[i]->refill);
vdev->config->del_vqs(vdev);
-free:
+ free_rq_sq(vi);
+
+free_netdev:
free_netdev(dev);
return err;
}
-static void free_unused_bufs(struct virtnet_info *vi)
-{
- void *buf;
- while (1) {
- buf = virtqueue_detach_unused_buf(vi->svq);
- if (!buf)
- break;
- dev_kfree_skb(buf);
- }
- while (1) {
- buf = virtqueue_detach_unused_buf(vi->rvq);
- if (!buf)
- break;
- if (vi->mergeable_rx_bufs || vi->big_packets)
- give_pages(vi, buf);
- else
- dev_kfree_skb(buf);
- --vi->num;
- }
- BUG_ON(vi->num != 0);
-}
-
static void __devexit virtnet_remove(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
+ int i;
/* Stop all the virtqueues. */
vdev->config->reset(vdev);
unregister_netdev(vi->dev);
- cancel_delayed_work_sync(&vi->refill);
+
+ for (i = 0; i < vi->numtxqs; i++)
+ cancel_delayed_work_sync(&vi->rq[i]->refill);
/* Free unused buffers in both send and recv, if any. */
free_unused_bufs(vi);
vdev->config->del_vqs(vi->vdev);
- while (vi->pages)
- __free_pages(get_a_page(vi, GFP_KERNEL), 0);
+ free_receive_bufs(vi);
+
+ /* Free memory for send and receive queues */
+ free_rq_sq(vi);
free_netdev(vi->dev);
}
@@ -1062,7 +1299,7 @@ static unsigned int features[] = {
VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6,
VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
- VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
+ VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, VIRTIO_NET_F_MULTIQUEUE,
};
static struct virtio_driver virtio_net_driver = {
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] [RFC rev2] virtio-net changes
2011-04-05 15:08 ` [PATCH 2/4] [RFC rev2] virtio-net changes Krishna Kumar
@ 2011-04-13 1:28 ` Rusty Russell
2011-04-13 16:20 ` Krishna Kumar2
0 siblings, 1 reply; 9+ messages in thread
From: Rusty Russell @ 2011-04-13 1:28 UTC (permalink / raw)
To: Krishna Kumar, davem, mst
Cc: eric.dumazet, arnd, netdev, horms, avi, anthony, kvm,
Krishna Kumar
On Tue, 05 Apr 2011 20:38:52 +0530, Krishna Kumar <krkumar2@in.ibm.com> wrote:
> Implement mq virtio-net driver.
>
> Though struct virtio_net_config changes, it works with the old
> qemu since the last element is not accessed unless qemu sets
> VIRTIO_NET_F_MULTIQUEUE.
>
> Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
Hi Krishna!
This change looks fairly solid, but I'd prefer it split into a few
stages for clarity.
The first patch should extract out the struct send_queue and struct
receive_queue, even though there's still only one. The second patch
can then introduce VIRTIO_NET_F_MULTIQUEUE.
You could split into more parts if that makes sense, but I'd prefer to
see the mechanical changes separate from the feature addition.
> -struct virtnet_info {
> - struct virtio_device *vdev;
> - struct virtqueue *rvq, *svq, *cvq;
> - struct net_device *dev;
> +/* Internal representation of a send virtqueue */
> +struct send_queue {
> + /* Virtqueue associated with this send _queue */
> + struct virtqueue *svq;
You can simply call this vq now it's inside 'send_queue'.
> +
> + /* TX: fragments + linear part + virtio header */
> + struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
Similarly, this can just be sg.
> +static void free_receive_bufs(struct virtnet_info *vi)
> +{
> + int i;
> +
> + for (i = 0; i < vi->numtxqs; i++) {
> + BUG_ON(vi->rq[i] == NULL);
> + while (vi->rq[i]->pages)
> + __free_pages(get_a_page(vi->rq[i], GFP_KERNEL), 0);
> + }
> +}
You can skip the BUG_ON(), since the next line will have the same effect.
> +/* Free memory allocated for send and receive queues */
> +static void free_rq_sq(struct virtnet_info *vi)
> +{
> + int i;
> +
> + if (vi->rq) {
> + for (i = 0; i < vi->numtxqs; i++)
> + kfree(vi->rq[i]);
> + kfree(vi->rq);
> + }
> +
> + if (vi->sq) {
> + for (i = 0; i < vi->numtxqs; i++)
> + kfree(vi->sq[i]);
> + kfree(vi->sq);
> + }
This looks weird, even though it's correct.
I think we need a better name than numtxqs and shorter than
num_queue_pairs. Let's just use num_queues; sure, there are both tx and
rq queues, but I still think it's pretty clear.
> + for (i = 0; i < vi->numtxqs; i++) {
> + struct virtqueue *svq = vi->sq[i]->svq;
> +
> + while (1) {
> + buf = virtqueue_detach_unused_buf(svq);
> + if (!buf)
> + break;
> + dev_kfree_skb(buf);
> + }
> + }
I know this isn't your code, but it's ugly :)
while ((buf = virtqueue_detach_unused_buf(svq)) != NULL)
dev_kfree_skb(buf);
> + for (i = 0; i < vi->numtxqs; i++) {
> + struct virtqueue *rvq = vi->rq[i]->rvq;
> +
> + while (1) {
> + buf = virtqueue_detach_unused_buf(rvq);
> + if (!buf)
> + break;
Here too...
> +#define MAX_DEVICE_NAME 16
This isn't a good idea, see below.
> +static int initialize_vqs(struct virtnet_info *vi, int numtxqs)
> +{
> + vq_callback_t **callbacks;
> + struct virtqueue **vqs;
> + int i, err = -ENOMEM;
> + int totalvqs;
> + char **names;
This whole routine is really messy. How about doing find_vqs first,
then have routines like setup_rxq(), setup_txq() and setup_controlq()
would make this neater:
static int setup_rxq(struct send_queue *sq, char *name);
Also, use kasprintf() instead of kmalloc & sprintf.
> +#if 1
> + /* Allocate/initialize parameters for recv/send virtqueues */
Why is this #if 1'd?
I do prefer the #else method of doing two loops, myself (but use
kasprintf).
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 2/4] [RFC rev2] virtio-net changes
2011-04-13 1:28 ` Rusty Russell
@ 2011-04-13 16:20 ` Krishna Kumar2
0 siblings, 0 replies; 9+ messages in thread
From: Krishna Kumar2 @ 2011-04-13 16:20 UTC (permalink / raw)
To: Rusty Russell
Cc: anthony, arnd, avi, davem, eric.dumazet, horms, kvm, mst, netdev
Hi Rusty,
Thanks for your feedback. I agree with all the changes, and will
make it and resubmit next.
thanks,
- KK
Rusty Russell <rusty@rustcorp.com.au> wrote on 04/13/2011 06:58:02 AM:
> Rusty Russell <rusty@rustcorp.com.au>
> 04/13/2011 06:58 AM
>
> To
>
> Krishna Kumar2/India/IBM@IBMIN, davem@davemloft.net, mst@redhat.com
>
> cc
>
> eric.dumazet@gmail.com, arnd@arndb.de, netdev@vger.kernel.org,
> horms@verge.net.au, avi@redhat.com, anthony@codemonkey.ws,
> kvm@vger.kernel.org, Krishna Kumar2/India/IBM@IBMIN
>
> Subject
>
> Re: [PATCH 2/4] [RFC rev2] virtio-net changes
>
> On Tue, 05 Apr 2011 20:38:52 +0530, Krishna Kumar <krkumar2@in.ibm.com>
wrote:
> > Implement mq virtio-net driver.
> >
> > Though struct virtio_net_config changes, it works with the old
> > qemu since the last element is not accessed unless qemu sets
> > VIRTIO_NET_F_MULTIQUEUE.
> >
> > Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
>
> Hi Krishna!
>
> This change looks fairly solid, but I'd prefer it split into a few
> stages for clarity.
>
> The first patch should extract out the struct send_queue and struct
> receive_queue, even though there's still only one. The second patch
> can then introduce VIRTIO_NET_F_MULTIQUEUE.
>
> You could split into more parts if that makes sense, but I'd prefer to
> see the mechanical changes separate from the feature addition.
>
> > -struct virtnet_info {
> > - struct virtio_device *vdev;
> > - struct virtqueue *rvq, *svq, *cvq;
> > - struct net_device *dev;
> > +/* Internal representation of a send virtqueue */
> > +struct send_queue {
> > + /* Virtqueue associated with this send _queue */
> > + struct virtqueue *svq;
>
> You can simply call this vq now it's inside 'send_queue'.
>
> > +
> > + /* TX: fragments + linear part + virtio header */
> > + struct scatterlist tx_sg[MAX_SKB_FRAGS + 2];
>
> Similarly, this can just be sg.
>
> > +static void free_receive_bufs(struct virtnet_info *vi)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < vi->numtxqs; i++) {
> > + BUG_ON(vi->rq[i] == NULL);
> > + while (vi->rq[i]->pages)
> > + __free_pages(get_a_page(vi->rq[i], GFP_KERNEL), 0);
> > + }
> > +}
>
> You can skip the BUG_ON(), since the next line will have the same effect.
>
> > +/* Free memory allocated for send and receive queues */
> > +static void free_rq_sq(struct virtnet_info *vi)
> > +{
> > + int i;
> > +
> > + if (vi->rq) {
> > + for (i = 0; i < vi->numtxqs; i++)
> > + kfree(vi->rq[i]);
> > + kfree(vi->rq);
> > + }
> > +
> > + if (vi->sq) {
> > + for (i = 0; i < vi->numtxqs; i++)
> > + kfree(vi->sq[i]);
> > + kfree(vi->sq);
> > + }
>
> This looks weird, even though it's correct.
>
> I think we need a better name than numtxqs and shorter than
> num_queue_pairs. Let's just use num_queues; sure, there are both tx and
> rq queues, but I still think it's pretty clear.
>
> > + for (i = 0; i < vi->numtxqs; i++) {
> > + struct virtqueue *svq = vi->sq[i]->svq;
> > +
> > + while (1) {
> > + buf = virtqueue_detach_unused_buf(svq);
> > + if (!buf)
> > + break;
> > + dev_kfree_skb(buf);
> > + }
> > + }
>
> I know this isn't your code, but it's ugly :)
>
> while ((buf = virtqueue_detach_unused_buf(svq)) != NULL)
> dev_kfree_skb(buf);
>
> > + for (i = 0; i < vi->numtxqs; i++) {
> > + struct virtqueue *rvq = vi->rq[i]->rvq;
> > +
> > + while (1) {
> > + buf = virtqueue_detach_unused_buf(rvq);
> > + if (!buf)
> > + break;
>
> Here too...
>
> > +#define MAX_DEVICE_NAME 16
>
> This isn't a good idea, see below.
>
> > +static int initialize_vqs(struct virtnet_info *vi, int numtxqs)
> > +{
> > + vq_callback_t **callbacks;
> > + struct virtqueue **vqs;
> > + int i, err = -ENOMEM;
> > + int totalvqs;
> > + char **names;
>
> This whole routine is really messy. How about doing find_vqs first,
> then have routines like setup_rxq(), setup_txq() and setup_controlq()
> would make this neater:
>
> static int setup_rxq(struct send_queue *sq, char *name);
>
> Also, use kasprintf() instead of kmalloc & sprintf.
>
> > +#if 1
> > + /* Allocate/initialize parameters for recv/send virtqueues */
>
> Why is this #if 1'd?
>
> I do prefer the #else method of doing two loops, myself (but use
> kasprintf).
>
> Cheers,
> Rusty.
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 3/4] [RFC rev2] vhost changes
2011-04-05 15:08 [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Krishna Kumar
2011-04-05 15:08 ` [PATCH 1/4] [RFC rev2] Change virtqueue structure Krishna Kumar
2011-04-05 15:08 ` [PATCH 2/4] [RFC rev2] virtio-net changes Krishna Kumar
@ 2011-04-05 15:09 ` Krishna Kumar
2011-04-05 15:09 ` [PATCH 4/4] [RFC rev2] qemu changes Krishna Kumar
2011-04-13 12:00 ` [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Avi Kivity
4 siblings, 0 replies; 9+ messages in thread
From: Krishna Kumar @ 2011-04-05 15:09 UTC (permalink / raw)
To: rusty, davem, mst
Cc: eric.dumazet, arnd, netdev, horms, avi, anthony, kvm,
Krishna Kumar
vhost_net_open is changed to allocate a vhost_net and return.
The remaining initializations are delayed till SET_OWNER.
SET_OWNER is changed so that the argument is used to determine
how many txqs to use. Unmodified qemu's will pass NULL, so
this is recognized and handled as numtxqs=1.
The number of vhost threads is <= #txqs. Threads handle more
than one txq when #txqs is more than MAX_VHOST_THREADS (4).
The same thread handles both RX and TX - tested with tap/bridge
so far (TBD: some changes are needed in macvtap to support the
same).
Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
drivers/vhost/net.c | 249 +++++++++++++++++++++++++---------------
drivers/vhost/vhost.c | 221 ++++++++++++++++++++++++-----------
drivers/vhost/vhost.h | 22 ++-
3 files changed, 332 insertions(+), 160 deletions(-)
diff -ruNp org/drivers/vhost/net.c new/drivers/vhost/net.c
--- org/drivers/vhost/net.c 2011-04-05 14:15:18.000000000 +0530
+++ new/drivers/vhost/net.c 2011-04-05 20:15:32.000000000 +0530
@@ -32,12 +32,6 @@
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_NET_WEIGHT 0x80000
-enum {
- VHOST_NET_VQ_RX = 0,
- VHOST_NET_VQ_TX = 1,
- VHOST_NET_VQ_MAX = 2,
-};
-
enum vhost_net_poll_state {
VHOST_NET_POLL_DISABLED = 0,
VHOST_NET_POLL_STARTED = 1,
@@ -46,12 +40,13 @@ enum vhost_net_poll_state {
struct vhost_net {
struct vhost_dev dev;
- struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX];
- struct vhost_poll poll[VHOST_NET_VQ_MAX];
+ struct vhost_virtqueue *vqs;
+ struct vhost_poll *poll;
+ struct socket **socks;
/* Tells us whether we are polling a socket for TX.
* We only do this when socket buffer fills up.
* Protected by tx vq lock. */
- enum vhost_net_poll_state tx_poll_state;
+ enum vhost_net_poll_state *tx_poll_state;
};
/* Pop first len bytes from iovec. Return number of segments used. */
@@ -93,28 +88,28 @@ static void copy_iovec_hdr(const struct
}
/* Caller must have TX VQ lock */
-static void tx_poll_stop(struct vhost_net *net)
+static void tx_poll_stop(struct vhost_net *net, int qnum)
{
- if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
+ if (likely(net->tx_poll_state[qnum / 2] != VHOST_NET_POLL_STARTED))
return;
- vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
- net->tx_poll_state = VHOST_NET_POLL_STOPPED;
+ vhost_poll_stop(&net->poll[qnum]);
+ net->tx_poll_state[qnum / 2] = VHOST_NET_POLL_STOPPED;
}
/* Caller must have TX VQ lock */
-static void tx_poll_start(struct vhost_net *net, struct socket *sock)
+static void tx_poll_start(struct vhost_net *net, struct socket *sock, int qnum)
{
- if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
+ if (unlikely(net->tx_poll_state[qnum / 2] != VHOST_NET_POLL_STOPPED))
return;
- vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
- net->tx_poll_state = VHOST_NET_POLL_STARTED;
+ vhost_poll_start(&net->poll[qnum], sock->file);
+ net->tx_poll_state[qnum / 2] = VHOST_NET_POLL_STARTED;
}
/* Expects to be always run from workqueue - which acts as
* read-size critical section for our kind of RCU. */
-static void handle_tx(struct vhost_net *net)
+static void handle_tx(struct vhost_virtqueue *vq)
{
- struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX];
+ struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
unsigned out, in, s;
int head;
struct msghdr msg = {
@@ -138,7 +133,7 @@ static void handle_tx(struct vhost_net *
wmem = atomic_read(&sock->sk->sk_wmem_alloc);
if (wmem >= sock->sk->sk_sndbuf) {
mutex_lock(&vq->mutex);
- tx_poll_start(net, sock);
+ tx_poll_start(net, sock, vq->qnum);
mutex_unlock(&vq->mutex);
return;
}
@@ -147,7 +142,7 @@ static void handle_tx(struct vhost_net *
vhost_disable_notify(vq);
if (wmem < sock->sk->sk_sndbuf / 2)
- tx_poll_stop(net);
+ tx_poll_stop(net, vq->qnum);
hdr_size = vq->vhost_hlen;
for (;;) {
@@ -162,7 +157,7 @@ static void handle_tx(struct vhost_net *
if (head == vq->num) {
wmem = atomic_read(&sock->sk->sk_wmem_alloc);
if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
- tx_poll_start(net, sock);
+ tx_poll_start(net, sock, vq->qnum);
set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
break;
}
@@ -192,7 +187,7 @@ static void handle_tx(struct vhost_net *
err = sock->ops->sendmsg(NULL, sock, &msg, len);
if (unlikely(err < 0)) {
vhost_discard_vq_desc(vq, 1);
- tx_poll_start(net, sock);
+ tx_poll_start(net, sock, vq->qnum);
break;
}
if (err != len)
@@ -287,9 +282,9 @@ err:
/* Expects to be always run from workqueue - which acts as
* read-size critical section for our kind of RCU. */
-static void handle_rx(struct vhost_net *net)
+static void handle_rx(struct vhost_virtqueue *vq)
{
- struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX];
+ struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
unsigned uninitialized_var(in), log;
struct vhost_log *vq_log;
struct msghdr msg = {
@@ -398,87 +393,155 @@ static void handle_tx_kick(struct vhost_
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
poll.work);
- struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
- handle_tx(net);
+ handle_tx(vq);
}
static void handle_rx_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
poll.work);
- struct vhost_net *net = container_of(vq->dev, struct vhost_net, dev);
- handle_rx(net);
+ handle_rx(vq);
}
static void handle_tx_net(struct vhost_work *work)
{
- struct vhost_net *net = container_of(work, struct vhost_net,
- poll[VHOST_NET_VQ_TX].work);
- handle_tx(net);
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_poll,
+ work)->vq;
+
+ handle_tx(vq);
}
static void handle_rx_net(struct vhost_work *work)
{
- struct vhost_net *net = container_of(work, struct vhost_net,
- poll[VHOST_NET_VQ_RX].work);
- handle_rx(net);
+ struct vhost_virtqueue *vq = container_of(work, struct vhost_poll,
+ work)->vq;
+
+ handle_rx(vq);
}
-static int vhost_net_open(struct inode *inode, struct file *f)
+void vhost_free_vqs(struct vhost_dev *dev)
{
- struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
- struct vhost_dev *dev;
- int r;
+ struct vhost_net *n = container_of(dev, struct vhost_net, dev);
- if (!n)
- return -ENOMEM;
+ if (!n->vqs)
+ return;
- dev = &n->dev;
- n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick;
- n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
- r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
- if (r < 0) {
- kfree(n);
- return r;
+ kfree(n->socks);
+ kfree(n->tx_poll_state);
+ kfree(n->poll);
+ kfree(n->vqs);
+
+ /*
+ * Reset so that vhost_net_release (which gets called when
+ * vhost_dev_set_owner() call fails) will notice.
+ */
+ n->vqs = NULL;
+}
+
+int vhost_setup_vqs(struct vhost_dev *dev, int numtxqs)
+{
+ struct vhost_net *n = container_of(dev, struct vhost_net, dev);
+ int i, nvqs;
+ int ret = -ENOMEM;
+
+ if (numtxqs < 0)
+ return -EINVAL;
+
+ if (numtxqs == 0) {
+ /* Old qemu doesn't pass arguments to set_owner, use 1 txq */
+ numtxqs = 1;
+ }
+
+ /* Get total number of virtqueues */
+ nvqs = numtxqs * 2;
+
+ n->vqs = kmalloc(nvqs * sizeof(*n->vqs), GFP_KERNEL);
+ n->poll = kmalloc(nvqs * sizeof(*n->poll), GFP_KERNEL);
+ n->socks = kmalloc(nvqs * sizeof(*n->socks), GFP_KERNEL);
+ n->tx_poll_state = kmalloc(numtxqs * sizeof(*n->tx_poll_state),
+ GFP_KERNEL);
+ if (!n->vqs || !n->poll || !n->socks || !n->tx_poll_state)
+ goto err;
+
+ /* RX followed by TX queues */
+ for (i = 0; i < nvqs; i += 2) {
+ n->vqs[i].handle_kick = handle_rx_kick;
+ n->vqs[i + 1].handle_kick = handle_tx_kick;
}
- vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
- vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
- n->tx_poll_state = VHOST_NET_POLL_DISABLED;
+ ret = vhost_dev_init(dev, n->vqs, nvqs);
+ if (ret < 0)
+ goto err;
- f->private_data = n;
+ for (i = 0; i < nvqs; i += 2) {
+ vhost_poll_init(&n->poll[i], handle_rx_net, POLLIN,
+ &n->vqs[i]);
+ vhost_poll_init(&n->poll[i+1], handle_tx_net, POLLOUT,
+ &n->vqs[i+1]);
+ if (i / 2 < numtxqs)
+ n->tx_poll_state[i/2] = VHOST_NET_POLL_DISABLED;
+ }
return 0;
+
+err:
+ /* Free all pointers that may have been allocated */
+ vhost_free_vqs(dev);
+
+ return ret;
+}
+
+static int vhost_net_open(struct inode *inode, struct file *f)
+{
+ struct vhost_net *n = kzalloc(sizeof *n, GFP_KERNEL);
+ int ret = -ENOMEM;
+
+ if (n) {
+ struct vhost_dev *dev = &n->dev;
+
+ f->private_data = n;
+ mutex_init(&dev->mutex);
+
+ /* Defer all other initialization till user does SET_OWNER */
+ ret = 0;
+ }
+
+ return ret;
}
static void vhost_net_disable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq)
{
+ int qnum = vq->qnum;
+
if (!vq->private_data)
return;
- if (vq == n->vqs + VHOST_NET_VQ_TX) {
- tx_poll_stop(n);
- n->tx_poll_state = VHOST_NET_POLL_DISABLED;
- } else
- vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
+ if (qnum & 1) { /* Odd qnum -> TX */
+ tx_poll_stop(n, qnum);
+ n->tx_poll_state[qnum / 2] = VHOST_NET_POLL_DISABLED;
+ } else { /* Even qnum -> RX */
+ vhost_poll_stop(&n->poll[qnum]);
+ }
}
static void vhost_net_enable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq)
{
struct socket *sock;
+ int qnum = vq->qnum;
sock = rcu_dereference_protected(vq->private_data,
lockdep_is_held(&vq->mutex));
if (!sock)
return;
- if (vq == n->vqs + VHOST_NET_VQ_TX) {
- n->tx_poll_state = VHOST_NET_POLL_STOPPED;
- tx_poll_start(n, sock);
- } else
- vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
+ if (qnum & 1) { /* Odd qnum -> TX */
+ n->tx_poll_state[qnum / 2] = VHOST_NET_POLL_STOPPED;
+ tx_poll_start(n, sock, qnum);
+ } else { /* Even qnum -> RX */
+ vhost_poll_start(&n->poll[qnum], sock->file);
+ }
}
static struct socket *vhost_net_stop_vq(struct vhost_net *n,
@@ -495,11 +558,12 @@ static struct socket *vhost_net_stop_vq(
return sock;
}
-static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
- struct socket **rx_sock)
+static void vhost_net_stop(struct vhost_net *n)
{
- *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX);
- *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX);
+ int i;
+
+ for (i = 0; i < n->dev.nvqs; i++)
+ n->socks[i] = vhost_net_stop_vq(n, &n->vqs[i]);
}
static void vhost_net_flush_vq(struct vhost_net *n, int index)
@@ -510,26 +574,33 @@ static void vhost_net_flush_vq(struct vh
static void vhost_net_flush(struct vhost_net *n)
{
- vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
- vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
+ int i;
+
+ for (i = 0; i < n->dev.nvqs; i++)
+ vhost_net_flush_vq(n, i);
}
static int vhost_net_release(struct inode *inode, struct file *f)
{
struct vhost_net *n = f->private_data;
- struct socket *tx_sock;
- struct socket *rx_sock;
+ struct vhost_dev *dev = &n->dev;
+ int i;
- vhost_net_stop(n, &tx_sock, &rx_sock);
+ vhost_net_stop(n);
vhost_net_flush(n);
- vhost_dev_cleanup(&n->dev);
- if (tx_sock)
- fput(tx_sock->file);
- if (rx_sock)
- fput(rx_sock->file);
+ vhost_dev_cleanup(dev);
+
+ for (i = 0; i < n->dev.nvqs; i++)
+ if (n->socks[i])
+ fput(n->socks[i]->file);
+
/* We do an extra flush before freeing memory,
* since jobs can re-queue themselves. */
vhost_net_flush(n);
+
+ /* Free all old pointers */
+ vhost_free_vqs(dev);
+
kfree(n);
return 0;
}
@@ -610,7 +681,7 @@ static long vhost_net_set_backend(struct
if (r)
goto err;
- if (index >= VHOST_NET_VQ_MAX) {
+ if (index >= n->dev.nvqs) {
r = -ENOBUFS;
goto err;
}
@@ -656,23 +727,25 @@ err:
static long vhost_net_reset_owner(struct vhost_net *n)
{
- struct socket *tx_sock = NULL;
- struct socket *rx_sock = NULL;
long err;
+ int i;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
- if (err)
- goto done;
- vhost_net_stop(n, &tx_sock, &rx_sock);
+ if (err) {
+ mutex_unlock(&n->dev.mutex);
+ return err;
+ }
+
+ vhost_net_stop(n);
vhost_net_flush(n);
err = vhost_dev_reset_owner(&n->dev);
-done:
mutex_unlock(&n->dev.mutex);
- if (tx_sock)
- fput(tx_sock->file);
- if (rx_sock)
- fput(rx_sock->file);
+
+ for (i = 0; i < n->dev.nvqs; i++)
+ if (n->socks[i])
+ fput(n->socks[i]->file);
+
return err;
}
@@ -701,7 +774,7 @@ static int vhost_net_set_features(struct
}
n->dev.acked_features = features;
smp_wmb();
- for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
+ for (i = 0; i < n->dev.nvqs; ++i) {
mutex_lock(&n->vqs[i].mutex);
n->vqs[i].vhost_hlen = vhost_hlen;
n->vqs[i].sock_hlen = sock_hlen;
diff -ruNp org/drivers/vhost/vhost.c new/drivers/vhost/vhost.c
--- org/drivers/vhost/vhost.c 2011-04-05 14:15:18.000000000 +0530
+++ new/drivers/vhost/vhost.c 2011-04-05 20:06:11.000000000 +0530
@@ -70,12 +70,12 @@ static void vhost_work_init(struct vhost
/* Init poll structure */
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
- unsigned long mask, struct vhost_dev *dev)
+ unsigned long mask, struct vhost_virtqueue *vq)
{
init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
init_poll_funcptr(&poll->table, vhost_poll_func);
poll->mask = mask;
- poll->dev = dev;
+ poll->vq = vq;
vhost_work_init(&poll->work, fn);
}
@@ -98,30 +98,31 @@ void vhost_poll_stop(struct vhost_poll *
remove_wait_queue(poll->wqh, &poll->wait);
}
-static bool vhost_work_seq_done(struct vhost_dev *dev, struct vhost_work *work,
- unsigned seq)
+static bool vhost_work_seq_done(struct vhost_virtqueue *vq,
+ struct vhost_work *work, unsigned seq)
{
int left;
- spin_lock_irq(&dev->work_lock);
+ spin_lock_irq(vq->work_lock);
left = seq - work->done_seq;
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(vq->work_lock);
return left <= 0;
}
-static void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+static void vhost_work_flush(struct vhost_virtqueue *vq,
+ struct vhost_work *work)
{
unsigned seq;
int flushing;
- spin_lock_irq(&dev->work_lock);
+ spin_lock_irq(vq->work_lock);
seq = work->queue_seq;
work->flushing++;
- spin_unlock_irq(&dev->work_lock);
- wait_event(work->done, vhost_work_seq_done(dev, work, seq));
- spin_lock_irq(&dev->work_lock);
+ spin_unlock_irq(vq->work_lock);
+ wait_event(work->done, vhost_work_seq_done(vq, work, seq));
+ spin_lock_irq(vq->work_lock);
flushing = --work->flushing;
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(vq->work_lock);
BUG_ON(flushing < 0);
}
@@ -129,26 +130,26 @@ static void vhost_work_flush(struct vhos
* locks that are also used by the callback. */
void vhost_poll_flush(struct vhost_poll *poll)
{
- vhost_work_flush(poll->dev, &poll->work);
+ vhost_work_flush(poll->vq, &poll->work);
}
-static inline void vhost_work_queue(struct vhost_dev *dev,
+static inline void vhost_work_queue(struct vhost_virtqueue *vq,
struct vhost_work *work)
{
unsigned long flags;
- spin_lock_irqsave(&dev->work_lock, flags);
+ spin_lock_irqsave(vq->work_lock, flags);
if (list_empty(&work->node)) {
- list_add_tail(&work->node, &dev->work_list);
+ list_add_tail(&work->node, vq->work_list);
work->queue_seq++;
- wake_up_process(dev->worker);
+ wake_up_process(vq->worker);
}
- spin_unlock_irqrestore(&dev->work_lock, flags);
+ spin_unlock_irqrestore(vq->work_lock, flags);
}
void vhost_poll_queue(struct vhost_poll *poll)
{
- vhost_work_queue(poll->dev, &poll->work);
+ vhost_work_queue(poll->vq, &poll->work);
}
static void vhost_vq_reset(struct vhost_dev *dev,
@@ -178,17 +179,17 @@ static void vhost_vq_reset(struct vhost_
static int vhost_worker(void *data)
{
- struct vhost_dev *dev = data;
+ struct vhost_virtqueue *vq = data;
struct vhost_work *work = NULL;
unsigned uninitialized_var(seq);
- use_mm(dev->mm);
+ use_mm(vq->dev->mm);
for (;;) {
/* mb paired w/ kthread_stop */
set_current_state(TASK_INTERRUPTIBLE);
- spin_lock_irq(&dev->work_lock);
+ spin_lock_irq(vq->work_lock);
if (work) {
work->done_seq = seq;
if (work->flushing)
@@ -196,18 +197,18 @@ static int vhost_worker(void *data)
}
if (kthread_should_stop()) {
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(vq->work_lock);
__set_current_state(TASK_RUNNING);
break;
}
- if (!list_empty(&dev->work_list)) {
- work = list_first_entry(&dev->work_list,
+ if (!list_empty(vq->work_list)) {
+ work = list_first_entry(vq->work_list,
struct vhost_work, node);
list_del_init(&work->node);
seq = work->queue_seq;
} else
work = NULL;
- spin_unlock_irq(&dev->work_lock);
+ spin_unlock_irq(vq->work_lock);
if (work) {
__set_current_state(TASK_RUNNING);
@@ -216,7 +217,7 @@ static int vhost_worker(void *data)
schedule();
}
- unuse_mm(dev->mm);
+ unuse_mm(vq->dev->mm);
return 0;
}
@@ -262,6 +263,30 @@ static void vhost_dev_free_iovecs(struct
}
}
+/*
+ * Get index of an existing thread that will handle this rx/tx queue pair.
+ * The same thread handles both rx and tx.
+ */
+static int vhost_get_thread_index(int index)
+{
+ return (index / 2) % MAX_VHOST_THREADS;
+}
+
+/* Get index into the an earlier vq that we can share with */
+static int vhost_get_vq_index(int index)
+{
+ return vhost_get_thread_index(index) * 2;
+}
+
+/*
+ * This is needed to initialize work_list/work_lock or start a new
+ * worker thread. Since a single rx/tx is handled by a single worker,
+ */
+static int vhost_needs_init(int i, int j)
+{
+ return i == j * 2;
+}
+
long vhost_dev_init(struct vhost_dev *dev,
struct vhost_virtqueue *vqs, int nvqs)
{
@@ -274,20 +299,30 @@ long vhost_dev_init(struct vhost_dev *de
dev->log_file = NULL;
dev->memory = NULL;
dev->mm = NULL;
- spin_lock_init(&dev->work_lock);
- INIT_LIST_HEAD(&dev->work_list);
- dev->worker = NULL;
for (i = 0; i < dev->nvqs; ++i) {
- dev->vqs[i].log = NULL;
- dev->vqs[i].indirect = NULL;
- dev->vqs[i].heads = NULL;
- dev->vqs[i].dev = dev;
- mutex_init(&dev->vqs[i].mutex);
+ struct vhost_virtqueue *vq = &dev->vqs[i];
+ int j = vhost_get_thread_index(i);
+
+ if (vhost_needs_init(i, j)) {
+ spin_lock_init(&dev->work[j].work_lock);
+ INIT_LIST_HEAD(&dev->work[j].work_list);
+ }
+
+ vq->work_lock = &dev->work[j].work_lock;
+ vq->work_list = &dev->work[j].work_list;
+
+ vq->worker = NULL;
+ vq->qnum = i;
+ vq->log = NULL;
+ vq->indirect = NULL;
+ vq->heads = NULL;
+ vq->dev = dev;
+ mutex_init(&vq->mutex);
vhost_vq_reset(dev, dev->vqs + i);
- if (dev->vqs[i].handle_kick)
- vhost_poll_init(&dev->vqs[i].poll,
- dev->vqs[i].handle_kick, POLLIN, dev);
+ if (vq->handle_kick)
+ vhost_poll_init(&vq->poll,
+ vq->handle_kick, POLLIN, vq);
}
return 0;
@@ -314,21 +349,83 @@ static void vhost_attach_cgroups_work(st
s->ret = cgroup_attach_task_all(s->owner, current);
}
-static int vhost_attach_cgroups(struct vhost_dev *dev)
+static int vhost_attach_cgroups(struct vhost_virtqueue *vq)
{
struct vhost_attach_cgroups_struct attach;
attach.owner = current;
vhost_work_init(&attach.work, vhost_attach_cgroups_work);
- vhost_work_queue(dev, &attach.work);
- vhost_work_flush(dev, &attach.work);
+ vhost_work_queue(vq, &attach.work);
+ vhost_work_flush(vq, &attach.work);
return attach.ret;
}
+static void __vhost_stop_workers(struct vhost_dev *dev, int nvhosts)
+{
+ int i;
+
+ for (i = 0; i < dev->nvqs; i++) {
+ if (i < nvhosts) {
+ WARN_ON(!list_empty(dev->vqs[i * 2].work_list));
+ if (dev->vqs[i * 2].worker)
+ kthread_stop(dev->vqs[i * 2].worker);
+ }
+ dev->vqs[i].worker = NULL;
+ }
+
+ if (dev->mm)
+ mmput(dev->mm);
+ dev->mm = NULL;
+}
+
+static void vhost_stop_workers(struct vhost_dev *dev)
+{
+ int nthreads = min_t(int, dev->nvqs / 2, MAX_VHOST_THREADS);
+
+ __vhost_stop_workers(dev, nthreads);
+}
+
+static int vhost_start_workers(struct vhost_dev *dev)
+{
+ int i, err;
+
+ for (i = 0; i < dev->nvqs; ++i) {
+ struct vhost_virtqueue *vq = &dev->vqs[i];
+ int j = vhost_get_thread_index(i);
+
+ if (vhost_needs_init(i, j)) {
+ /* Start a new thread */
+ vq->worker = kthread_create(vhost_worker, vq,
+ "vhost-%d-%d",
+ current->pid, j);
+ if (IS_ERR(vq->worker)) {
+ err = PTR_ERR(vq->worker);
+ goto err;
+ }
+
+ wake_up_process(vq->worker);
+
+ /* avoid contributing to loadavg */
+ err = vhost_attach_cgroups(vq);
+ if (err)
+ goto err;
+ } else {
+ /* Share work with an existing thread */
+ int j = vhost_get_vq_index(i);
+
+ vq->worker = dev->vqs[j].worker;
+ }
+ }
+ return 0;
+
+err:
+ __vhost_stop_workers(dev, i / 2);
+ return err;
+}
+
/* Caller should have device mutex */
-static long vhost_dev_set_owner(struct vhost_dev *dev)
+static long vhost_dev_set_owner(struct vhost_dev *dev, int numtxqs)
{
- struct task_struct *worker;
int err;
/* Is there an owner already? */
@@ -337,33 +434,30 @@ static long vhost_dev_set_owner(struct v
goto err_mm;
}
+ err = vhost_setup_vqs(dev, numtxqs);
+ if (err)
+ goto err_mm;
+
/* No owner, become one */
dev->mm = get_task_mm(current);
- worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
- if (IS_ERR(worker)) {
- err = PTR_ERR(worker);
- goto err_worker;
- }
-
- dev->worker = worker;
- wake_up_process(worker); /* avoid contributing to loadavg */
- err = vhost_attach_cgroups(dev);
+ /* Start threads */
+ err = vhost_start_workers(dev);
if (err)
- goto err_cgroup;
+ goto free_vqs;
err = vhost_dev_alloc_iovecs(dev);
if (err)
- goto err_cgroup;
+ goto clean_workers;
return 0;
-err_cgroup:
- kthread_stop(worker);
- dev->worker = NULL;
-err_worker:
+clean_workers:
+ vhost_stop_workers(dev);
+free_vqs:
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
+ vhost_free_vqs(dev);
err_mm:
return err;
}
@@ -418,14 +512,7 @@ void vhost_dev_cleanup(struct vhost_dev
kfree(rcu_dereference_protected(dev->memory,
lockdep_is_held(&dev->mutex)));
RCU_INIT_POINTER(dev->memory, NULL);
- WARN_ON(!list_empty(&dev->work_list));
- if (dev->worker) {
- kthread_stop(dev->worker);
- dev->worker = NULL;
- }
- if (dev->mm)
- mmput(dev->mm);
- dev->mm = NULL;
+ vhost_stop_workers(dev);
}
static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
@@ -790,7 +877,7 @@ long vhost_dev_ioctl(struct vhost_dev *d
/* If you are not the owner, you can become one */
if (ioctl == VHOST_SET_OWNER) {
- r = vhost_dev_set_owner(d);
+ r = vhost_dev_set_owner(d, arg);
goto done;
}
diff -ruNp org/drivers/vhost/vhost.h new/drivers/vhost/vhost.h
--- org/drivers/vhost/vhost.h 2011-04-05 14:15:18.000000000 +0530
+++ new/drivers/vhost/vhost.h 2011-04-05 14:15:18.000000000 +0530
@@ -35,11 +35,11 @@ struct vhost_poll {
wait_queue_t wait;
struct vhost_work work;
unsigned long mask;
- struct vhost_dev *dev;
+ struct vhost_virtqueue *vq; /* points back to vq */
};
void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
- unsigned long mask, struct vhost_dev *dev);
+ unsigned long mask, struct vhost_virtqueue *vq);
void vhost_poll_start(struct vhost_poll *poll, struct file *file);
void vhost_poll_stop(struct vhost_poll *poll);
void vhost_poll_flush(struct vhost_poll *poll);
@@ -108,8 +108,20 @@ struct vhost_virtqueue {
/* Log write descriptors */
void __user *log_base;
struct vhost_log *log;
+ struct task_struct *worker; /* worker for this vq */
+ spinlock_t *work_lock; /* points to a dev->work_lock[] entry */
+ struct list_head *work_list; /* points to a dev->work_list[] entry */
+ int qnum; /* 0 for RX, 1 for TX, and so on alternatively */
};
+/* work entry and the lock */
+struct work_lock_list {
+ spinlock_t work_lock;
+ struct list_head work_list;
+} ____cacheline_aligned_in_smp;
+
+#define MAX_VHOST_THREADS 4
+
struct vhost_dev {
/* Readers use RCU to access memory table pointer
* log base pointer and features.
@@ -122,11 +134,11 @@ struct vhost_dev {
int nvqs;
struct file *log_file;
struct eventfd_ctx *log_ctx;
- spinlock_t work_lock;
- struct list_head work_list;
- struct task_struct *worker;
+ struct work_lock_list work[MAX_VHOST_THREADS];
};
+int vhost_setup_vqs(struct vhost_dev *dev, int numtxqs);
+void vhost_free_vqs(struct vhost_dev *dev);
long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
long vhost_dev_check_owner(struct vhost_dev *);
long vhost_dev_reset_owner(struct vhost_dev *);
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH 4/4] [RFC rev2] qemu changes
2011-04-05 15:08 [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Krishna Kumar
` (2 preceding siblings ...)
2011-04-05 15:09 ` [PATCH 3/4] [RFC rev2] vhost changes Krishna Kumar
@ 2011-04-05 15:09 ` Krishna Kumar
2011-04-13 12:00 ` [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Avi Kivity
4 siblings, 0 replies; 9+ messages in thread
From: Krishna Kumar @ 2011-04-05 15:09 UTC (permalink / raw)
To: rusty, davem, mst
Cc: eric.dumazet, arnd, netdev, horms, avi, anthony, kvm,
Krishna Kumar
diff -ruNp org/hw/vhost.c new/hw/vhost.c
--- org/hw/vhost.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost.c 2011-04-05 14:15:18.000000000 +0530
@@ -581,7 +581,7 @@ static void vhost_virtqueue_cleanup(stru
0, virtio_queue_get_desc_size(vdev, idx));
}
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force, int numtxqs)
{
uint64_t features;
int r;
@@ -593,11 +593,13 @@ int vhost_dev_init(struct vhost_dev *hde
return -errno;
}
}
- r = ioctl(hdev->control, VHOST_SET_OWNER, NULL);
+ r = ioctl(hdev->control, VHOST_SET_OWNER, numtxqs);
if (r < 0) {
goto fail;
}
+ hdev->nvqs = numtxqs * 2;
+
r = ioctl(hdev->control, VHOST_GET_FEATURES, &features);
if (r < 0) {
goto fail;
diff -ruNp org/hw/vhost.h new/hw/vhost.h
--- org/hw/vhost.h 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost.h 2011-04-05 14:15:18.000000000 +0530
@@ -41,7 +41,7 @@ struct vhost_dev {
bool force;
};
-int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force);
+int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force, int numtxqs);
void vhost_dev_cleanup(struct vhost_dev *hdev);
bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
diff -ruNp org/hw/vhost_net.c new/hw/vhost_net.c
--- org/hw/vhost_net.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost_net.c 2011-04-05 20:27:01.000000000 +0530
@@ -36,8 +36,9 @@
struct vhost_net {
struct vhost_dev dev;
- struct vhost_virtqueue vqs[2];
- int backend;
+ struct vhost_virtqueue *vqs;
+ int nvqs;
+ int *backend;
VLANClientState *vc;
};
@@ -70,11 +71,11 @@ void vhost_net_ack_features(struct vhost
}
}
-static int vhost_net_get_fd(VLANClientState *backend)
+static int vhost_net_get_fd(VLANClientState *backend, int index)
{
switch (backend->info->type) {
case NET_CLIENT_TYPE_TAP:
- return tap_get_fd(backend);
+ return tap_get_fd(backend, index);
default:
fprintf(stderr, "vhost-net requires tap backend\n");
return -EBADFD;
@@ -82,27 +83,36 @@ static int vhost_net_get_fd(VLANClientSt
}
struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
- bool force)
+ bool force, int numtxqs)
{
- int r;
+ int i, r;
struct vhost_net *net = qemu_malloc(sizeof *net);
if (!backend) {
fprintf(stderr, "vhost-net requires backend to be setup\n");
goto fail;
}
- r = vhost_net_get_fd(backend);
- if (r < 0) {
- goto fail;
+
+ net->backend = qemu_malloc(numtxqs * (sizeof *net->backend));
+ for (i = 0; i < numtxqs; i++) {
+ r = vhost_net_get_fd(backend, i);
+ if (r < 0) {
+ goto fail;
+ }
+ net->backend[i] = r;
}
+
net->vc = backend;
net->dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
(1 << VHOST_NET_F_VIRTIO_NET_HDR);
- net->backend = r;
- r = vhost_dev_init(&net->dev, devfd, force);
+ r = vhost_dev_init(&net->dev, devfd, force, numtxqs);
if (r < 0) {
goto fail;
}
+
+ net->nvqs = numtxqs * 2;
+ net->vqs = qemu_malloc(net->nvqs * (sizeof *net->vqs));
+
if (!tap_has_vnet_hdr_len(backend,
sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
net->dev.features &= ~(1 << VIRTIO_NET_F_MRG_RXBUF);
@@ -137,7 +147,6 @@ int vhost_net_start(struct vhost_net *ne
sizeof(struct virtio_net_hdr_mrg_rxbuf));
}
- net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
r = vhost_dev_start(&net->dev, dev);
if (r < 0) {
@@ -145,9 +154,9 @@ int vhost_net_start(struct vhost_net *ne
}
net->vc->info->poll(net->vc, false);
- qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
- file.fd = net->backend;
for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
+ qemu_set_fd_handler(net->backend[file.index/2], NULL, NULL, NULL);
+ file.fd = net->backend[(file.index / 2) % (net->dev.nvqs / 2)];
r = ioctl(net->dev.control, VHOST_NET_SET_BACKEND, &file);
if (r < 0) {
r = -errno;
@@ -195,7 +204,7 @@ void vhost_net_cleanup(struct vhost_net
}
#else
struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
- bool force)
+ bool force, int numtxqs)
{
return NULL;
}
diff -ruNp org/hw/vhost_net.h new/hw/vhost_net.h
--- org/hw/vhost_net.h 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/vhost_net.h 2011-04-05 14:15:18.000000000 +0530
@@ -6,7 +6,8 @@
struct vhost_net;
typedef struct vhost_net VHostNetState;
-VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool force);
+VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool force,
+ int numtxqs);
bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
int vhost_net_start(VHostNetState *net, VirtIODevice *dev);
diff -ruNp org/hw/virtio-net.c new/hw/virtio-net.c
--- org/hw/virtio-net.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/virtio-net.c 2011-04-05 14:15:18.000000000 +0530
@@ -31,8 +31,8 @@ typedef struct VirtIONet
VirtIODevice vdev;
uint8_t mac[ETH_ALEN];
uint16_t status;
- VirtQueue *rx_vq;
- VirtQueue *tx_vq;
+ VirtQueue **rx_vq;
+ VirtQueue **tx_vq;
VirtQueue *ctrl_vq;
NICState *nic;
QEMUTimer *tx_timer;
@@ -63,6 +63,7 @@ typedef struct VirtIONet
} mac_table;
uint32_t *vlans;
DeviceState *qdev;
+ uint16_t numtxqs;
} VirtIONet;
/* TODO
@@ -80,6 +81,7 @@ static void virtio_net_get_config(VirtIO
struct virtio_net_config netcfg;
stw_p(&netcfg.status, n->status);
+ netcfg.num_queue_pairs = n->numtxqs * 2;
memcpy(netcfg.mac, n->mac, ETH_ALEN);
memcpy(config, &netcfg, sizeof(netcfg));
}
@@ -228,6 +230,9 @@ static uint32_t virtio_net_get_features(
VirtIONet *n = to_virtio_net(vdev);
features |= (1 << VIRTIO_NET_F_MAC);
+ if (n->numtxqs > 1)
+ features |= (1 << VIRTIO_NET_F_MULTIQUEUE);
+
if (peer_has_vnet_hdr(n)) {
tap_using_vnet_hdr(n->nic->nc.peer, 1);
@@ -460,7 +465,7 @@ static int virtio_net_can_receive(VLANCl
return 0;
}
- if (!virtio_queue_ready(n->rx_vq) ||
+ if (!virtio_queue_ready(n->rx_vq[0]) ||
!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return 0;
@@ -469,22 +474,22 @@ static int virtio_net_can_receive(VLANCl
static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
{
- if (virtio_queue_empty(n->rx_vq) ||
+ if (virtio_queue_empty(n->rx_vq[0]) ||
(n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
- virtio_queue_set_notification(n->rx_vq, 1);
+ !virtqueue_avail_bytes(n->rx_vq[0], bufsize, 0))) {
+ virtio_queue_set_notification(n->rx_vq[0], 1);
/* To avoid a race condition where the guest has made some buffers
* available after the above check but before notification was
* enabled, check for available buffers again.
*/
- if (virtio_queue_empty(n->rx_vq) ||
+ if (virtio_queue_empty(n->rx_vq[0]) ||
(n->mergeable_rx_bufs &&
- !virtqueue_avail_bytes(n->rx_vq, bufsize, 0)))
+ !virtqueue_avail_bytes(n->rx_vq[0], bufsize, 0)))
return 0;
}
- virtio_queue_set_notification(n->rx_vq, 0);
+ virtio_queue_set_notification(n->rx_vq[0], 0);
return 1;
}
@@ -623,7 +628,7 @@ static ssize_t virtio_net_receive(VLANCl
total = 0;
- if (virtqueue_pop(n->rx_vq, &elem) == 0) {
+ if (virtqueue_pop(n->rx_vq[0], &elem) == 0) {
if (i == 0)
return -1;
error_report("virtio-net unexpected empty queue: "
@@ -675,15 +680,15 @@ static ssize_t virtio_net_receive(VLANCl
}
/* signal other side */
- virtqueue_fill(n->rx_vq, &elem, total, i++);
+ virtqueue_fill(n->rx_vq[0], &elem, total, i++);
}
if (mhdr) {
stw_p(&mhdr->num_buffers, i);
}
- virtqueue_flush(n->rx_vq, i);
- virtio_notify(&n->vdev, n->rx_vq);
+ virtqueue_flush(n->rx_vq[0], i);
+ virtio_notify(&n->vdev, n->rx_vq[0]);
return size;
}
@@ -694,13 +699,13 @@ static void virtio_net_tx_complete(VLANC
{
VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
- virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
- virtio_notify(&n->vdev, n->tx_vq);
+ virtqueue_push(n->tx_vq[0], &n->async_tx.elem, n->async_tx.len);
+ virtio_notify(&n->vdev, n->tx_vq[0]);
n->async_tx.elem.out_num = n->async_tx.len = 0;
- virtio_queue_set_notification(n->tx_vq, 1);
- virtio_net_flush_tx(n, n->tx_vq);
+ virtio_queue_set_notification(n->tx_vq[0], 1);
+ virtio_net_flush_tx(n, n->tx_vq[0]);
}
/* TX */
@@ -715,7 +720,7 @@ static int32_t virtio_net_flush_tx(VirtI
assert(n->vdev.vm_running);
if (n->async_tx.elem.out_num) {
- virtio_queue_set_notification(n->tx_vq, 0);
+ virtio_queue_set_notification(n->tx_vq[0], 0);
return num_packets;
}
@@ -750,7 +755,7 @@ static int32_t virtio_net_flush_tx(VirtI
ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
virtio_net_tx_complete);
if (ret == 0) {
- virtio_queue_set_notification(n->tx_vq, 0);
+ virtio_queue_set_notification(n->tx_vq[0], 0);
n->async_tx.elem = elem;
n->async_tx.len = len;
return -EBUSY;
@@ -818,8 +823,8 @@ static void virtio_net_tx_timer(void *op
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return;
- virtio_queue_set_notification(n->tx_vq, 1);
- virtio_net_flush_tx(n, n->tx_vq);
+ virtio_queue_set_notification(n->tx_vq[0], 1);
+ virtio_net_flush_tx(n, n->tx_vq[0]);
}
static void virtio_net_tx_bh(void *opaque)
@@ -835,7 +840,7 @@ static void virtio_net_tx_bh(void *opaqu
if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
return;
- ret = virtio_net_flush_tx(n, n->tx_vq);
+ ret = virtio_net_flush_tx(n, n->tx_vq[0]);
if (ret == -EBUSY) {
return; /* Notification re-enable handled by tx_complete */
}
@@ -851,9 +856,9 @@ static void virtio_net_tx_bh(void *opaqu
/* If less than a full burst, re-enable notification and flush
* anything that may have come in while we weren't looking. If
* we find something, assume the guest is still active and reschedule */
- virtio_queue_set_notification(n->tx_vq, 1);
- if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
- virtio_queue_set_notification(n->tx_vq, 0);
+ virtio_queue_set_notification(n->tx_vq[0], 1);
+ if (virtio_net_flush_tx(n, n->tx_vq[0]) > 0) {
+ virtio_queue_set_notification(n->tx_vq[0], 0);
qemu_bh_schedule(n->tx_bh);
n->tx_waiting = 1;
}
@@ -869,6 +874,7 @@ static void virtio_net_save(QEMUFile *f,
virtio_save(&n->vdev, f);
qemu_put_buffer(f, n->mac, ETH_ALEN);
+ qemu_put_be16(f, n->numtxqs);
qemu_put_be32(f, n->tx_waiting);
qemu_put_be32(f, n->mergeable_rx_bufs);
qemu_put_be16(f, n->status);
@@ -898,6 +904,7 @@ static int virtio_net_load(QEMUFile *f,
virtio_load(&n->vdev, f);
qemu_get_buffer(f, n->mac, ETH_ALEN);
+ n->numtxqs = qemu_get_be32(f);
n->tx_waiting = qemu_get_be32(f);
n->mergeable_rx_bufs = qemu_get_be32(f);
@@ -996,11 +1003,13 @@ VirtIODevice *virtio_net_init(DeviceStat
virtio_net_conf *net)
{
VirtIONet *n;
+ int i;
n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
sizeof(struct virtio_net_config),
sizeof(VirtIONet));
+ n->numtxqs = conf->peer->numtxqs;
n->vdev.get_config = virtio_net_get_config;
n->vdev.set_config = virtio_net_set_config;
n->vdev.get_features = virtio_net_get_features;
@@ -1008,7 +1017,6 @@ VirtIODevice *virtio_net_init(DeviceStat
n->vdev.bad_features = virtio_net_bad_features;
n->vdev.reset = virtio_net_reset;
n->vdev.set_status = virtio_net_set_status;
- n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
error_report("virtio-net: "
@@ -1017,12 +1025,25 @@ VirtIODevice *virtio_net_init(DeviceStat
error_report("Defaulting to \"bh\"");
}
+ /* Allocate per rx/tx vq's */
+ n->rx_vq = qemu_mallocz(n->numtxqs * sizeof(*n->rx_vq));
+ n->tx_vq = qemu_mallocz(n->numtxqs * sizeof(*n->tx_vq));
+
+ for (i = 0; i < n->numtxqs; i++) {
+ n->rx_vq[i] = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
+ if (net->tx && !strcmp(net->tx, "timer")) {
+ n->tx_vq[i] = virtio_add_queue(&n->vdev, 256,
+ virtio_net_handle_tx_timer);
+ } else {
+ n->tx_vq[i] = virtio_add_queue(&n->vdev, 256,
+ virtio_net_handle_tx_bh);
+ }
+ }
+
if (net->tx && !strcmp(net->tx, "timer")) {
- n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
n->tx_timeout = net->txtimer;
} else {
- n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
}
n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
diff -ruNp org/hw/virtio-net.h new/hw/virtio-net.h
--- org/hw/virtio-net.h 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/virtio-net.h 2011-04-05 14:15:18.000000000 +0530
@@ -44,6 +44,7 @@
#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
+#define VIRTIO_NET_F_MULTIQUEUE 21 /* Supports multiple RX/TX queues */
#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
@@ -72,6 +73,7 @@ struct virtio_net_config
uint8_t mac[ETH_ALEN];
/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
uint16_t status;
+ uint16_t num_queue_pairs; /* number of rx+tx queues */
} __attribute__((packed));
/* This is the first element of the scatter-gather list. If you don't
diff -ruNp org/hw/virtio-pci.c new/hw/virtio-pci.c
--- org/hw/virtio-pci.c 2011-04-05 14:15:18.000000000 +0530
+++ new/hw/virtio-pci.c 2011-04-05 14:15:18.000000000 +0530
@@ -103,6 +103,7 @@ typedef struct {
uint32_t addr;
uint32_t class_code;
uint32_t nvectors;
+ uint32_t mq;
BlockConf block;
NICConf nic;
uint32_t host_features;
@@ -965,6 +966,7 @@ static PCIDeviceInfo virtio_info[] = {
DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false),
DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
+ DEFINE_PROP_UINT32("mq", VirtIOPCIProxy, mq, 1),
DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),
DEFINE_PROP_UINT32("x-txtimer", VirtIOPCIProxy,
diff -ruNp org/net/tap.c new/net/tap.c
--- org/net/tap.c 2011-04-05 14:15:18.000000000 +0530
+++ new/net/tap.c 2011-04-05 14:15:18.000000000 +0530
@@ -49,16 +49,20 @@
*/
#define TAP_BUFSIZE (4096 + 65536)
+#define VIRTIO_MAX_TXQS 8
+
typedef struct TAPState {
VLANClientState nc;
- int fd;
+ int *fds;
+ int numfds;
char down_script[1024];
- char down_script_arg[128];
+ char down_script_arg[VIRTIO_MAX_TXQS][128];
uint8_t buf[TAP_BUFSIZE];
unsigned int read_poll : 1;
unsigned int write_poll : 1;
unsigned int using_vnet_hdr : 1;
unsigned int has_ufo: 1;
+ unsigned int do_script: 1;
VHostNetState *vhost_net;
unsigned host_vnet_hdr_len;
} TAPState;
@@ -71,11 +75,16 @@ static void tap_writable(void *opaque);
static void tap_update_fd_handler(TAPState *s)
{
- qemu_set_fd_handler2(s->fd,
- s->read_poll ? tap_can_send : NULL,
- s->read_poll ? tap_send : NULL,
- s->write_poll ? tap_writable : NULL,
- s);
+ int i;
+
+ for (i = 0; i < s->numfds; i++) {
+ qemu_set_fd_handler2(s->fds[i],
+ s->read_poll ? tap_can_send : NULL,
+ s->read_poll ? tap_send : NULL,
+ s->write_poll ? tap_writable : NULL,
+ s);
+ }
+ }
}
static void tap_read_poll(TAPState *s, int enable)
@@ -104,7 +113,7 @@ static ssize_t tap_write_packet(TAPState
ssize_t len;
do {
- len = writev(s->fd, iov, iovcnt);
+ len = writev(s->fds[0], iov, iovcnt);
} while (len == -1 && errno == EINTR);
if (len == -1 && errno == EAGAIN) {
@@ -197,7 +206,7 @@ static void tap_send(void *opaque)
do {
uint8_t *buf = s->buf;
- size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
+ size = tap_read_packet(s->fds[0], s->buf, sizeof(s->buf));
if (size <= 0) {
break;
}
@@ -238,18 +247,20 @@ int tap_has_vnet_hdr_len(VLANClientState
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
- return tap_probe_vnet_hdr_len(s->fd, len);
+ return tap_probe_vnet_hdr_len(s->fds[0], len);
}
void tap_set_vnet_hdr_len(VLANClientState *nc, int len)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ int i;
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
len == sizeof(struct virtio_net_hdr));
- tap_fd_set_vnet_hdr_len(s->fd, len);
+ for (i = 0; i < s->numfds; i++)
+ tap_fd_set_vnet_hdr_len(s->fds[i], len);
s->host_vnet_hdr_len = len;
}
@@ -269,16 +280,27 @@ void tap_set_offload(VLANClientState *nc
int tso6, int ecn, int ufo)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
- if (s->fd < 0) {
- return;
+ int i;
+
+ for (i = 0; i < s->numfds; i++) {
+ if (s->fds[i] >= 0)
+ tap_fd_set_offload(s->fds[i], csum, tso4, tso6, ecn, ufo);
}
+}
- tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo);
+static void close_tap_fds(int *fds, int numtxqs)
+{
+ int i;
+
+ for (i = 0; i < numtxqs; i++) {
+ close(fds[i]);
+ }
}
static void tap_cleanup(VLANClientState *nc)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
+ int i;
if (s->vhost_net) {
vhost_net_cleanup(s->vhost_net);
@@ -287,13 +309,15 @@ static void tap_cleanup(VLANClientState
qemu_purge_queued_packets(nc);
- if (s->down_script[0])
- launch_script(s->down_script, s->down_script_arg, s->fd);
+ for (i = 0; i < s->numfds; i++) {
+ if (s->down_script[0])
+ launch_script(s->down_script, s->down_script_arg[i], s->fds[i]);
+ }
tap_read_poll(s, 0);
tap_write_poll(s, 0);
- close(s->fd);
- s->fd = -1;
+
+ close_tap_fds(s->fds, s->numfds);
}
static void tap_poll(VLANClientState *nc, bool enable)
@@ -303,11 +327,12 @@ static void tap_poll(VLANClientState *nc
tap_write_poll(s, enable);
}
-int tap_get_fd(VLANClientState *nc)
+int tap_get_fd(VLANClientState *nc, int index)
{
TAPState *s = DO_UPCAST(TAPState, nc, nc);
assert(nc->info->type == NET_CLIENT_TYPE_TAP);
- return s->fd;
+ assert(index < s->numfds);
+ return s->fds[index];
}
/* fd support */
@@ -325,20 +350,25 @@ static NetClientInfo net_tap_info = {
static TAPState *net_tap_fd_init(VLANState *vlan,
const char *model,
const char *name,
- int fd,
+ int *fds, int numtxqs,
int vnet_hdr)
{
VLANClientState *nc;
TAPState *s;
+ int i;
nc = qemu_new_net_client(&net_tap_info, vlan, NULL, model, name);
+ nc->numtxqs = numtxqs;
s = DO_UPCAST(TAPState, nc, nc);
- s->fd = fd;
+ s->fds = fds;
+ s->numfds = numtxqs;
s->host_vnet_hdr_len = vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
s->using_vnet_hdr = 0;
- s->has_ufo = tap_probe_has_ufo(s->fd);
+ for (i = 0; i < s->numfds; i++) {
+ s->has_ufo = tap_probe_has_ufo(s->fds[i]);
+ }
tap_set_offload(&s->nc, 0, 0, 0, 0, 0);
tap_read_poll(s, 1);
s->vhost_net = NULL;
@@ -389,11 +419,28 @@ static int launch_script(const char *set
return -1;
}
-static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
+static int net_tap_init(QemuOpts *opts, int *vnet_hdr, int *fds, int numtxqs,
+ int *script)
{
- int fd, vnet_hdr_required;
+ int i, vnet_hdr_required;
char ifname[128] = {0,};
const char *setup_script;
+ int launch = 0;
+ const char *dev;
+
+ if (qemu_opt_get(opts, "vtap")) {
+ *vnet_hdr = 1;
+ *script = 0; /* we don't need start/stop script */
+ dev = qemu_opt_get(opts, "vtap");
+ for (i = 0; i < numtxqs; i++) {
+ TFR(fds[i] = vtap_open(dev, vnet_hdr, 1));
+ if (fds[i] < 0)
+ goto err;
+ fcntl(fds[i], F_SETFL, O_NONBLOCK);
+ }
+ *vnet_hdr = !!tap_probe_vnet_hdr(fds[0]);
+ return 0;
+ }
if (qemu_opt_get(opts, "ifname")) {
pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
@@ -406,29 +453,76 @@ static int net_tap_init(QemuOpts *opts,
vnet_hdr_required = 0;
}
- TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
- if (fd < 0) {
- return -1;
- }
-
setup_script = qemu_opt_get(opts, "script");
if (setup_script &&
setup_script[0] != '\0' &&
- strcmp(setup_script, "no") != 0 &&
- launch_script(setup_script, ifname, fd)) {
- close(fd);
- return -1;
+ strcmp(setup_script, "no") != 0) {
+ launch = 1;
+ *script = 1;
+ }
+
+ if (numtxqs == 1) {
+ fprintf(stderr, "Device: %s\n", ifname);
+ TFR(fds[0] = tap_open(ifname, sizeof(ifname), vnet_hdr,
+ vnet_hdr_required));
+ if (fds[0] < 0) {
+ goto err;
+ }
+
+ if (launch && launch_script(setup_script, ifname, fds[0]))
+ goto err;
+ } else {
+ char alt_name[128];
+
+ for (i = 0; i < numtxqs; i++) {
+ sprintf(alt_name, "%s.%d", ifname, i);
+ fprintf(stderr, "Device: %s\n", alt_name);
+ TFR(fds[i] = tap_open(alt_name, sizeof(alt_name), vnet_hdr,
+ vnet_hdr_required));
+ if (fds[i] < 0) {
+ goto err;
+ }
+
+ if (launch && launch_script(setup_script, alt_name, fds[i]))
+ goto err;
+ }
}
qemu_opt_set(opts, "ifname", ifname);
- return fd;
+ return 0;
+
+err:
+ close_tap_fds(fds, numtxqs);
+ return -1;
}
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
{
TAPState *s;
- int fd, vnet_hdr = 0;
+ int *fds, vnet_hdr = 0;
+ int i, vhost;
+ int script = 0, numtxqs = 1;
+
+ vhost = qemu_opt_get_bool(opts, "vhost", 0);
+
+ /*
+ * We support multiple tx queues if:
+ * 1. smp > 1
+ * 2. vhost=on
+ * 3. mq=on
+ * In this case, #txqueues = #cpus. This value can be changed by
+ * using the "numtxqs" option.
+ */
+ if (vhost && smp_cpus > 1) {
+ if (qemu_opt_get_bool(opts, "mq", 0)) {
+ int dflt = MIN(smp_cpus, VIRTIO_MAX_TXQS);
+
+ numtxqs = qemu_opt_get_number(opts, "numtxqs", dflt);
+ }
+ }
+
+ fds = qemu_mallocz(numtxqs * sizeof(*fds));
if (qemu_opt_get(opts, "fd")) {
if (qemu_opt_get(opts, "ifname") ||
@@ -439,14 +533,14 @@ int net_init_tap(QemuOpts *opts, Monitor
return -1;
}
- fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
- if (fd == -1) {
+ fds[0] = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
+ if (fds[0] == -1) {
return -1;
}
- fcntl(fd, F_SETFL, O_NONBLOCK);
+ fcntl(fds[0], F_SETFL, O_NONBLOCK);
- vnet_hdr = tap_probe_vnet_hdr(fd);
+ vnet_hdr = tap_probe_vnet_hdr(fds[0]);
} else {
if (!qemu_opt_get(opts, "script")) {
qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
@@ -456,24 +550,28 @@ int net_init_tap(QemuOpts *opts, Monitor
qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
}
- fd = net_tap_init(opts, &vnet_hdr);
- if (fd == -1) {
+ if (net_tap_init(opts, &vnet_hdr, fds, numtxqs, &script) == -1) {
return -1;
}
}
- s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
+ s = net_tap_fd_init(vlan, "tap", name, fds, numtxqs, vnet_hdr);
if (!s) {
- close(fd);
+ close_tap_fds(fds, numtxqs);
return -1;
}
- if (tap_set_sndbuf(s->fd, opts) < 0) {
- return -1;
+ s->do_script = script;
+
+ for (i = 0; i < s->numfds; i++) {
+ if (tap_set_sndbuf(s->fds[i], opts) < 0) {
+ close_tap_fds(fds, numtxqs);
+ return -1;
+ }
}
if (qemu_opt_get(opts, "fd")) {
- snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
+ snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fds[0]);
} else {
const char *ifname, *script, *downscript;
@@ -487,12 +585,20 @@ int net_init_tap(QemuOpts *opts, Monitor
if (strcmp(downscript, "no") != 0) {
snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
- snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
+ for (i = 0; i < s->numfds; i++) {
+ char alt_name[128];
+
+ if (s->numfds == 1) {
+ pstrcpy(alt_name, sizeof(ifname), ifname);
+ } else {
+ sprintf(alt_name, "%s.%d", ifname, i);
+ }
+ snprintf(s->down_script_arg[i], sizeof(s->down_script_arg[i]), "%s", alt_name);
+ }
}
}
- if (qemu_opt_get_bool(opts, "vhost", !!qemu_opt_get(opts, "vhostfd") ||
- qemu_opt_get_bool(opts, "vhostforce", false))) {
+ if (vhost) {
int vhostfd, r;
bool force = qemu_opt_get_bool(opts, "vhostforce", false);
if (qemu_opt_get(opts, "vhostfd")) {
@@ -504,9 +610,13 @@ int net_init_tap(QemuOpts *opts, Monitor
} else {
vhostfd = -1;
}
- s->vhost_net = vhost_net_init(&s->nc, vhostfd, force);
+ s->vhost_net = vhost_net_init(&s->nc, vhostfd, force, numtxqs);
if (!s->vhost_net) {
error_report("vhost-net requested but could not be initialized");
+ if (numtxqs > 1) {
+ error_report("Need vhost support for numtxqs > 1, exiting...");
+ exit(1);
+ }
return -1;
}
} else if (qemu_opt_get(opts, "vhostfd")) {
diff -ruNp org/net/tap.h new/net/tap.h
--- org/net/tap.h 2011-04-05 14:15:18.000000000 +0530
+++ new/net/tap.h 2011-04-05 14:15:18.000000000 +0530
@@ -35,6 +35,7 @@
int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan);
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required);
+int vtap_open(const char *devname, int *vnet_hdr, int vnet_hdr_required);
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen);
@@ -52,7 +53,7 @@ int tap_probe_has_ufo(int fd);
void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
void tap_fd_set_vnet_hdr_len(int fd, int len);
-int tap_get_fd(VLANClientState *vc);
+int tap_get_fd(VLANClientState *vc, int index);
struct vhost_net;
struct vhost_net *tap_get_vhost_net(VLANClientState *vc);
diff -ruNp org/net/tap-linux.c new/net/tap-linux.c
--- org/net/tap-linux.c 2011-04-05 14:15:18.000000000 +0530
+++ new/net/tap-linux.c 2011-04-05 14:15:18.000000000 +0530
@@ -82,6 +82,48 @@ int tap_open(char *ifname, int ifname_si
return fd;
}
+int vtap_open(const char *devname, int *vnet_hdr, int vnet_hdr_required)
+{
+ struct ifreq ifr;
+ int fd, ret;
+
+ TFR(fd = open(devname, O_RDWR));
+ if (fd < 0) {
+ fprintf(stderr, "warning: could not open %s: no virtual network emulation\n", devname);
+ return -1;
+ }
+ memset(&ifr, 0, sizeof(ifr));
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+ if (*vnet_hdr) {
+ unsigned int features;
+
+ if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
+ features & IFF_VNET_HDR) {
+ *vnet_hdr = 1;
+ ifr.ifr_flags |= IFF_VNET_HDR;
+ } else {
+ *vnet_hdr = 0;
+ }
+
+ if (vnet_hdr_required && !*vnet_hdr) {
+ error_report("vnet_hdr=1 requested, but no kernel "
+ "support for IFF_VNET_HDR available");
+ close(fd);
+ return -1;
+ }
+ }
+
+ ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ if (ret != 0) {
+ fprintf(stderr, "warning: could not configure %s: no virtual network emulation\n", devname);
+ close(fd);
+ return -1;
+ }
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ return fd;
+}
+
/* sndbuf implements a kind of flow control for tap.
* Unfortunately when it's enabled, and packets are sent
* to other guests on the same host, the receiver
diff -ruNp org/net.c new/net.c
--- org/net.c 2011-04-05 14:15:18.000000000 +0530
+++ new/net.c 2011-04-05 14:15:18.000000000 +0530
@@ -798,6 +798,16 @@ static int net_init_nic(QemuOpts *opts,
return -1;
}
+ if (nd->netdev->numtxqs > 1 && nd->nvectors == DEV_NVECTORS_UNSPECIFIED) {
+ /*
+ * User specified mq for guest, but no "vectors=", tune
+ * it automatically to 'numtxqs' TX + 'numtxqs' RX + 1 controlq.
+ */
+ nd->nvectors = nd->netdev->numtxqs * 2 + 1;
+ monitor_printf(mon, "nvectors tuned to %d\n", nd->nvectors);
+ }
+
+
nd->used = 1;
nb_nics++;
@@ -941,6 +951,18 @@ static const struct {
},
#ifndef _WIN32
{
+ .name = "vtap",
+ .type = QEMU_OPT_STRING,
+ .help = "name of macvtap device to use",
+ }, {
+ .name = "mq",
+ .type = QEMU_OPT_BOOL,
+ .help = "enable multiqueue on network i/f",
+ }, {
+ .name = "numtxqs",
+ .type = QEMU_OPT_NUMBER,
+ .help = "optional number of RX/TX queues, if mq is enabled",
+ }, {
.name = "fd",
.type = QEMU_OPT_STRING,
.help = "file descriptor of an already opened tap",
diff -ruNp org/net.h new/net.h
--- org/net.h 2011-04-05 14:15:18.000000000 +0530
+++ new/net.h 2011-04-05 14:15:18.000000000 +0530
@@ -64,6 +64,7 @@ struct VLANClientState {
struct VLANState *vlan;
VLANClientState *peer;
NetQueue *send_queue;
+ int numtxqs;
char *model;
char *name;
char info_str[256];
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net
2011-04-05 15:08 [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Krishna Kumar
` (3 preceding siblings ...)
2011-04-05 15:09 ` [PATCH 4/4] [RFC rev2] qemu changes Krishna Kumar
@ 2011-04-13 12:00 ` Avi Kivity
2011-04-13 16:22 ` Krishna Kumar2
4 siblings, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2011-04-13 12:00 UTC (permalink / raw)
To: Krishna Kumar
Cc: rusty, davem, mst, eric.dumazet, arnd, netdev, horms, anthony,
kvm
On 04/05/2011 06:08 PM, Krishna Kumar wrote:
> This patchset implements both RX and TX MQ. Patch against virtio-net,
> vhost and qemu are included.
>
> Changes from rev1:
> -------------------
> 1. vqs are allocated as: rx/tx, rx/tx, rx/tx, etc. Lot of code in
> guest/host/qemu changes, but code becomes simpler.
> 2. vhost cache align of vhost_dev correctly.
> 3. virtio-net: cleanup properly on errors (eg detach buf for vq>0 as
> pointed out by Micheal).
> 4. Minor changes:
> - Fixed some typos.
> - Changed vhost_get_thread_index to use MAX_VHOST_THREADS.
> - Removed VIRTIO_MAX_TXQS.
> - Changed capability to VIRTIO_NET_F_MULTIQUEUE.
> - Modified "numtxqs" in virtnet_info to "num_queue_pairs".
> virtnet_info still has numtxqs as it is more convenient.
> - Moved code for VIRTIO_NET_F_CTRL_VLAN into probe function.
> - Improve check for return value of virtio_config_val().
> - Removed cache align directives in guest as it was redundant.
> 5. "If we have a wrapper to init all vqs, pls add a wrapper to clean up
> all vqs as well": I haven't done this as some errors are very
> specific to the failure location (and what was initialized till
> then). So only those errors are cleaned up using goto's like the
> rest of the code. I can change in next version if you feel this is
> still required.
> 6. "I think we should have free_unused_bufs that handles a single queue,
> and call it in a loop": I haven't done this as I think the caller wants
> all rx/tx queues to be cleaned up by calling this function.
>
> TODO's:
> --------
> 1. Reduce vectors for find_vqs().
> 2. Make vhost changes minimal. For now, I have restricted the number of
> vhost threads to 4. This can be either made unrestricted; or if the
> userspace vhost works, it can be removed altogether.
>
> Please review and provide feedback. I am travelling a bit in the next
> few days but will respond at the earliest.
Do you have an update to the virtio-pci spec for this?
--
error compiling committee.c: too many arguments to function
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net
2011-04-13 12:00 ` [PATCH 0/4] [RFC rev2] Implement multiqueue (RX & TX) virtio-net Avi Kivity
@ 2011-04-13 16:22 ` Krishna Kumar2
0 siblings, 0 replies; 9+ messages in thread
From: Krishna Kumar2 @ 2011-04-13 16:22 UTC (permalink / raw)
To: Avi Kivity
Cc: anthony, arnd, davem, eric.dumazet, horms, kvm, mst, netdev,
rusty
Avi Kivity <avi@redhat.com> wrote on 04/13/2011 05:30:11 PM:
Hi Avi,
> > --------
> > 1. Reduce vectors for find_vqs().
> > 2. Make vhost changes minimal. For now, I have restricted the number of
> > vhost threads to 4. This can be either made unrestricted; or if the
> > userspace vhost works, it can be removed altogether.
> >
> > Please review and provide feedback. I am travelling a bit in the next
> > few days but will respond at the earliest.
>
> Do you have an update to the virtio-pci spec for this?
Not yet, will keep it in my TODO list.
thanks,
- KK
^ permalink raw reply [flat|nested] 9+ messages in thread