From: Rusty Russell <rusty@rustcorp.com.au>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>,
Boaz Harrosh <bharrosh@panasas.com>,
Jens Axboe <jens.axboe@oracle.com>,
Alan Cox <alan@lxorguk.ukuu.org.uk>,
Geert Uytterhoeven <geert@linux-m68k.org>,
Linux Kernel Development <linux-kernel@vger.kernel.org>,
mingo@elte.hu
Subject: [RFC PATCH 2/2] sg_ring instead of scatterlist chaining in virtio
Date: Mon, 5 Nov 2007 17:15:50 +1100 [thread overview]
Message-ID: <200711051715.50864.rusty@rustcorp.com.au> (raw)
In-Reply-To: <200711051711.55364.rusty@rustcorp.com.au>
Example using virtio.
The interface actually improves because we don't need to hand two lengths to
add_buf (it needs an input and output sg[], so we used to hand one pointer
and a counter of how many were in and how many out, now we can neatly hand
two separate sgs).
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index a901eee..6a9b54d 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -23,7 +23,9 @@ struct virtio_blk
mempool_t *pool;
/* Scatterlist: can be too big for stack. */
- struct scatterlist sg[3+MAX_PHYS_SEGMENTS];
+ DECLARE_SG(out, 1);
+ DECLARE_SG(in, 1);
+ DECLARE_SG(sg, MAX_PHYS_SEGMENTS);
};
struct virtblk_req
@@ -69,8 +71,8 @@ static bool blk_done(struct virtqueue *vq)
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
struct request *req)
{
- unsigned long num, out, in;
struct virtblk_req *vbr;
+ struct sg_ring *in;
vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
if (!vbr)
@@ -94,23 +96,24 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
if (blk_barrier_rq(vbr->req))
vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
- /* We have to zero this, otherwise blk_rq_map_sg gets upset. */
- memset(vblk->sg, 0, sizeof(vblk->sg));
- sg_set_buf(&vblk->sg[0], &vbr->out_hdr, sizeof(vbr->out_hdr));
- num = blk_rq_map_sg(q, vbr->req, vblk->sg+1);
- sg_set_buf(&vblk->sg[num+1], &vbr->in_hdr, sizeof(vbr->in_hdr));
+ sg_init_single(&vblk->out.ring, &vbr->out_hdr, sizeof(vbr->out_hdr));
+ sg_ring_init(&vblk->sg.ring, ARRAY_SIZE(vblk->sg.sg));
+ vblk->sg.ring.num = blk_rq_map_sg(q, vbr->req, vblk->sg.sg);
+ sg_init_single(&vblk->in.ring, &vbr->in_hdr, sizeof(vbr->in_hdr));
if (rq_data_dir(vbr->req) == WRITE) {
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
- out = 1 + num;
- in = 1;
+ /* Chain write request onto output buffers. */
+ list_add_tail(&vblk->sg.ring.list, &vblk->out.ring.list);
+ in = &vblk->in.ring;
} else {
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
- out = 1;
- in = 1 + num;
+ /* Chain input (status) buffer at end of read buffers. */
+ list_add_tail(&vblk->in.ring.list, &vblk->sg.ring.list);
+ in = &vblk->sg.ring;
}
- if (vblk->vq->vq_ops->add_buf(vblk->vq, vblk->sg, out, in, vbr)) {
+ if (vblk->vq->vq_ops->add_buf(vblk->vq, &vblk->out.ring, in, vbr)) {
mempool_free(vbr, vblk->pool);
return false;
}
@@ -127,7 +130,7 @@ static void do_virtblk_request(struct request_queue *q)
while ((req = elv_next_request(q)) != NULL) {
vblk = req->rq_disk->private_data;
- BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg));
+ BUG_ON(req->nr_phys_segments > ARRAY_SIZE(vblk->sg.sg));
/* If this request fails, stop queue and wait for something to
finish to restart it. */
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 100e8a2..e4d90c7 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -54,15 +54,15 @@ static struct hv_ops virtio_cons;
* immediately (lguest's Launcher does). */
static int put_chars(u32 vtermno, const char *buf, int count)
{
- struct scatterlist sg[1];
+ DECLARE_SG(sg, 1);
unsigned int len;
/* This is a convenient routine to initialize a single-elem sg list */
- sg_init_one(sg, buf, count);
+ sg_init_single(&sg.ring, buf, count);
/* add_buf wants a token to identify this buffer: we hand it any
* non-NULL pointer, since there's only ever one buffer. */
- if (out_vq->vq_ops->add_buf(out_vq, sg, 1, 0, (void *)1) == 0) {
+ if (out_vq->vq_ops->add_buf(out_vq, &sg.ring, NULL, (void *)1) == 0) {
/* Tell Host to go! */
out_vq->vq_ops->kick(out_vq);
/* Chill out until it's done with the buffer. */
@@ -78,11 +78,12 @@ static int put_chars(u32 vtermno, const char *buf, int count)
* queue. */
static void add_inbuf(void)
{
- struct scatterlist sg[1];
- sg_init_one(sg, inbuf, PAGE_SIZE);
+ DECLARE_SG(sg, 1);
+
+ sg_init_single(&sg.ring, inbuf, PAGE_SIZE);
/* We should always be able to add one buffer to an empty queue. */
- if (in_vq->vq_ops->add_buf(in_vq, sg, 0, 1, inbuf) != 0)
+ if (in_vq->vq_ops->add_buf(in_vq, NULL, &sg.ring, inbuf) != 0)
BUG();
in_vq->vq_ops->kick(in_vq);
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e396c9d..2698592 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -143,20 +143,21 @@ drop:
static void try_fill_recv(struct virtnet_info *vi)
{
struct sk_buff *skb;
- struct scatterlist sg[1+MAX_SKB_FRAGS];
- int num, err;
+ DECLARE_SG(sg, 1+MAX_SKB_FRAGS);
+ int err;
+ sg_ring_init(&sg.ring, 1+MAX_SKB_FRAGS);
for (;;) {
skb = netdev_alloc_skb(vi->dev, MAX_PACKET_LEN);
if (unlikely(!skb))
break;
skb_put(skb, MAX_PACKET_LEN);
- vnet_hdr_to_sg(sg, skb);
- num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
+ vnet_hdr_to_sg(sg.sg, skb);
+ sg.ring.num = skb_to_sgvec(skb, sg.sg+1, 0, skb->len) + 1;
skb_queue_head(&vi->recv, skb);
- err = vi->rvq->vq_ops->add_buf(vi->rvq, sg, 0, num, skb);
+ err = vi->rvq->vq_ops->add_buf(vi->rvq, NULL, &sg.ring, skb);
if (err) {
skb_unlink(skb, &vi->recv);
kfree_skb(skb);
@@ -225,14 +226,15 @@ static void free_old_xmit_skbs(struct virtnet_info *vi)
static int start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
- int num, err;
- struct scatterlist sg[1+MAX_SKB_FRAGS];
+ int err;
+ DECLARE_SG(sg, 1+MAX_SKB_FRAGS);
struct virtio_net_hdr *hdr;
const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
DECLARE_MAC_BUF(mac);
pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest));
+ sg_ring_init(&sg.ring, 1+MAX_SKB_FRAGS);
free_old_xmit_skbs(vi);
/* Encode metadata header at front. */
@@ -263,10 +265,10 @@ static int start_xmit(struct sk_buff *skb, struct net_device *dev)
hdr->gso_size = 0;
}
- vnet_hdr_to_sg(sg, skb);
- num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
+ vnet_hdr_to_sg(sg.sg, skb);
+ sg.ring.num = skb_to_sgvec(skb, sg.sg+1, 0, skb->len) + 1;
__skb_queue_head(&vi->send, skb);
- err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
+ err = vi->svq->vq_ops->add_buf(vi->svq, &sg.ring, NULL, skb);
if (err) {
pr_debug("%s: virtio not prepared to send\n", dev->name);
skb_unlink(skb, &vi->send);
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index ae53570..d73cd18 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -69,48 +69,68 @@ struct vring_virtqueue
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
+static int add_desc(struct vring_virtqueue *vq, unsigned int i,
+ struct scatterlist *sg, unsigned int flags)
+{
+ if (vq->num_free == 0)
+ return -ENOSPC;
+
+ vq->vring.desc[i].flags = VRING_DESC_F_NEXT | flags;
+ vq->vring.desc[i].addr = (page_to_pfn(sg->page)<<PAGE_SHIFT)
+ + sg->offset;
+ vq->vring.desc[i].len = sg->length;
+ vq->num_free--;
+ return vq->vring.desc[i].next;
+}
+
static int vring_add_buf(struct virtqueue *_vq,
- struct scatterlist sg[],
- unsigned int out,
- unsigned int in,
+ struct sg_ring *out,
+ struct sg_ring *in,
void *data)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- unsigned int i, avail, head, uninitialized_var(prev);
+ unsigned int j, avail, head, free, uninitialized_var(prev);
+ int i;
+ struct sg_ring empty_sg, *start;
BUG_ON(data == NULL);
- BUG_ON(out + in > vq->vring.num);
- BUG_ON(out + in == 0);
+
+ sg_ring_init(&empty_sg, 0);
+ empty_sg.num = 0;
+ if (!out)
+ out = &empty_sg;
+ if (!in)
+ in = &empty_sg;
+
+ BUG_ON(in->num == 0 && out->num == 0);
START_USE(vq);
- if (vq->num_free < out + in) {
- pr_debug("Can't add buf len %i - avail = %i\n",
- out + in, vq->num_free);
- END_USE(vq);
- return -ENOSPC;
- }
+ i = head = vq->free_head;
+ free = vq->num_free;
+
+ /* Lay out the output buffers first. */
+ start = out;
+ do {
+ for (j = 0; j < out->num; j++) {
+ prev = i;
+ i = add_desc(vq, i, &out->sg[j], 0);
+ if (unlikely(i < 0))
+ goto full;
+ }
+ } while ((out = sg_ring_next(out)) != start);
+
+ /* Lay out the input buffers next. */
+ start = in;
+ do {
+ for (j = 0; j < in->num; j++) {
+ prev = i;
+ i = add_desc(vq, i, &in->sg[j], VRING_DESC_F_WRITE);
+ if (unlikely(i < 0))
+ goto full;
+ }
+ } while ((in = sg_ring_next(in)) != start);
- /* We're about to use some buffers from the free list. */
- vq->num_free -= out + in;
-
- head = vq->free_head;
- for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
- vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
- vq->vring.desc[i].addr = (page_to_pfn(sg->page)<<PAGE_SHIFT)
- + sg->offset;
- vq->vring.desc[i].len = sg->length;
- prev = i;
- sg++;
- }
- for (; in; i = vq->vring.desc[i].next, in--) {
- vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
- vq->vring.desc[i].addr = (page_to_pfn(sg->page)<<PAGE_SHIFT)
- + sg->offset;
- vq->vring.desc[i].len = sg->length;
- prev = i;
- sg++;
- }
/* Last one doesn't continue. */
vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
@@ -128,6 +148,12 @@ static int vring_add_buf(struct virtqueue *_vq,
pr_debug("Added buffer head %i to %p\n", head, vq);
END_USE(vq);
return 0;
+
+full:
+ pr_debug("Buffer needed more than %u on %p\n", free, vq);
+ vq->num_free = free;
+ END_USE(vq);
+ return i;
}
static void vring_kick(struct virtqueue *_vq)
diff --git a/include/linux/virtio.h b/include/linux/virtio.h
index 14e1379..cee525f 100644
--- a/include/linux/virtio.h
+++ b/include/linux/virtio.h
@@ -29,9 +29,8 @@ struct virtqueue
* virtqueue_ops - operations for virtqueue abstraction layer
* @add_buf: expose buffer to other end
* vq: the struct virtqueue we're talking about.
- * sg: the description of the buffer(s).
- * out_num: the number of sg readable by other side
- * in_num: the number of sg which are writable (after readable ones)
+ * out: the scatter gather elements readable by other side (can be NULL)
+ * in: the scatter gather elements which are writable (can be NULL)
* data: the token identifying the buffer.
* Returns 0 or an error.
* @kick: update after add_buf
@@ -56,9 +55,8 @@ struct virtqueue
*/
struct virtqueue_ops {
int (*add_buf)(struct virtqueue *vq,
- struct scatterlist sg[],
- unsigned int out_num,
- unsigned int in_num,
+ struct sg_ring *out,
+ struct sg_ring *in,
void *data);
void (*kick)(struct virtqueue *vq);
next prev parent reply other threads:[~2007-11-05 6:15 UTC|newest]
Thread overview: 91+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-10-22 18:10 [PATCH 00/10] SG updates Jens Axboe
2007-10-22 18:10 ` [PATCH 01/10] [SG] Add helpers for manipulating SG entries Jens Axboe
2007-10-22 18:10 ` [PATCH 02/10] [SG] Update block layer to use sg helpers Jens Axboe
2007-10-23 5:13 ` Heiko Carstens
2007-10-23 5:16 ` Jens Axboe
2007-10-23 5:42 ` [PATCH] fix ll_rw_blk.c build on s390 Heiko Carstens
2007-10-23 5:44 ` [PATCH] net: fix xfrm build - missing scatterlist.h include Heiko Carstens
2007-10-23 7:28 ` Jens Axboe
2007-10-23 14:32 ` [PATCH 02/10] [SG] Update block layer to use sg helpers John Stoffel
2007-10-22 18:10 ` [PATCH 03/10] [SG] Update crypto/ to " Jens Axboe
2007-10-22 18:10 ` [PATCH 04/10] [SG] Update drivers to use " Jens Axboe
2007-10-23 6:28 ` Heiko Carstens
2007-10-23 7:14 ` Jens Axboe
2007-10-23 7:16 ` Heiko Carstens
2007-10-22 18:10 ` [PATCH 05/10] [SG] Update fs/ " Jens Axboe
2007-10-22 18:11 ` [PATCH 06/10] [SG] Update net/ " Jens Axboe
2007-10-23 10:44 ` Christian Borntraeger
2007-10-23 10:45 ` Jens Axboe
2007-10-22 18:11 ` [PATCH 07/10] [SG] Update swiotlb " Jens Axboe
2007-10-22 18:11 ` [PATCH 08/10] [SG] Update arch/ " Jens Axboe
2007-10-22 21:10 ` Benny Halevy
2007-10-23 7:26 ` Jens Axboe
2007-10-22 18:11 ` [PATCH 09/10] Change table chaining layout Jens Axboe
2007-10-22 19:39 ` Geert Uytterhoeven
2007-10-22 19:49 ` Linus Torvalds
2007-10-22 19:52 ` Jens Axboe
2007-10-22 20:16 ` Alan Cox
2007-10-22 20:38 ` Matt Mackall
2007-10-22 20:44 ` Linus Torvalds
2007-10-22 21:43 ` Alan Cox
2007-10-22 21:47 ` Linus Torvalds
2007-10-23 0:07 ` David Miller
2007-10-23 7:18 ` Geert Uytterhoeven
2007-10-23 9:29 ` Boaz Harrosh
2007-10-23 9:41 ` Jens Axboe
2007-10-23 9:50 ` Boaz Harrosh
2007-10-23 9:55 ` Jens Axboe
2007-10-23 10:23 ` Boaz Harrosh
2007-10-23 10:29 ` Jens Axboe
2007-10-23 15:22 ` Linus Torvalds
2007-10-24 8:05 ` Jens Axboe
2007-10-24 9:03 ` Geert Uytterhoeven
2007-10-24 9:12 ` Jens Axboe
2007-10-24 13:35 ` Olivier Galibert
2007-10-24 13:38 ` Jens Axboe
2007-10-24 13:45 ` Olivier Galibert
2007-10-24 15:16 ` Linus Torvalds
2007-10-25 8:40 ` Rusty Russell
2007-10-25 9:11 ` Jens Axboe
2007-10-25 11:54 ` Rusty Russell
2007-10-26 0:03 ` Rusty Russell
2007-10-25 15:40 ` Linus Torvalds
2007-10-25 16:03 ` Benny Halevy
2007-10-26 5:01 ` Paul Mackerras
2007-10-26 14:52 ` Linus Torvalds
2007-10-26 17:28 ` Jens Axboe
2007-11-05 6:11 ` [RFC PATCH 1/2] sg_ring instead of scatterlist chaining Rusty Russell
2007-11-05 6:15 ` Rusty Russell [this message]
2007-11-05 16:40 ` Randy Dunlap
2007-10-23 10:33 ` [PATCH 09/10] Change table chaining layout Ingo Molnar
2007-10-23 10:56 ` Jens Axboe
2007-10-23 11:27 ` Ingo Molnar
2007-10-23 19:23 ` Geert Uytterhoeven
2007-10-23 21:46 ` Jens Axboe
2007-10-24 6:56 ` Jens Axboe
2007-10-22 21:16 ` Benny Halevy
2007-10-22 21:21 ` Jeff Garzik
2007-10-22 21:47 ` Matt Mackall
2007-10-22 22:52 ` Alan Cox
2007-10-22 23:46 ` Matt Mackall
2007-10-23 0:11 ` Jeff Garzik
2007-10-23 4:09 ` powerpc: Fix fallout from sg_page() changes Olof Johansson
2007-10-23 4:09 ` Olof Johansson
2007-10-23 4:31 ` IB/ehca: Fix sg_page() fallout Olof Johansson
2007-10-23 4:31 ` Olof Johansson
2007-10-23 5:05 ` Jens Axboe
2007-10-23 5:05 ` Jens Axboe
2007-10-23 5:54 ` Olof Johansson
2007-10-23 5:54 ` Olof Johansson
2007-10-23 7:12 ` Jens Axboe
2007-10-23 7:12 ` Jens Axboe
2007-10-23 7:13 ` powerpc: Fix fallout from sg_page() changes Jens Axboe
2007-10-23 7:13 ` Jens Axboe
2007-10-23 17:08 ` [PATCH 09/10] Change table chaining layout Boaz Harrosh
2007-10-23 18:33 ` Jens Axboe
2007-10-23 19:56 ` Andi Kleen
2007-10-23 20:20 ` Jens Axboe
2007-10-23 20:57 ` Andi Kleen
2007-10-23 21:44 ` Jens Axboe
2007-10-22 18:11 ` [PATCH 10/10] Add CONFIG_DEBUG_SG sg validation Jens Axboe
2007-10-23 14:48 ` [PATCH][SG] fix typo in ps3rom.c Arnd Bergmann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200711051715.50864.rusty@rustcorp.com.au \
--to=rusty@rustcorp.com.au \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=bharrosh@panasas.com \
--cc=geert@linux-m68k.org \
--cc=jens.axboe@oracle.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=paulus@samba.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.