* [PATCH 2.6.18-rc4] aoe [04/13]: zero copy write 1 of 2
[not found] <E1GE8K3-0008Jn-00@kokone.coraid.com>
@ 2006-08-18 17:39 ` Ed L. Cashin
2006-08-19 10:18 ` Alan Cox
0 siblings, 1 reply; 5+ messages in thread
From: Ed L. Cashin @ 2006-08-18 17:39 UTC (permalink / raw)
To: linux-kernel; +Cc: ecashin, Greg K-H
Signed-off-by: "Ed L. Cashin" <ecashin@coraid.com>
Avoid memory copy on writes.
(This patch depends on fixes in patch 9 to follow.)
diff -upr 2.6.18-rc4-orig/drivers/block/aoe/aoe.h 2.6.18-rc4-aoe/drivers/block/aoe/aoe.h
--- 2.6.18-rc4-orig/drivers/block/aoe/aoe.h 2006-08-17 16:45:34.000000000 -0400
+++ 2.6.18-rc4-aoe/drivers/block/aoe/aoe.h 2006-08-17 16:45:34.000000000 -0400
@@ -107,11 +107,7 @@ struct frame {
ulong waited;
struct buf *buf;
char *bufaddr;
- int writedatalen;
- int ndata;
-
- /* largest possible */
- unsigned char data[sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr)];
+ struct sk_buff *skb;
};
struct aoedev {
@@ -157,6 +153,7 @@ void aoecmd_cfg(ushort aoemajor, unsigne
void aoecmd_ata_rsp(struct sk_buff *);
void aoecmd_cfg_rsp(struct sk_buff *);
void aoecmd_sleepwork(void *vp);
+struct sk_buff *new_skb(ulong);
int aoedev_init(void);
void aoedev_exit(void);
diff -upr 2.6.18-rc4-orig/drivers/block/aoe/aoecmd.c 2.6.18-rc4-aoe/drivers/block/aoe/aoecmd.c
--- 2.6.18-rc4-orig/drivers/block/aoe/aoecmd.c 2006-08-17 16:45:34.000000000 -0400
+++ 2.6.18-rc4-aoe/drivers/block/aoe/aoecmd.c 2006-08-17 16:45:34.000000000 -0400
@@ -17,15 +17,14 @@
#define MAXTIMER (HZ << 1)
#define MAXWAIT (60 * 3) /* After MAXWAIT seconds, give up and fail dev */
-static struct sk_buff *
-new_skb(struct net_device *if_dev, ulong len)
+struct sk_buff *
+new_skb(ulong len)
{
struct sk_buff *skb;
skb = alloc_skb(len, GFP_ATOMIC);
if (skb) {
skb->nh.raw = skb->mac.raw = skb->data;
- skb->dev = if_dev;
skb->protocol = __constant_htons(ETH_P_AOE);
skb->priority = 0;
skb_put(skb, len);
@@ -40,29 +39,6 @@ new_skb(struct net_device *if_dev, ulong
return skb;
}
-static struct sk_buff *
-skb_prepare(struct aoedev *d, struct frame *f)
-{
- struct sk_buff *skb;
- char *p;
-
- skb = new_skb(d->ifp, f->ndata + f->writedatalen);
- if (!skb) {
- printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
- return NULL;
- }
-
- p = skb->mac.raw;
- memcpy(p, f->data, f->ndata);
-
- if (f->writedatalen) {
- p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
- memcpy(p, f->bufaddr, f->writedatalen);
- }
-
- return skb;
-}
-
static struct frame *
getframe(struct aoedev *d, int tag)
{
@@ -129,10 +105,11 @@ aoecmd_ata_rw(struct aoedev *d, struct f
bcnt = MAXATADATA;
/* initialize the headers & frame */
- h = (struct aoe_hdr *) f->data;
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb->mac.raw;
ah = (struct aoe_atahdr *) (h+1);
- f->ndata = sizeof *h + sizeof *ah;
- memset(h, 0, f->ndata);
+ skb->len = sizeof *h + sizeof *ah;
+ memset(h, 0, skb->len);
f->tag = aoehdr_atainit(d, h);
f->waited = 0;
f->buf = buf;
@@ -155,11 +132,13 @@ aoecmd_ata_rw(struct aoedev *d, struct f
}
if (bio_data_dir(buf->bio) == WRITE) {
+ skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
+ offset_in_page(f->bufaddr), bcnt);
ah->aflags |= AOEAFL_WRITE;
- f->writedatalen = bcnt;
} else {
+ skb_shinfo(skb)->nr_frags = 0;
+ skb->len = ETH_ZLEN;
writebit = 0;
- f->writedatalen = 0;
}
ah->cmdstat = WIN_READ | writebit | extbit;
@@ -179,15 +158,14 @@ aoecmd_ata_rw(struct aoedev *d, struct f
buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
}
- skb = skb_prepare(d, f);
- if (skb) {
- skb->next = NULL;
- if (d->sendq_hd)
- d->sendq_tl->next = skb;
- else
- d->sendq_hd = skb;
- d->sendq_tl = skb;
- }
+ skb->dev = d->ifp;
+ skb_get(skb);
+ skb->next = NULL;
+ if (d->sendq_hd)
+ d->sendq_tl->next = skb;
+ else
+ d->sendq_hd = skb;
+ d->sendq_tl = skb;
}
/* some callers cannot sleep, and they can call this function,
@@ -209,11 +187,12 @@ aoecmd_cfg_pkts(ushort aoemajor, unsigne
if (!is_aoe_netif(ifp))
continue;
- skb = new_skb(ifp, sizeof *h + sizeof *ch);
+ skb = new_skb(sizeof *h + sizeof *ch);
if (skb == NULL) {
printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
continue;
}
+ skb->dev = ifp;
if (sl_tail == NULL)
sl_tail = skb;
h = (struct aoe_hdr *) skb->mac.raw;
@@ -283,21 +262,21 @@ rexmit(struct aoedev *d, struct frame *f
d->aoemajor, d->aoeminor, f->tag, jiffies, n);
aoechr_error(buf);
- h = (struct aoe_hdr *) f->data;
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb->mac.raw;
f->tag = n;
h->tag = cpu_to_be32(n);
memcpy(h->dst, d->addr, sizeof h->dst);
memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
- skb = skb_prepare(d, f);
- if (skb) {
- skb->next = NULL;
- if (d->sendq_hd)
- d->sendq_tl->next = skb;
- else
- d->sendq_hd = skb;
- d->sendq_tl = skb;
- }
+ skb->dev = d->ifp;
+ skb_get(skb);
+ skb->next = NULL;
+ if (d->sendq_hd)
+ d->sendq_tl->next = skb;
+ else
+ d->sendq_hd = skb;
+ d->sendq_tl = skb;
}
static int
@@ -514,7 +493,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
calc_rttavg(d, tsince(f->tag));
ahin = (struct aoe_atahdr *) (hin+1);
- ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
+ ahout = (struct aoe_atahdr *) (f->skb->mac.raw + sizeof(struct aoe_hdr));
buf = f->buf;
if (ahout->cmdstat == WIN_IDENTIFY)
@@ -620,20 +599,21 @@ aoecmd_ata_id(struct aoedev *d)
}
/* initialize the headers & frame */
- h = (struct aoe_hdr *) f->data;
+ skb = f->skb;
+ h = (struct aoe_hdr *) skb->mac.raw;
ah = (struct aoe_atahdr *) (h+1);
- f->ndata = sizeof *h + sizeof *ah;
- memset(h, 0, f->ndata);
+ skb->len = sizeof *h + sizeof *ah;
+ memset(h, 0, skb->len);
f->tag = aoehdr_atainit(d, h);
f->waited = 0;
- f->writedatalen = 0;
/* set up ata header */
ah->scnt = 1;
ah->cmdstat = WIN_IDENTIFY;
ah->lba3 = 0xa0;
- skb = skb_prepare(d, f);
+ skb->dev = d->ifp;
+ skb_get(skb);
d->rttavg = MAXTIMER;
d->timer.function = rexmit_timer;
diff -upr 2.6.18-rc4-orig/drivers/block/aoe/aoedev.c 2.6.18-rc4-aoe/drivers/block/aoe/aoedev.c
--- 2.6.18-rc4-orig/drivers/block/aoe/aoedev.c 2006-08-17 16:45:34.000000000 -0400
+++ 2.6.18-rc4-aoe/drivers/block/aoe/aoedev.c 2006-08-17 16:45:34.000000000 -0400
@@ -63,22 +63,32 @@ aoedev_newdev(ulong nframes)
struct frame *f, *e;
d = kzalloc(sizeof *d, GFP_ATOMIC);
- if (d == NULL)
- return NULL;
f = kcalloc(nframes, sizeof *f, GFP_ATOMIC);
- if (f == NULL) {
- kfree(d);
+ switch (!d || !f) {
+ case 0:
+ d->nframes = nframes;
+ d->frames = f;
+ e = f + nframes;
+ for (; f<e; f++) {
+ f->tag = FREETAG;
+ f->skb = new_skb(ETH_ZLEN);
+ if (!f->skb)
+ break;
+ }
+ if (f == e)
+ break;
+ while (f > d->frames) {
+ f--;
+ dev_kfree_skb(f->skb);
+ }
+ default:
+ if (f)
+ kfree(f);
+ if (d)
+ kfree(d);
return NULL;
}
-
INIT_WORK(&d->work, aoecmd_sleepwork, d);
-
- d->nframes = nframes;
- d->frames = f;
- e = f + nframes;
- for (; f<e; f++)
- f->tag = FREETAG;
-
spin_lock_init(&d->lock);
init_timer(&d->timer);
d->timer.data = (ulong) d;
@@ -160,11 +170,19 @@ aoedev_by_sysminor_m(ulong sysminor, ulo
static void
aoedev_freedev(struct aoedev *d)
{
+ struct frame *f, *e;
+
if (d->gd) {
aoedisk_rm_sysfs(d);
del_gendisk(d->gd);
put_disk(d->gd);
}
+ f = d->frames;
+ e = f + d->nframes;
+ for (; f<e; f++) {
+ skb_shinfo(f->skb)->nr_frags = 0;
+ dev_kfree_skb(f->skb);
+ }
kfree(d->frames);
if (d->bufpool)
mempool_destroy(d->bufpool);
--
"Ed L. Cashin" <ecashin@coraid.com>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2.6.18-rc4] aoe [04/13]: zero copy write 1 of 2
2006-08-18 17:39 ` Ed L. Cashin
@ 2006-08-19 10:18 ` Alan Cox
2006-08-22 21:21 ` Ed L. Cashin
0 siblings, 1 reply; 5+ messages in thread
From: Alan Cox @ 2006-08-19 10:18 UTC (permalink / raw)
To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H
Ar Gwe, 2006-08-18 am 13:39 -0400, ysgrifennodd Ed L. Cashin:
> Signed-off-by: "Ed L. Cashin" <ecashin@coraid.com>
> + skb->len = sizeof *h + sizeof *ah;
> + memset(h, 0, skb->len);
Never play with skb->len directly. Use skb_put/skb_trim
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2.6.18-rc4] aoe [04/13]: zero copy write 1 of 2
2006-08-19 10:18 ` Alan Cox
@ 2006-08-22 21:21 ` Ed L. Cashin
2006-08-23 15:03 ` Alan Cox
0 siblings, 1 reply; 5+ messages in thread
From: Ed L. Cashin @ 2006-08-22 21:21 UTC (permalink / raw)
To: Alan Cox; +Cc: linux-kernel, Greg K-H
On Sat, Aug 19, 2006 at 11:18:12AM +0100, Alan Cox wrote:
> Ar Gwe, 2006-08-18 am 13:39 -0400, ysgrifennodd Ed L. Cashin:
> > Signed-off-by: "Ed L. Cashin" <ecashin@coraid.com>
>
> > + skb->len = sizeof *h + sizeof *ah;
> > + memset(h, 0, skb->len);
>
> Never play with skb->len directly. Use skb_put/skb_trim
These are skbs pre-allocated by the aoe driver that will always have
enough room to accomodate this much data, and we are really setting
the packet header length.
To use skb_put here seems awkward. We'd have to do things like shown
below throughout the driver instead of just setting the length. Is
that what you'd like to see?
diff -upr 2.6.18-rc4-orig/drivers/block/aoe/aoecmd.c 2.6.18-rc4-aoe/drivers/block/aoe/aoecmd.c
--- 2.6.18-rc4-orig/drivers/block/aoe/aoecmd.c 2006-08-22 12:48:18.000000000 -0400
+++ 2.6.18-rc4-aoe/drivers/block/aoe/aoecmd.c 2006-08-22 17:03:23.000000000 -0400
@@ -314,7 +315,9 @@ rexmit(struct aoedev *d, struct frame *f
if (ah->aflags & AOEAFL_WRITE) {
skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
offset_in_page(f->bufaddr), DEFAULTBCNT);
- skb->len = sizeof *h + sizeof *ah + DEFAULTBCNT;
+ skb->data_len = 0;
+ skb_trim(skb, 0);
+ skb_put(skb, sizeof *h + sizeof *ah + DEFAULTBCNT);
skb->data_len = DEFAULTBCNT;
}
if (++d->lostjumbo > (d->nframes << 1))
--
Ed L Cashin <ecashin@coraid.com>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2.6.18-rc4] aoe [04/13]: zero copy write 1 of 2
2006-08-22 21:21 ` Ed L. Cashin
@ 2006-08-23 15:03 ` Alan Cox
0 siblings, 0 replies; 5+ messages in thread
From: Alan Cox @ 2006-08-23 15:03 UTC (permalink / raw)
To: Ed L. Cashin; +Cc: linux-kernel, Greg K-H
Ar Maw, 2006-08-22 am 17:21 -0400, ysgrifennodd Ed L. Cashin:
> These are skbs pre-allocated by the aoe driver that will always have
> enough room to accomodate this much data, and we are really setting
> the packet header length.
The skb structure has other fields and if you fiddle with them by hand
you break those and you end up breaking if the skb internals change.
Eg if you set skb->len you must set skb->tail. Functions like
skb_addd_data, skb_put, skb_trim, etc do the right thing in all cases.
> To use skb_put here seems awkward. We'd have to do things like shown
> below throughout the driver instead of just setting the length. Is
> that what you'd like to see?
Yes. It might take a clock or two longer but it sets skb->tail right and
is rather more future proof.
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 2.6.18-rc4] aoe [04/13]: zero copy write 1 of 2
@ 2006-09-14 19:50 Ed L. Cashin
0 siblings, 0 replies; 5+ messages in thread
From: Ed L. Cashin @ 2006-09-14 19:50 UTC (permalink / raw)
To: Alan Cox; +Cc: linux-kernel, Greg K-H
f5d@coraid.com> <1155982692.4051.9.camel@localhost.localdomain> <20060822212150.
GQ6196@coraid.com> <1156345386.3007.24.camel@localhost.localdomain>
In-Reply-To: <1156345386.3007.24.camel@localhost.localdomain>
User-Agent: Mutt/1.5.11+cvs20060126
Hi. Back in August I sent out some patches for the aoe driver, and
Alan objected to the direct setting of skb->len in one of them. I
asked whether he was suggesting that we use something like this
instead of setting skb->len:
skb->data_len = 0;
skb_trim(skb, 0);
skb_put(skb, sizeof *h + sizeof *ah + DEFAULTBCNT);
... and Alan said that was acceptible because it takes care of
skb->tail, checks for overflow, and is more future proof.
So I took some time to implement the necessary changes, but then it
became apparent that there was a problem.
The skb_trim and skb_put macros are only for non-linear skbuffs, but
we are only using the area inside the skbuff itself for packet
headers, not data.
When we do something like this:
if (bio_data_dir(buf->bio) == WRITE) {
skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
ah.aflags |= AOEAFL_WRITE;
skb->len += bcnt;
skb->data_len = bcnt;
t->wpkts++;
... skb->tail isn't really relevant, because we are only using the
pre-allocated part of the skbuff for headers, and the headers aren't
expanding here. Other parts of the kernel that aren't putting data in
the skbuff itself set the skb->len directly.
e1000_main.c
ip_output.c
tcp.c
ip6_output.c
So is it correct for the callers of skb_fill_page_desc to set skb->len
or is another interface needed besides skb_put/skb_trim? Such a new
interface would be able to maintain the consistency of the skbuff
fields without assuming that the data is in the skbuff itself.
If a new interface is needed, then it seems like we should set
skb->len in this patch until the new interface is ready.
--
Ed L Cashin <ecashin@coraid.com>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2006-09-14 20:07 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-09-14 19:50 [PATCH 2.6.18-rc4] aoe [04/13]: zero copy write 1 of 2 Ed L. Cashin
[not found] <E1GE8K3-0008Jn-00@kokone.coraid.com>
2006-08-18 17:39 ` Ed L. Cashin
2006-08-19 10:18 ` Alan Cox
2006-08-22 21:21 ` Ed L. Cashin
2006-08-23 15:03 ` Alan Cox
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox