netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Octavian Purdila <opurdila@ixiacom.com>
To: Jarek Poplawski <jarkao2@gmail.com>
Cc: netdev@vger.kernel.org
Subject: Re: [RESEND] [PATCH] tcp: fix for splice receive when used with software LRO
Date: Fri, 20 Jun 2008 23:44:12 +0300	[thread overview]
Message-ID: <200806202344.12771.opurdila@ixiacom.com> (raw)
In-Reply-To: <20080620130129.GA4417@ff.dom.local>

[-- Attachment #1: Type: text/plain, Size: 862 bytes --]

On Friday 20 June 2008, Jarek Poplawski wrote:
> >
> > Let me try to wrap my brain around this again :) and see if we don't miss
> > anything. In the process, maybe I can even find a way to make this code
> > easier to understand.
>
> Sure, no need to hurry, and after all, I'm OK with the current patch too.
>

OK, here is my attempt at making it a bit more readable:

- move all of the details on offsets, lengths and buffers into a 
single function instead of doing these operation from multiple places

- use a bottom up approach - try to avoid details in the high level functions, 
introduce them gradually as you go deeper in the function call stack

I've attached both the patch and a full excerpt - it might be easier to 
comment on it  this way.

I've minimally tested the patch, no obvious functional or performance problems 
were found.

Thanks,
tavi

[-- Attachment #2: y --]
[-- Type: text/plain, Size: 3161 bytes --]


static inline void __segment_seek(struct page **page, int *poff, int *plen,
				  int off)
{
	*poff += off;
	*page += *poff / PAGE_SIZE;
	*poff = *poff % PAGE_SIZE;
	*plen -= off;
}

static inline int __splice_segment(struct page *page, unsigned int poff,
				   unsigned int plen, unsigned int *off,
				   unsigned int *len, struct sk_buff *skb,
				   struct splice_pipe_desc *spd)
{
	/* skip this segment if already processed */
	if (*off >= plen) {
		*off -= plen;
		return 0;
	}

	/* ignore any bits we already processed */
	if (*off) {
		__segment_seek(&page, &poff, &plen, *off);
		*off = 0;
	}

	do {
		unsigned int flen = min(*len, plen);

		if (spd_fill_page(spd, page, flen, poff, skb))
			return 1;

		__segment_seek(&page, &poff, &plen, flen);
		*len -= flen;

	} while (*len && plen);

	return 0;
}

int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
			     unsigned int *len,
			     struct splice_pipe_desc *spd)
{
	int seg;

	/*
	 * map the linear part
	 */
	if (__splice_segment(virt_to_page(skb->data),
			     (unsigned long) skb->data & (PAGE_SIZE - 1),
			     skb_headlen(skb),
			     offset, len, skb, spd))
		return 1;

	/*
	 * then map the fragments
	 */
	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];

		if (__splice_segment(f->page, f->page_offset, f->size,
				     offset, len, skb, spd))
			return 1;
	}

	/*
	 * now see if we have a frag_list to map
	 */
	if (skb_shinfo(skb)->frag_list) {
		struct sk_buff *list = skb_shinfo(skb)->frag_list;

		for (; list && *len; list = list->next)
			if (__skb_splice_bits(list, offset, len, spd))
				return 1;
	}

	return 0;
}

/*
 * Map data from the skb to a pipe. Should handle both the linear part,
 * the fragments, and the frag list.
 */
int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
		    struct pipe_inode_info *pipe, unsigned int tlen,
		    unsigned int flags)
{
	struct partial_page partial[PIPE_BUFFERS];
	struct page *pages[PIPE_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages = pages,
		.partial = partial,
		.flags = flags,
		.ops = &sock_pipe_buf_ops,
		.spd_release = sock_spd_release,
	};
	struct sk_buff *skb;

	/*
	 * I'd love to avoid the clone here, but tcp_read_sock()
	 * ignores reference counts and unconditonally kills the sk_buff
	 * on return from the actor.
	 */
	skb = skb_clone(__skb, GFP_KERNEL);
	if (unlikely(!skb))
		return -ENOMEM;


	__skb_splice_bits(skb, &offset, &tlen, &spd);

	/*
	 * drop our reference to the clone, the pipe consumption will
	 * drop the rest.
	 */
	kfree_skb(skb);

	if (spd.nr_pages) {
		int ret;
		struct sock *sk = __skb->sk;

		/*
		 * Drop the socket lock, otherwise we have reverse
		 * locking dependencies between sk_lock and i_mutex
		 * here as compared to sendfile(). We enter here
		 * with the socket lock held, and splice_to_pipe() will
		 * grab the pipe inode lock. For sendfile() emulation,
		 * we call into ->sendpage() with the i_mutex lock held
		 * and networking will grab the socket lock.
		 */
		release_sock(sk);
		ret = splice_to_pipe(pipe, &spd);
		lock_sock(sk);
		return ret;
	}

	return 0;
}

[-- Attachment #3: x --]
[-- Type: text/plain, Size: 4985 bytes --]

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 25fa74d..017daef 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1188,114 +1188,89 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
 	return 0;
 }
 
-/*
- * Map linear and fragment data from the skb to spd. Returns number of
- * pages mapped.
- */
-static int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
-			     unsigned int *total_len,
-			     struct splice_pipe_desc *spd)
+static inline void __segment_seek(struct page **page, int *poff, int *plen,
+				  int off)
 {
-	unsigned int nr_pages = spd->nr_pages;
-	unsigned int poff, plen, len, toff, tlen;
-	int headlen, seg;
+	*poff += off;
+	*page += *poff / PAGE_SIZE;
+	*poff = *poff % PAGE_SIZE;
+	*plen -= off;
+}
 
-	toff = *offset;
-	tlen = *total_len;
-	if (!tlen)
-		goto err;
+static inline int __splice_segment(struct page *page, unsigned int poff,
+				   unsigned int plen, unsigned int *off,
+				   unsigned int *len, struct sk_buff *skb,
+				   struct splice_pipe_desc *spd)
+{
+	/* skip this segment if already processed */
+	if (*off >= plen) {
+		*off -= plen;
+		return 0;
+	}
 
-	/*
-	 * if the offset is greater than the linear part, go directly to
-	 * the fragments.
-	 */
-	headlen = skb_headlen(skb);
-	if (toff >= headlen) {
-		toff -= headlen;
-		goto map_frag;
+	/* ignore any bits we already processed */
+	if (*off) {
+		__segment_seek(&page, &poff, &plen, *off);
+		*off = 0;
 	}
 
-	/*
-	 * first map the linear region into the pages/partial map, skipping
-	 * any potential initial offset.
-	 */
-	len = 0;
-	while (len < headlen) {
-		void *p = skb->data + len;
-
-		poff = (unsigned long) p & (PAGE_SIZE - 1);
-		plen = min_t(unsigned int, headlen - len, PAGE_SIZE - poff);
-		len += plen;
-
-		if (toff) {
-			if (plen <= toff) {
-				toff -= plen;
-				continue;
-			}
-			plen -= toff;
-			poff += toff;
-			toff = 0;
-		}
+	do {
+		unsigned int flen = min(*len, plen);
 
-		plen = min(plen, tlen);
-		if (!plen)
-			break;
+		if (spd_fill_page(spd, page, flen, poff, skb))
+			return 1;
 
-		/*
-		 * just jump directly to update and return, no point
-		 * in going over fragments when the output is full.
-		 */
-		if (spd_fill_page(spd, virt_to_page(p), plen, poff, skb))
-			goto done;
+		__segment_seek(&page, &poff, &plen, flen);
+		*len -= flen;
 
-		tlen -= plen;
-	}
+	} while (*len && plen);
+
+	return 0;
+}
+
+int __skb_splice_bits(struct sk_buff *skb, unsigned int *offset,
+			     unsigned int *len,
+			     struct splice_pipe_desc *spd)
+{
+	int seg;
+
+	/*
+	 * map the linear part
+	 */
+	if (__splice_segment(virt_to_page(skb->data),
+			     (unsigned long) skb->data & (PAGE_SIZE - 1),
+			     skb_headlen(skb),
+			     offset, len, skb, spd))
+		return 1;
 
 	/*
 	 * then map the fragments
 	 */
-map_frag:
 	for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) {
 		const skb_frag_t *f = &skb_shinfo(skb)->frags[seg];
 
-		plen = f->size;
-		poff = f->page_offset;
-
-		if (toff) {
-			if (plen <= toff) {
-				toff -= plen;
-				continue;
-			}
-			plen -= toff;
-			poff += toff;
-			toff = 0;
-		}
-
-		plen = min(plen, tlen);
-		if (!plen)
-			break;
+		if (__splice_segment(f->page, f->page_offset, f->size,
+				     offset, len, skb, spd))
+			return 1;
+	}
 
-		if (spd_fill_page(spd, f->page, plen, poff, skb))
-			break;
+	/*
+	 * now see if we have a frag_list to map
+	 */
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
-		tlen -= plen;
+		for (; list && *len; list = list->next)
+			if (__skb_splice_bits(list, offset, len, spd))
+				return 1;
 	}
 
-done:
-	if (spd->nr_pages - nr_pages) {
-		*offset = 0;
-		*total_len = tlen;
-		return 0;
-	}
-err:
-	return 1;
+	return 0;
 }
 
 /*
  * Map data from the skb to a pipe. Should handle both the linear part,
- * the fragments, and the frag list. It does NOT handle frag lists within
- * the frag list, if such a thing exists. We'd probably need to recurse to
- * handle that cleanly.
+ * the fragments, and the frag list.
  */
 int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
 		    struct pipe_inode_info *pipe, unsigned int tlen,
@@ -1321,28 +1296,9 @@ int skb_splice_bits(struct sk_buff *__skb, unsigned int offset,
 	if (unlikely(!skb))
 		return -ENOMEM;
 
-	/*
-	 * __skb_splice_bits() only fails if the output has no room left,
-	 * so no point in going over the frag_list for the error case.
-	 */
-	if (__skb_splice_bits(skb, &offset, &tlen, &spd))
-		goto done;
-	else if (!tlen)
-		goto done;
-
-	/*
-	 * now see if we have a frag_list to map
-	 */
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
 
-		for (; list && tlen; list = list->next) {
-			if (__skb_splice_bits(list, &offset, &tlen, &spd))
-				break;
-		}
-	}
+	__skb_splice_bits(skb, &offset, &tlen, &spd);
 
-done:
 	/*
 	 * drop our reference to the clone, the pipe consumption will
 	 * drop the rest.

  reply	other threads:[~2008-06-20 20:45 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <485B4ADE.8070102@domat.com.pl>
2008-06-18 16:07 ` [RESEND] [PATCH] tcp: fix for splice receive when used with software LRO Octavian Purdila
2008-06-20  6:37   ` Jarek Poplawski
2008-06-20 10:09     ` Octavian Purdila
2008-06-20 11:01       ` Jarek Poplawski
2008-06-20 12:39         ` Octavian Purdila
2008-06-20 13:01           ` Jarek Poplawski
2008-06-20 20:44             ` Octavian Purdila [this message]
2008-06-22 21:07               ` Jarek Poplawski
2008-06-23  9:50                 ` Octavian Purdila
2008-06-23 20:48                   ` Jarek Poplawski
2008-06-20 21:57       ` Jarek Poplawski
2008-06-21  0:40         ` Octavian Purdila
2008-06-21  8:39           ` Jarek Poplawski
2008-06-21 10:32             ` Octavian Purdila
2008-06-28  0:27   ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200806202344.12771.opurdila@ixiacom.com \
    --to=opurdila@ixiacom.com \
    --cc=jarkao2@gmail.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).