All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Leech <christopher.leech@intel.com>
To: linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Subject: [PATCH 4/8] [I/OAT] Utility functions for offloading sk_buff to iovec copies
Date: Fri, 03 Mar 2006 13:42:27 -0800	[thread overview]
Message-ID: <20060303214227.11908.75473.stgit@gitlost.site> (raw)
In-Reply-To: <20060303214036.11908.10499.stgit@gitlost.site>

Provides for pinning user space pages in memory, copying to iovecs,
and copying from sk_buffs including fragmented and chained sk_buffs.

Signed-off-by: Chris Leech <christopher.leech@intel.com>
---

 drivers/dma/Makefile      |    1 
 drivers/dma/iovlock.c     |  320 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dmaengine.h |   24 +++
 include/net/netdma.h      |    5 +
 net/core/Makefile         |    3 
 net/core/user_dma.c       |  133 +++++++++++++++++++
 6 files changed, 485 insertions(+), 1 deletions(-)

diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index c8a5f56..ea2f110 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -1,2 +1,3 @@
 obj-y += dmaengine.o
+obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
new file mode 100644
index 0000000..edbf581
--- /dev/null
+++ b/drivers/dma/iovlock.c
@@ -0,0 +1,320 @@
+/*****************************************************************************
+Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+Portions based on net/core/datagram.c and copyrighted by their authors.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+The full GNU General Public License is included in this distribution in the
+file called LICENSE.
+*****************************************************************************/
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/pagemap.h>
+#include <net/tcp.h> /* for memcpy_toiovec */
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_DMA_ENGINE
+
+#define NUM_PAGES_SPANNED(start, length) \
+	((PAGE_ALIGN((unsigned long)start + length) - \
+	((unsigned long)start & PAGE_MASK)) >> PAGE_SHIFT)
+
+/*
+ * Lock down all the iovec pages needed for len bytes.
+ * Return a struct dma_locked_list to keep track of pages locked down.
+ *
+ * We are allocating a single chunk of memory, and then carving it up into
+ * 3 sections, the latter 2 whose size depends on the number of iovecs and the
+ * total number of pages, respectively.
+ */
+int dma_lock_iovec_pages(struct iovec *iov, size_t len, struct dma_locked_list
+	**locked_list)
+{
+	struct dma_locked_list *local_list;
+	struct page **pages;
+	int i;
+	int ret;
+
+	int nr_iovecs = 0;
+	int iovec_len_used = 0;
+	int iovec_pages_used = 0;
+
+	/* don't lock down non-user-based iovecs */
+	if (segment_eq(get_fs(), KERNEL_DS)) {
+		*locked_list = NULL;
+		return 0;
+	}
+
+	/* determine how many iovecs/pages there are, up front */
+	do {
+		iovec_len_used += iov[nr_iovecs].iov_len;
+		iovec_pages_used += NUM_PAGES_SPANNED(iov[nr_iovecs].iov_base,
+		                                      iov[nr_iovecs].iov_len);
+		nr_iovecs++;
+	} while (iovec_len_used < len);
+
+	/* single kmalloc for locked list, page_list[], and the page arrays */
+	local_list = kmalloc(sizeof(*local_list)
+		+ (nr_iovecs * sizeof (struct dma_page_list))
+		+ (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
+	if (!local_list)
+		return -ENOMEM;
+
+	/* list of pages starts right after the page list array */
+	pages = (struct page **) &local_list->page_list[nr_iovecs];
+
+	/* it's a userspace pointer */
+	might_sleep();
+
+	for (i = 0; i < nr_iovecs; i++) {
+		struct dma_page_list *page_list = &local_list->page_list[i];
+
+		len -= iov[i].iov_len;
+
+		if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len)) {
+			dma_unlock_iovec_pages(local_list);
+			return -EFAULT;
+		}
+
+		page_list->nr_pages = NUM_PAGES_SPANNED(iov[i].iov_base,
+		                                        iov[i].iov_len);
+		page_list->base_address = iov[i].iov_base;
+
+		page_list->pages = pages;
+		pages += page_list->nr_pages;
+
+		/* lock pages down */
+		down_read(&current->mm->mmap_sem);
+		ret = get_user_pages(
+			current,
+			current->mm,
+			(unsigned long) iov[i].iov_base,
+			page_list->nr_pages,
+			1,
+			0,
+			page_list->pages,
+			NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (ret != page_list->nr_pages) {
+			goto mem_error;
+		}
+
+		local_list->nr_iovecs = i + 1;
+	}
+
+	*locked_list = local_list;
+	return 0;
+
+mem_error:
+	dma_unlock_iovec_pages(local_list);
+	return -ENOMEM;
+}
+
+void dma_unlock_iovec_pages(struct dma_locked_list *locked_list)
+{
+	int i, j;
+
+	if (!locked_list)
+		return;
+
+	for (i = 0; i < locked_list->nr_iovecs; i++) {
+		struct dma_page_list *page_list = &locked_list->page_list[i];
+		for (j = 0; j < page_list->nr_pages; j++) {
+			SetPageDirty(page_list->pages[j]);
+			page_cache_release(page_list->pages[j]);
+		}
+	}
+
+	kfree(locked_list);
+}
+
+static dma_cookie_t dma_memcpy_tokerneliovec(struct dma_chan *chan, struct
+	iovec *iov, unsigned char *kdata, size_t len)
+{
+	dma_cookie_t dma_cookie = 0;
+
+	while (len > 0) {
+		if (iov->iov_len) {
+			int copy = min_t(unsigned int, iov->iov_len, len);
+			dma_cookie = dma_async_memcpy_buf_to_buf(
+					chan,
+					iov->iov_base,
+					kdata,
+					copy);
+			kdata += copy;
+			len -= copy;
+			iov->iov_len -= copy;
+			iov->iov_base += copy;
+		}
+		iov++;
+	}
+
+	return dma_cookie;
+}
+
+/*
+ * We have already locked down the pages we will be using in the iovecs.
+ * Each entry in iov array has corresponding entry in locked_list->page_list.
+ * Using array indexing to keep iov[] and page_list[] in sync.
+ * Initial elements in iov array's iov->iov_len will be 0 if already copied into
+ *   by another call.
+ * iov array length remaining guaranteed to be bigger than len.
+ */
+dma_cookie_t dma_memcpy_toiovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_locked_list *locked_list, unsigned char *kdata, size_t len)
+{
+	int iov_byte_offset;
+	int copy;
+	dma_cookie_t dma_cookie = 0;
+	int iovec_idx;
+	int page_idx;
+
+	if (!chan)
+		return memcpy_toiovec(iov, kdata, len);
+
+	/* -> kernel copies (e.g. smbfs) */
+	if (!locked_list)
+		return dma_memcpy_tokerneliovec(chan, iov, kdata, len);
+
+	iovec_idx = 0;
+	while (iovec_idx < locked_list->nr_iovecs) {
+		struct dma_page_list *page_list;
+
+		/* skip already used-up iovecs */
+		while (!iov[iovec_idx].iov_len)
+			iovec_idx++;
+
+		page_list = &locked_list->page_list[iovec_idx];
+
+		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+		/* break up copies to not cross page boundary */
+		while (iov[iovec_idx].iov_len) {
+			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+			copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+			dma_cookie = dma_async_memcpy_buf_to_pg(chan,
+					page_list->pages[page_idx],
+					iov_byte_offset,
+					kdata,
+					copy);
+
+			len -= copy;
+			iov[iovec_idx].iov_len -= copy;
+			iov[iovec_idx].iov_base += copy;
+
+			if (!len)
+				return dma_cookie;
+
+			kdata += copy;
+			iov_byte_offset = 0;
+			page_idx++;
+		}
+		iovec_idx++;
+	}
+
+	/* really bad if we ever run out of iovecs */
+	BUG();
+	return -EFAULT;
+}
+
+dma_cookie_t dma_memcpy_pg_toiovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_locked_list *locked_list, struct page *page,
+	unsigned int offset, size_t len)
+{
+	int iov_byte_offset;
+	int copy;
+	dma_cookie_t dma_cookie = 0;
+	int iovec_idx;
+	int page_idx;
+	int err;
+
+	/* this needs as-yet-unimplemented buf-to-buff, so punt. */
+	/* TODO: use dma for this */
+	if (!chan || !locked_list) {
+		u8 *vaddr = kmap(page);
+		err = memcpy_toiovec(iov, vaddr + offset, len);
+		kunmap(page);
+		return err;
+	}
+
+	iovec_idx = 0;
+	while (iovec_idx < locked_list->nr_iovecs) {
+		struct dma_page_list *page_list;
+
+		/* skip already used-up iovecs */
+		while (!iov[iovec_idx].iov_len)
+			iovec_idx++;
+
+		page_list = &locked_list->page_list[iovec_idx];
+
+		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+		/* break up copies to not cross page boundary */
+		while (iov[iovec_idx].iov_len) {
+			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+			copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+			dma_cookie = dma_async_memcpy_pg_to_pg(chan,
+					page_list->pages[page_idx],
+					iov_byte_offset,
+					page,
+					offset,
+					copy);
+
+			len -= copy;
+			iov[iovec_idx].iov_len -= copy;
+			iov[iovec_idx].iov_base += copy;
+
+			if (!len)
+				return dma_cookie;
+
+			offset += copy;
+			iov_byte_offset = 0;
+			page_idx++;
+		}
+		iovec_idx++;
+	}
+
+	/* really bad if we ever run out of iovecs */
+	BUG();
+	return -EFAULT;
+}
+
+#else
+
+int dma_lock_iovec_pages(struct iovec *iov, size_t len, struct dma_locked_list
+	**locked_list)
+{
+	*locked_list = NULL;
+
+	return 0;
+}
+
+void dma_unlock_iovec_pages(struct dma_locked_list* locked_list)
+{ }
+
+#endif
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index f8a77ab..e198712 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -319,4 +319,28 @@ static inline enum dma_status dma_async_
 int dma_async_device_register(struct dma_device *device);
 void dma_async_device_unregister(struct dma_device *device);
 
+/* --- Helper iov-locking functions --- */
+
+struct dma_page_list
+{
+	char *base_address;
+	int nr_pages;
+	struct page **pages;
+};
+
+struct dma_locked_list
+{
+	int nr_iovecs;
+	struct dma_page_list page_list[0];
+};
+
+int dma_lock_iovec_pages(struct iovec *iov, size_t len,
+	struct dma_locked_list	**locked_list);
+void dma_unlock_iovec_pages(struct dma_locked_list* locked_list);
+dma_cookie_t dma_memcpy_toiovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_locked_list *locked_list, unsigned char *kdata, size_t len);
+dma_cookie_t dma_memcpy_pg_toiovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_locked_list *locked_list, struct page *page,
+	unsigned int offset, size_t len);
+
 #endif /* DMAENGINE_H */
diff --git a/include/net/netdma.h b/include/net/netdma.h
index 3cd9e6d..415d74c 100644
--- a/include/net/netdma.h
+++ b/include/net/netdma.h
@@ -21,6 +21,7 @@ file called LICENSE.
 #ifndef NETDMA_H
 #define NETDMA_H
 #include <linux/dmaengine.h>
+#include <linux/skbuff.h>
 
 static inline struct dma_chan *get_softnet_dma(void)
 {
@@ -33,4 +34,8 @@ static inline struct dma_chan *get_softn
 	return chan;
 }
 
+int dma_skb_copy_datagram_iovec(struct dma_chan* chan,
+		const struct sk_buff *skb, int offset, struct iovec *to,
+		size_t len, struct dma_locked_list *locked_list);
+
 #endif /* NETDMA_H */
diff --git a/net/core/Makefile b/net/core/Makefile
index 630da0f..d02132b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -8,7 +8,8 @@ obj-y := sock.o request_sock.o skbuff.o 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_mcast.o dst.o \
-			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+			user_dma.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-$(CONFIG_SYSFS) += net-sysfs.o
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
new file mode 100644
index 0000000..1e1aae5
--- /dev/null
+++ b/net/core/user_dma.c
@@ -0,0 +1,133 @@
+/*****************************************************************************
+Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+Portions based on net/core/datagram.c and copyrighted by their authors.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+The full GNU General Public License is included in this distribution in the
+file called LICENSE.
+*****************************************************************************/
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h> /* for BUG_TRAP */
+#include <net/tcp.h>
+
+
+#ifdef CONFIG_NET_DMA
+
+/**
+ *	dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb - buffer to copy
+ *	@offset - offset in the buffer to start copying from
+ *	@iovec - io vector to copy to
+ *	@len - amount of data to copy from buffer to iovec
+ *	@locked_list - locked iovec buffer data
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+			struct sk_buff *skb, int offset, struct iovec *to,
+			size_t len, struct dma_locked_list *locked_list)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	dma_cookie_t cookie = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		if ((cookie = dma_memcpy_toiovec(chan, to, locked_list,
+		     skb->data + offset, copy)) < 0)
+			goto fault;
+		if ((len -= copy) == 0)
+			goto end;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+
+			cookie = dma_memcpy_pg_toiovec(chan, to, locked_list, page,
+					frag->page_offset + offset - start, copy);
+			if (cookie < 0)
+				goto fault;
+			if (!(len -= copy))
+				goto end;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			if ((copy = end - offset) > 0) {
+				if (copy > len)
+					copy = len;
+				if ((cookie = dma_skb_copy_datagram_iovec(chan, list,
+					        offset - start, to, copy, locked_list)) < 0)
+					goto fault;
+				if ((len -= copy) == 0)
+					goto end;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+
+end:
+	if (!len) {
+		skb->dma_cookie = cookie;
+		return cookie;
+	}
+
+fault:
+ 	return -EFAULT;
+}
+
+#else
+
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+			const struct sk_buff *skb, int offset, struct iovec *to,
+			size_t len, struct dma_locked_list *locked_list)
+{
+	return skb_copy_datagram_iovec(skb, offset, to, len);
+}
+
+#endif


  parent reply	other threads:[~2006-03-03 21:41 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-03 21:40 [PATCH 0/8] Intel I/O Acceleration Technology (I/OAT) Chris Leech
2006-03-03 21:42 ` [PATCH 1/8] [I/OAT] DMA memcpy subsystem Chris Leech
2006-03-04  1:40   ` David S. Miller
2006-03-06 19:39     ` Chris Leech
2006-03-04 19:20   ` Benjamin LaHaise
2006-03-06 19:48     ` Chris Leech
2006-03-03 21:42 ` [PATCH 3/8] [I/OAT] Setup the networking subsystem as a DMA client Chris Leech
2006-03-03 21:42 ` Chris Leech [this message]
2006-03-05  7:15   ` [PATCH 4/8] [I/OAT] Utility functions for offloading sk_buff to iovec copies Andrew Morton
2006-03-03 21:42 ` [PATCH 5/8] [I/OAT] Structure changes for TCP recv offload to I/OAT Chris Leech
2006-03-05  7:19   ` Andrew Morton
2006-03-03 21:42 ` [PATCH 6/8] [I/OAT] Rename cleanup_rbuf to tcp_cleanup_rbuf and make non-static Chris Leech
2006-03-03 21:42 ` [PATCH 7/8] [I/OAT] Add a sysctl for tuning the I/OAT offloaded I/O threshold Chris Leech
2006-03-04 11:22   ` Alexey Dobriyan
2006-03-05  7:21   ` Andrew Morton
2006-03-03 21:42 ` [PATCH 8/8] [I/OAT] TCP recv offload to I/OAT Chris Leech
2006-03-04 16:39   ` Pavel Machek
2006-03-04 23:18   ` Greg KH
2006-03-06 19:28     ` Chris Leech
2006-03-05  7:30   ` Andrew Morton
2006-03-05  8:45   ` Andrew Morton
2006-03-05 10:27     ` David S. Miller
2006-03-06 19:36     ` Chris Leech
2006-03-03 22:27 ` [PATCH 0/8] Intel I/O Acceleration Technology (I/OAT) Jeff Garzik
2006-03-03 22:39   ` Chris Leech
2006-03-03 22:45     ` Jeff Garzik
2006-03-04 11:35     ` Evgeniy Polyakov
2006-03-05  8:09     ` Andrew Morton
2006-03-05  9:02       ` Discourage duplicate symbols in the kernel? [Was: Intel I/O Acc...] Sam Ravnborg
2006-03-05  9:18         ` Andrew Morton
2006-03-06 19:56           ` Chris Leech
2006-03-03 22:58 ` [PATCH 0/8] Intel I/O Acceleration Technology (I/OAT) Kumar Gala
2006-03-03 23:32   ` Chris Leech
2006-03-04 18:46 ` Jan Engelhardt
2006-03-04 21:41   ` David S. Miller
2006-03-04 22:05     ` Gene Heskett
2006-03-04 22:16       ` David S. Miller
2006-03-05 13:45         ` Jan Engelhardt
2006-03-05 13:55           ` Arjan van de Ven
2006-03-05 16:14         ` Matthieu CASTET
2006-03-05 16:30           ` Jeff Garzik
2006-03-06 19:24           ` Chris Leech
2006-03-06 19:15       ` Chris Leech
2006-03-05  1:43     ` Evgeniy Polyakov
2006-03-05  2:08       ` David S. Miller
2006-03-06 17:44       ` Ingo Oeser
2006-03-07  7:44         ` Evgeniy Polyakov
2006-03-07  9:43           ` Ingo Oeser
2006-03-07 10:16             ` Evgeniy Polyakov
  -- strict thread matches above, loose matches on Subject: below --
2006-03-11  2:27 Chris Leech
2006-03-11  2:29 ` [PATCH 4/8] [I/OAT] Utility functions for offloading sk_buff to iovec copies Chris Leech

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060303214227.11908.75473.stgit@gitlost.site \
    --to=christopher.leech@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.