From: Zach Brown <zach.brown@oracle.com>
To: linux-fsdevel@vger.kernel.org
Cc: Christoph Hellwig <hch@infradead.org>, David Chinner <dgc@sgi.com>
Subject: [PATCH 1/4] struct rwmem: an abstraction of the memory argument to read/write
Date: Tue, 6 Nov 2007 17:43:28 -0800 [thread overview]
Message-ID: <11943998112428-git-send-email-zach.brown@oracle.com> (raw)
In-Reply-To: <11943998113245-git-send-email-zach.brown@oracle.com>
This adds a structure and interface to represent the segments of memory
which are acting as the source or destination for a read or write operation.
Callers would fill this structure and then pass it down the rw path.
The intent is to let stages in the rw path make specific calls against this
API and structure instead of working with, say, struct iovec natively.
The main intent of this is to enable kernel calls into the rw path which
specify memory with page/offset/len tuples.
Another potential benefit of this is the reduction in iterations over iovecs at
various points in the kernel. Each iov_length(iov) call, for example, could be
translated into rwm->total_bytes. O_DIRECT's check of memory alignment is
changed into a single test against rwm->boundary_bits.
I imagine this might integrate well with the iov_iter interface, though I
haven't examined that in any depth.
---
fs/Makefile | 2 +-
fs/rwmem.c | 92 +++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/rwmem.h | 29 +++++++++++++++
3 files changed, 122 insertions(+), 1 deletions(-)
create mode 100644 fs/rwmem.c
create mode 100644 include/linux/rwmem.h
diff --git a/fs/Makefile b/fs/Makefile
index 500cf15..c342365 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
pnode.o drop_caches.o splice.o sync.o utimes.o \
- stack.o
+ stack.o rwmem.o
ifeq ($(CONFIG_BLOCK),y)
obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o
diff --git a/fs/rwmem.c b/fs/rwmem.c
new file mode 100644
index 0000000..0433ba4
--- /dev/null
+++ b/fs/rwmem.c
@@ -0,0 +1,92 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/uio.h>
+#include <linux/rwmem.h>
+
+/*
+ * Counts the pages touched by the byte range that starts at addr and runs
+ * for bytes.  The end of the range is rounded up to a page boundary and the
+ * start is rounded down before the two page numbers are subtracted.
+ */
+static inline unsigned long pages_spanned(unsigned long addr,
+					  unsigned long bytes)
+{
+	unsigned long first_page = addr >> PAGE_SHIFT;
+	unsigned long end_page = (addr + bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	return end_page - first_page;
+}
+
+/*
+ * Fills in the summary fields of an iovec-backed rwmem by walking its
+ * segments once.  rwm must be the rwmem member embedded in a struct
+ * rwmem_iovec, and rwm->nr_segs and the iov pointer are read here, so the
+ * caller has to have set them beforehand.
+ */
+void rwmem_iovec_init(struct rwmem *rwm)
+{
+	struct rwmem_iovec *rwi = container_of(rwm, struct rwmem_iovec, rwmem);
+	const struct iovec *iov;	/* rwi->iov is const; keep the qualifier */
+	unsigned long i;
+
+	rwm->total_bytes = 0;
+	rwm->nr_pages = 0;
+	rwm->boundary_bits = 0;
+
+	for (i = 0; i < rwm->nr_segs; i++) {
+		iov = &rwi->iov[i];
+
+		rwm->total_bytes += iov->iov_len;
+		rwm->nr_pages += pages_spanned((unsigned long)iov->iov_base,
+					       iov->iov_len);
+		/*
+		 * OR together every base address and length so that one mask
+		 * test can check the alignment of all segments at once.
+		 */
+		rwm->boundary_bits |= (unsigned long)iov->iov_base |
+				      (unsigned long)iov->iov_len;
+	}
+}
+
+/*
+ * Gives the byte offset into its first page at which segment i begins.
+ * Asking for a segment at or beyond nr_segs is a bug.
+ */
+unsigned long rwmem_iovec_seg_page_offset(struct rwmem *rwm, unsigned long i)
+{
+	struct rwmem_iovec *rwi;
+	unsigned long start;
+
+	rwi = container_of(rwm, struct rwmem_iovec, rwmem);
+	BUG_ON(i >= rwm->nr_segs);
+
+	start = (unsigned long)rwi->iov[i].iov_base;
+	return start & ~PAGE_MASK;
+}
+
+/*
+ * Gives the length in bytes of segment i.  Asking for a segment at or
+ * beyond nr_segs is a bug.
+ */
+unsigned long rwmem_iovec_seg_bytes(struct rwmem *rwm, unsigned long i)
+{
+	struct rwmem_iovec *rwi;
+
+	rwi = container_of(rwm, struct rwmem_iovec, rwmem);
+	BUG_ON(i >= rwm->nr_segs);
+
+	return rwi->iov[i].iov_len;
+}
+
+/*
+ * Calls get_user_pages() for up to max_pages of the user memory behind
+ * segment i and stores the resulting pages in pages.
+ *
+ * *cursor tracks progress through the segment between calls: a value of 0
+ * means start at the beginning of the segment, otherwise it is the user
+ * address to continue from.  When get_user_pages() returns a positive
+ * count, *cursor is advanced by that many pages, or set to ~0 once it has
+ * reached the end of the segment.
+ *
+ * Returns whatever get_user_pages() returned; *cursor is left untouched
+ * unless that value is positive.
+ */
+int rwmem_iovec_get_seg_pages(struct rwmem *rwm, unsigned long i,
+			      unsigned long *cursor, struct page **pages,
+			      unsigned long max_pages, int write)
+{
+	struct rwmem_iovec *rwi = container_of(rwm, struct rwmem_iovec, rwmem);
+	const struct iovec *iov;	/* rwi->iov is const; keep the qualifier */
+	int ret;
+
+	BUG_ON(i >= rwm->nr_segs);
+	iov = &rwi->iov[i];
+
+	if (*cursor == 0)
+		*cursor = (unsigned long)iov->iov_base;
+
+	/* never ask for more pages than remain between cursor and segment end */
+	max_pages = min(pages_spanned(*cursor, iov->iov_len -
+				      (*cursor - (unsigned long)iov->iov_base)),
+			max_pages);
+
+	/* get_user_pages() is called with the caller's mmap_sem held for read */
+	down_read(&current->mm->mmap_sem);
+	ret = get_user_pages(current, current->mm, *cursor, max_pages, write,
+			     0, pages, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (ret > 0) {
+		*cursor += ret * PAGE_SIZE;
+		if (*cursor >= (unsigned long)iov->iov_base + iov->iov_len)
+			*cursor = ~0;
+	}
+
+	return ret;
+}
+
+/*
+ * Operations table for rwmem instances backed by an array of iovecs; each
+ * entry points at the matching rwmem_iovec_*() function.
+ */
+struct rwmem_ops rwmem_iovec_ops = {
+ .init = rwmem_iovec_init,
+ .seg_page_offset = rwmem_iovec_seg_page_offset,
+ .seg_bytes = rwmem_iovec_seg_bytes,
+ .get_seg_pages = rwmem_iovec_get_seg_pages,
+};
diff --git a/include/linux/rwmem.h b/include/linux/rwmem.h
new file mode 100644
index 0000000..666f9f4
--- /dev/null
+++ b/include/linux/rwmem.h
@@ -0,0 +1,29 @@
+#ifndef _LINUX_RWMEM_H
+#define _LINUX_RWMEM_H
+
+struct rwmem_ops;
+
+/*
+ * Describes the memory acting as the source or destination of a read or
+ * write.  The summary fields are described as the iovec backend's init
+ * computes them.
+ */
+struct rwmem {
+	struct rwmem_ops *ops;		/* backend operations */
+	size_t total_bytes;		/* sum of all segment lengths */
+	unsigned long boundary_bits;	/* OR of every segment base and length */
+	unsigned long nr_pages;		/* sum of pages spanned by each segment */
+	unsigned short nr_segs;		/* number of segments */
+};
+
+/*
+ * Declared at file scope so that the struct page named in get_seg_pages()
+ * below is not a new type private to that parameter list.
+ */
+struct page;
+
+/*
+ * Operations a backend provides for a struct rwmem.  Segments are addressed
+ * by index i.
+ */
+struct rwmem_ops {
+	/* computes the summary fields of rwm */
+	void (*init)(struct rwmem *rwm);
+	/* offset of the start of segment i within its first page */
+	unsigned long (*seg_page_offset)(struct rwmem *rwm, unsigned long i);
+	/* total bytes in segment i */
+	unsigned long (*seg_bytes)(struct rwmem *rwm, unsigned long i);
+	/* fills pages with up to max_pages pages of segment i, resuming at *cursor */
+	int (*get_seg_pages)(struct rwmem *rwm, unsigned long i,
+			     unsigned long *cursor, struct page **pages,
+			     unsigned long max_pages, int write);
+};
+
+/*
+ * An rwmem whose segments are the entries of an iovec array.  The rwmem
+ * member is what gets passed around; the iovec functions recover this
+ * structure from it with container_of().
+ */
+struct rwmem_iovec {
+	struct rwmem rwmem;
+	const struct iovec *iov;
+};
+
+/*
+ * Defined in fs/rwmem.c.  Declared extern so that including this header
+ * does not create a definition in every file that includes it.
+ */
+extern struct rwmem_ops rwmem_iovec_ops;
+
+#endif
--
1.5.2.2
next prev parent reply other threads:[~2007-11-07 1:43 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-11-07 1:43 [RFC] fs io with struct page instead of iovecs Zach Brown
2007-11-07 1:43 ` Zach Brown [this message]
2007-11-07 1:43 ` [PATCH 2/4] dio: use rwmem to work with r/w memory arguments Zach Brown
2007-11-07 1:43 ` [PATCH 3/4] add rwmem type backed by pages Zach Brown
2007-11-07 1:43 ` [PATCH 4/4] add dio interface for page/offset/len tuples Zach Brown
2007-11-07 16:50 ` [RFC] fs io with struct page instead of iovecs Badari Pulavarty
2007-11-07 17:02 ` Zach Brown
2007-11-07 20:44 ` David Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=11943998112428-git-send-email-zach.brown@oracle.com \
--to=zach.brown@oracle.com \
--cc=dgc@sgi.com \
--cc=hch@infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).