From: Andi Kleen <andi@firstfloor.org>
To: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, dehrenberg@google.com,
Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 5/6] DIO: Separate map_bh from dio
Date: Wed, 22 Jun 2011 12:18:41 -0700 [thread overview]
Message-ID: <1308770322-22565-6-git-send-email-andi@firstfloor.org> (raw)
In-Reply-To: <1308770322-22565-1-git-send-email-andi@firstfloor.org>
From: Andi Kleen <ak@linux.intel.com>
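Only a single field of the map_bh buffer head, b_private, is needed after
the submission path. Move map_bh out of struct dio and onto the stack of
the submission path, keeping just a copy of b_private in struct dio, to
avoid storing the whole buffer head in the long-term slab object.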
This avoids the weird 104-byte hole in struct dio_submit, which also needed
to be memset early.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
fs/direct-io.c | 65 +++++++++++++++++++++++++++++++-------------------------
1 files changed, 36 insertions(+), 29 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 49dbd06..cc75445 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -119,6 +119,7 @@ struct dio {
loff_t i_size; /* i_size when submitted */
dio_iodone_t *end_io; /* IO completion function */
+ void *b_private; /* copy from map_bh.b_private */
/* BIO completion state */
spinlock_t bio_lock; /* protects BIO fields below */
@@ -133,7 +134,6 @@ struct dio {
struct kiocb *iocb; /* kiocb */
ssize_t result; /* IO result */
- struct buffer_head map_bh; /* last get_block() result */
/*
* pages[] (and any fields placed after it) are not zeroed out at
* allocation time. Don't add new fields after pages[] unless you
@@ -257,7 +257,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
if (dio->end_io && dio->result) {
dio->end_io(dio->iocb, offset, transferred,
- dio->map_bh.b_private, ret, is_async);
+ dio->b_private, ret, is_async);
} else if (is_async) {
aio_complete(dio->iocb, ret, 0);
}
@@ -532,10 +532,10 @@ static int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
* buffer_mapped(). However the direct-io code will only process holes one
* block at a time - it will repeatedly call get_block() as it walks the hole.
*/
-static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
+static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
+ struct buffer_head *map_bh)
{
int ret;
- struct buffer_head *map_bh = &dio->map_bh;
sector_t fs_startblk; /* Into file, in filesystem-sized blocks */
unsigned long fs_count; /* Number of filesystem-sized blocks */
unsigned long dio_count;/* Number of dio_block-sized blocks */
@@ -579,6 +579,9 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
ret = (*sdio->get_block)(dio->inode, fs_startblk,
map_bh, create);
+
+ /* Store for completion */
+ dio->b_private = map_bh->b_private;
}
return ret;
}
@@ -587,7 +590,7 @@ static int get_more_blocks(struct dio *dio, struct dio_submit *sdio)
* There is no bio. Make one now.
*/
static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
- sector_t start_sector)
+ sector_t start_sector, struct buffer_head *map_bh)
{
sector_t sector;
int ret, nr_pages;
@@ -596,10 +599,10 @@ static int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
if (ret)
goto out;
sector = start_sector << (sdio->blkbits - 9);
- nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev));
+ nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
nr_pages = min(nr_pages, BIO_MAX_PAGES);
BUG_ON(nr_pages <= 0);
- dio_bio_alloc(dio, sdio, dio->map_bh.b_bdev, sector, nr_pages);
+ dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
sdio->boundary = 0;
out:
return ret;
@@ -644,7 +647,8 @@ static int dio_bio_add_page(struct dio *dio, struct dio_submit *sdio)
* The caller of this function is responsible for removing cur_page from the
* dio, and for dropping the refcount which came from that presence.
*/
-static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
+static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
+ struct buffer_head *map_bh)
{
int ret = 0;
@@ -679,14 +683,14 @@ static int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio)
}
if (sdio->bio == NULL) {
- ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
+ ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
if (ret)
goto out;
}
if (dio_bio_add_page(dio, sdio) != 0) {
dio_bio_submit(dio, sdio);
- ret = dio_new_bio(dio, sdio, sdio->cur_page_block);
+ ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
if (ret == 0) {
ret = dio_bio_add_page(dio, sdio);
BUG_ON(ret != 0);
@@ -715,7 +719,8 @@ out:
*/
static int
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
- unsigned offset, unsigned len, sector_t blocknr)
+ unsigned offset, unsigned len, sector_t blocknr,
+ struct buffer_head *map_bh)
{
int ret = 0;
@@ -740,7 +745,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
* avoid metadata seeks.
*/
if (sdio->boundary) {
- ret = dio_send_cur_page(dio, sdio);
+ ret = dio_send_cur_page(dio, sdio, map_bh);
page_cache_release(sdio->cur_page);
sdio->cur_page = NULL;
}
@@ -751,7 +756,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
* If there's a deferred page already there then send it.
*/
if (sdio->cur_page) {
- ret = dio_send_cur_page(dio, sdio);
+ ret = dio_send_cur_page(dio, sdio, map_bh);
page_cache_release(sdio->cur_page);
sdio->cur_page = NULL;
if (ret)
@@ -773,16 +778,16 @@ out:
* file blocks. Only called for S_ISREG files - blockdevs do not set
* buffer_new
*/
-static void clean_blockdev_aliases(struct dio *dio)
+static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
{
unsigned i;
unsigned nblocks;
- nblocks = dio->map_bh.b_size >> dio->inode->i_blkbits;
+ nblocks = map_bh->b_size >> dio->inode->i_blkbits;
for (i = 0; i < nblocks; i++) {
- unmap_underlying_metadata(dio->map_bh.b_bdev,
- dio->map_bh.b_blocknr + i);
+ unmap_underlying_metadata(map_bh->b_bdev,
+ map_bh->b_blocknr + i);
}
}
@@ -795,7 +800,8 @@ static void clean_blockdev_aliases(struct dio *dio)
* `end' is zero if we're doing the start of the IO, 1 at the end of the
* IO.
*/
-static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
+static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end,
+ struct buffer_head *map_bh)
{
unsigned dio_blocks_per_fs_block;
unsigned this_chunk_blocks; /* In dio_blocks */
@@ -803,7 +809,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
struct page *page;
sdio->start_zero_done = 1;
- if (!sdio->blkfactor || !buffer_new(&dio->map_bh))
+ if (!sdio->blkfactor || !buffer_new(map_bh))
return;
dio_blocks_per_fs_block = 1 << sdio->blkfactor;
@@ -823,7 +829,7 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
page = ZERO_PAGE(0);
if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
- sdio->next_block_for_io))
+ sdio->next_block_for_io, map_bh))
return;
sdio->next_block_for_io += this_chunk_blocks;
@@ -845,13 +851,13 @@ static void dio_zero_block(struct dio *dio, struct dio_submit *sdio, int end)
* it should set b_size to PAGE_SIZE or more inside get_block(). This gives
* fine alignment but still allows this function to work in PAGE_SIZE units.
*/
-static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
+static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
+ struct buffer_head *map_bh)
{
const unsigned blkbits = sdio->blkbits;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
struct page *page;
unsigned block_in_page;
- struct buffer_head *map_bh = &dio->map_bh;
int ret = 0;
/* The I/O can start at any block offset within the first page */
@@ -877,7 +883,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
unsigned long blkmask;
unsigned long dio_remainder;
- ret = get_more_blocks(dio, sdio);
+ ret = get_more_blocks(dio, sdio, map_bh);
if (ret) {
page_cache_release(page);
goto out;
@@ -890,7 +896,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio)
sdio->next_block_for_io =
map_bh->b_blocknr << sdio->blkfactor;
if (buffer_new(map_bh))
- clean_blockdev_aliases(dio);
+ clean_blockdev_aliases(dio, map_bh);
if (!sdio->blkfactor)
goto do_holes;
@@ -949,7 +955,7 @@ do_holes:
* we must zero out the start of this block.
*/
if (unlikely(sdio->blkfactor && !sdio->start_zero_done))
- dio_zero_block(dio, sdio, 0);
+ dio_zero_block(dio, sdio, 0, map_bh);
/*
* Work out, in this_chunk_blocks, how much disk we
@@ -967,7 +973,7 @@ do_holes:
sdio->boundary = buffer_boundary(map_bh);
ret = submit_page_section(dio, sdio, page, offset_in_page,
- this_chunk_bytes, sdio->next_block_for_io);
+ this_chunk_bytes, sdio->next_block_for_io, map_bh);
if (ret) {
page_cache_release(page);
goto out;
@@ -1006,6 +1012,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
ssize_t ret = 0;
ssize_t ret2;
size_t bytes;
+ struct buffer_head map_bh = { 0, };
dio->inode = inode;
dio->rw = rw;
@@ -1060,7 +1067,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
sdio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
sdio->curr_user_address = user_addr;
- ret = do_direct_IO(dio, sdio);
+ ret = do_direct_IO(dio, sdio, &map_bh);
dio->result += iov[seg].iov_len -
((sdio->final_block_in_request - sdio->block_in_file) <<
@@ -1083,10 +1090,10 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
* There may be some unwritten disk at the end of a part-written
* fs-block-sized block. Go zero that now.
*/
- dio_zero_block(dio, sdio, 1);
+ dio_zero_block(dio, sdio, 1, &map_bh);
if (sdio->cur_page) {
- ret2 = dio_send_cur_page(dio, sdio);
+ ret2 = dio_send_cur_page(dio, sdio, &map_bh);
if (ret == 0)
ret = ret2;
page_cache_release(sdio->cur_page);
--
1.7.4.4
next prev parent reply other threads:[~2011-06-22 19:20 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-06-22 19:18 RFC: Micro-optimize direct IO submission path Andi Kleen
2011-06-22 19:18 ` [PATCH 1/6] DIO: Separate fields only used in the submission path from struct dio Andi Kleen
2011-06-22 19:18 ` [PATCH 2/6] DIO: Fix a wrong comment Andi Kleen
2011-06-22 19:18 ` [PATCH 3/6] DIO: Rearrange fields in dio/dio_submit to avoid holes Andi Kleen
2011-06-22 19:18 ` [PATCH 4/6] DIO: Use a slab cache for struct dio Andi Kleen
2011-06-22 19:18 ` Andi Kleen [this message]
2011-06-22 19:59 ` [PATCH 5/6] DIO: Separate map_bh from dio Christoph Hellwig
2011-06-22 19:18 ` [PATCH 6/6] DIO: Inline the complete submission path Andi Kleen
2011-06-22 19:59 ` Christoph Hellwig
2011-06-22 20:10 ` Andi Kleen
2011-06-23 10:35 ` Christoph Hellwig
2011-07-27 21:06 ` RFC: Micro-optimize direct IO " Christoph Hellwig
2011-07-27 21:35 ` Andi Kleen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1308770322-22565-6-git-send-email-andi@firstfloor.org \
--to=andi@firstfloor.org \
--cc=ak@linux.intel.com \
--cc=dehrenberg@google.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.