From: Andreas Gruenbacher <agruenba@redhat.com>
To: cluster-devel@redhat.com, Christoph Hellwig <hch@lst.de>
Cc: Andreas Gruenbacher <agruenba@redhat.com>, linux-fsdevel@vger.kernel.org
Subject: [PATCH v3 5/8] gfs2: Implement iomap buffered write support
Date: Fri, 23 Mar 2018 20:17:25 +0100 [thread overview]
Message-ID: <20180323191728.23819-6-agruenba@redhat.com> (raw)
In-Reply-To: <20180323191728.23819-1-agruenba@redhat.com>
With the traditional page-based writes, blocks are allocated separately
for each page written to. With iomap writes, we can allocate a lot more
blocks at once, with a fraction of the allocation overhead for each
page.
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
fs/gfs2/aops.c | 20 +++--
fs/gfs2/aops.h | 22 +++++
fs/gfs2/bmap.c | 263 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
fs/gfs2/file.c | 44 ++++++++--
4 files changed, 325 insertions(+), 24 deletions(-)
create mode 100644 fs/gfs2/aops.h
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index a1c6b5de9200..3d9633175aa8 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -22,6 +22,7 @@
#include <linux/backing-dev.h>
#include <linux/uio.h>
#include <trace/events/writeback.h>
+#include <linux/sched/signal.h>
#include "gfs2.h"
#include "incore.h"
@@ -36,10 +37,11 @@
#include "super.h"
#include "util.h"
#include "glops.h"
+#include "aops.h"
-static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
- unsigned int from, unsigned int len)
+void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
+ unsigned int from, unsigned int len)
{
struct buffer_head *head = page_buffers(page);
unsigned int bsize = head->b_size;
@@ -462,7 +464,7 @@ static int gfs2_jdata_writepages(struct address_space *mapping,
* Returns: errno
*/
-static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
+int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
struct buffer_head *dibh;
u64 dsize = i_size_read(&ip->i_inode);
@@ -773,7 +775,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
* adjust_fs_space - Adjusts the free space available due to gfs2_grow
* @inode: the rindex inode
*/
-static void adjust_fs_space(struct inode *inode)
+void adjust_fs_space(struct inode *inode)
{
struct gfs2_sbd *sdp = inode->i_sb->s_fs_info;
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
@@ -819,11 +821,11 @@ static void adjust_fs_space(struct inode *inode)
* This copies the data from the page into the inode block after
* the inode data structure itself.
*
- * Returns: errno
+ * Returns: copied bytes or errno
*/
-static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
- loff_t pos, unsigned copied,
- struct page *page)
+int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
+ loff_t pos, unsigned copied,
+ struct page *page)
{
struct gfs2_inode *ip = GFS2_I(inode);
u64 to = pos + copied;
@@ -862,7 +864,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
* The main write_end function for GFS2. We just put our locking around the VFS
* provided functions.
*
- * Returns: errno
+ * Returns: copied bytes or errno
*/
static int gfs2_write_end(struct file *file, struct address_space *mapping,
diff --git a/fs/gfs2/aops.h b/fs/gfs2/aops.h
new file mode 100644
index 000000000000..976bb32dd405
--- /dev/null
+++ b/fs/gfs2/aops.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2017 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License version 2.
+ */
+
+#ifndef __AOPS_DOT_H__
+#define __AOPS_DOT_H__
+
+#include "incore.h"
+
+extern int stuffed_readpage(struct gfs2_inode *ip, struct page *page);
+extern int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
+ loff_t pos, unsigned copied,
+ struct page *page);
+extern void adjust_fs_space(struct inode *inode);
+extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page,
+ unsigned int from, unsigned int len);
+
+#endif /* __AOPS_DOT_H__ */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 1ed7bc022d7b..b6ee5252c014 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -28,6 +28,7 @@
#include "trans.h"
#include "dir.h"
#include "util.h"
+#include "aops.h"
#include "trace_gfs2.h"
/* This doesn't need to be that large as max 64 bit pointers in a 4k
@@ -41,6 +42,8 @@ struct metapath {
int mp_aheight; /* actual height (lookup height) */
};
+static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length);
+
/**
* gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
* @ip: the inode
@@ -375,7 +378,7 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
return mp->mp_aheight - x - 1;
}
-static inline void release_metapath(struct metapath *mp)
+static void release_metapath(struct metapath *mp)
{
int i;
@@ -383,6 +386,7 @@ static inline void release_metapath(struct metapath *mp)
if (mp->mp_bh[i] == NULL)
break;
brelse(mp->mp_bh[i]);
+ mp->mp_bh[i] = NULL;
}
}
@@ -675,14 +679,15 @@ static u64 hole_size(struct inode *inode, sector_t lblock, struct metapath *mp)
return holesz << inode->i_blkbits;
}
-static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap)
+static void gfs2_stuffed_iomap(struct inode *inode, struct iomap *iomap,
+ u64 length)
{
struct gfs2_inode *ip = GFS2_I(inode);
iomap->addr = (ip->i_no_addr << inode->i_blkbits) +
sizeof(struct gfs2_dinode);
iomap->offset = 0;
- iomap->length = i_size_read(inode);
+ iomap->length = length;
iomap->type = IOMAP_MAPPED;
iomap->flags = IOMAP_F_DATA_INLINE;
}
@@ -722,10 +727,15 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
if (flags & IOMAP_REPORT) {
if (pos >= i_size_read(inode))
return -ENOENT;
- gfs2_stuffed_iomap(inode, iomap);
+ gfs2_stuffed_iomap(inode, iomap,
+ i_size_read(inode));
+ return 0;
+ }
+ if (pos + length <= gfs2_max_stuffed_size(ip)) {
+ gfs2_stuffed_iomap(inode, iomap,
+ gfs2_max_stuffed_size(ip));
return 0;
}
- BUG_ON(!(flags & IOMAP_WRITE));
}
lblock = pos >> inode->i_blkbits;
lblock_stop = (pos + length - 1) >> inode->i_blkbits;
@@ -796,9 +806,140 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
else
iomap->length = size - pos;
}
+ /* FIXME: Should we limit iomap->length to the maximum allocation size
+ * here according to how gfs2_iomap_alloc allocates blocks? */
goto out;
}
+static int gfs2_write_lock(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ int error;
+
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+ error = gfs2_glock_nq(&ip->i_gh);
+ if (error)
+ goto out_uninit;
+ if (&ip->i_inode == sdp->sd_rindex) {
+ struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+ error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE,
+ GL_NOCACHE, &m_ip->i_gh);
+ if (error)
+ goto out_unlock;
+ }
+ return 0;
+
+out_unlock:
+ gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+ gfs2_holder_uninit(&ip->i_gh);
+ return error;
+}
+
+static void gfs2_write_unlock(struct inode *inode)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+ if (&ip->i_inode == sdp->sd_rindex) {
+ struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
+
+ gfs2_glock_dq_uninit(&m_ip->i_gh);
+ }
+ gfs2_glock_dq_uninit(&ip->i_gh);
+}
+
+static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, loff_t length,
+ unsigned flags, struct iomap *iomap)
+{
+ struct metapath mp = { .mp_aheight = 1, };
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+ bool unstuff, alloc_required;
+ int ret;
+
+ ret = gfs2_write_lock(inode);
+ if (ret)
+ return ret;
+
+ unstuff = gfs2_is_stuffed(ip) &&
+ pos + length > gfs2_max_stuffed_size(ip);
+
+ ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+ if (ret)
+ goto out_release;
+
+ alloc_required = unstuff || iomap->type != IOMAP_MAPPED;
+
+ if (alloc_required || gfs2_is_jdata(ip))
+ gfs2_write_calc_reserv(ip, iomap->length, &data_blocks, &ind_blocks);
+
+ if (alloc_required) {
+ struct gfs2_alloc_parms ap = { .target = data_blocks + ind_blocks };
+ ret = gfs2_quota_lock_check(ip, &ap);
+ if (ret)
+ goto out_release;
+
+ ret = gfs2_inplace_reserve(ip, &ap);
+ if (ret)
+ goto out_qunlock;
+ }
+
+ rblocks = RES_DINODE + ind_blocks;
+ if (gfs2_is_jdata(ip))
+ rblocks += data_blocks;
+ if (ind_blocks || data_blocks)
+ rblocks += RES_STATFS + RES_QUOTA;
+ if (inode == sdp->sd_rindex)
+ rblocks += 2 * RES_STATFS;
+ if (alloc_required)
+ rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
+
+ ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits);
+ if (ret)
+ goto out_trans_fail;
+
+ if (unstuff) {
+ ret = gfs2_unstuff_dinode(ip, NULL);
+ if (ret)
+ goto out_trans_end;
+ release_metapath(&mp);
+ ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
+ flags, iomap, &mp);
+ if (ret)
+ goto out_trans_end;
+ }
+
+ if (iomap->type != IOMAP_MAPPED) {
+ ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+ if (ret) {
+ gfs2_trans_end(sdp);
+ if (alloc_required)
+ gfs2_inplace_release(ip);
+ punch_hole(ip, iomap->offset, iomap->length);
+ goto out_qunlock;
+ }
+ }
+ release_metapath(&mp);
+ return 0;
+
+out_trans_end:
+ gfs2_trans_end(sdp);
+out_trans_fail:
+ if (alloc_required)
+ gfs2_inplace_release(ip);
+out_qunlock:
+ if (alloc_required)
+ gfs2_quota_unlock(ip);
+out_release:
+ release_metapath(&mp);
+ gfs2_write_unlock(inode);
+ return ret;
+}
+
static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
unsigned flags, struct iomap *iomap)
{
@@ -808,10 +949,7 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
trace_gfs2_iomap_start(ip, pos, length, flags);
if (flags & IOMAP_WRITE) {
- ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
- if (!ret && iomap->type == IOMAP_HOLE)
- ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
- release_metapath(&mp);
+ ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
} else {
ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
release_metapath(&mp);
@@ -820,8 +958,115 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
return ret;
}
+static int
+gfs2_iomap_write_begin(struct inode *inode, loff_t pos, unsigned len,
+ unsigned flags, struct page **pagep, struct iomap *iomap)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct page *page;
+ int ret;
+
+ if (gfs2_is_stuffed(ip)) {
+ BUG_ON(pos + len > gfs2_max_stuffed_size(ip));
+
+ page = grab_cache_page_write_begin(inode->i_mapping, 0, flags);
+ if (!page)
+ return -ENOMEM;
+
+ if (!PageUptodate(page)) {
+ ret = stuffed_readpage(ip, page);
+ if (ret) {
+ unlock_page(page);
+ put_page(page);
+ return ret;
+ }
+ }
+ *pagep = page;
+ return 0;
+ }
+
+ return iomap_write_begin(inode, pos, len, flags, pagep, iomap);
+}
+
+static int gfs2_iomap_write_end(struct inode *inode, loff_t pos, unsigned len,
+ unsigned copied, struct page *page)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct buffer_head *dibh;
+ int ret;
+
+ if (gfs2_is_stuffed(ip)) {
+ ret = gfs2_meta_inode_buffer(ip, &dibh);
+ if (ret) {
+ unlock_page(page);
+ put_page(page);
+ return ret;
+ }
+ ret = gfs2_stuffed_write_end(inode, dibh, pos, copied, page);
+ brelse(dibh);
+ return ret;
+ }
+
+ if (gfs2_is_jdata(ip))
+ gfs2_page_add_databufs(ip, page, offset_in_page(pos), len);
+
+ return iomap_write_end(inode, pos, len, copied, page);
+}
+
+static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
+ ssize_t written, unsigned flags, struct iomap *iomap)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct gfs2_trans *tr = current->journal_info;
+
+ if (!(flags & IOMAP_WRITE))
+ return 0;
+
+ gfs2_ordered_add_inode(ip);
+
+ if (tr->tr_num_buf_new)
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ else {
+ struct buffer_head *dibh;
+ int ret;
+
+ ret = gfs2_meta_inode_buffer(ip, &dibh);
+ if (unlikely(ret))
+ return ret;
+ gfs2_trans_add_meta(ip->i_gl, dibh);
+ brelse(dibh);
+ }
+
+ if (inode == sdp->sd_rindex) {
+ adjust_fs_space(inode);
+ sdp->sd_rindex_uptodate = 0;
+ }
+
+ gfs2_trans_end(sdp);
+ gfs2_inplace_release(ip);
+
+ if (length != written && (iomap->flags & IOMAP_F_NEW)) {
+ /* Deallocate blocks that were just allocated. */
+ loff_t end = round_down(pos + length, PAGE_SIZE);
+ pos = round_up(pos, PAGE_SIZE);
+ if (pos < end) {
+ truncate_pagecache_range(inode, pos, end - 1);
+ punch_hole(ip, pos, end - pos);
+ }
+ }
+
+ if (ip->i_qadata && ip->i_qadata->qa_qd_num)
+ gfs2_quota_unlock(ip);
+ gfs2_write_unlock(inode);
+ return 0;
+}
+
const struct iomap_ops gfs2_iomap_ops = {
.iomap_begin = gfs2_iomap_begin,
+ .write_begin = gfs2_iomap_write_begin,
+ .write_end = gfs2_iomap_write_end,
+ .iomap_end = gfs2_iomap_end,
};
/**
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 2edd3a9a7b79..d1b54e781577 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -26,10 +26,12 @@
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
#include <linux/delay.h>
+#include <linux/backing-dev.h>
#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
+#include "aops.h"
#include "dir.h"
#include "glock.h"
#include "glops.h"
@@ -691,9 +693,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
/**
* gfs2_file_write_iter - Perform a write to a file
* @iocb: The io context
- * @iov: The data to write
- * @nr_segs: Number of @iov segments
- * @pos: The file position
+ * @from: The data to write
*
* We have to do a lock/unlock here to refresh the inode size for
* O_APPEND writes, otherwise we can land up writing at the wrong
@@ -705,8 +705,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct gfs2_inode *ip = GFS2_I(file_inode(file));
- int ret;
+ struct inode *inode = file->f_mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ ssize_t ret;
ret = gfs2_rsqa_alloc(ip);
if (ret)
@@ -723,7 +724,38 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
gfs2_glock_dq_uninit(&gh);
}
- return generic_file_write_iter(iocb, from);
+ if (iocb->ki_flags & IOCB_DIRECT)
+ return generic_file_write_iter(iocb, from);
+
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
+
+ ret = file_remove_privs(file);
+ if (ret)
+ goto out;
+
+ ret = file_update_time(file);
+ if (ret)
+ goto out;
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = inode_to_bdi(inode);
+
+ ret = iomap_file_buffered_write(iocb, from, &gfs2_iomap_ops);
+
+ current->backing_dev_info = NULL;
+
+out:
+ inode_unlock(inode);
+ if (likely(ret > 0)) {
+ iocb->ki_pos += ret;
+
+ /* Handle various SYNC-type writes */
+ ret = generic_write_sync(iocb, ret);
+ }
+ return ret;
}
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
--
2.14.3
next prev parent reply other threads:[~2018-03-23 19:17 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-03-23 19:17 [PATCH v3 0/8] gfs2 iomap write support Andreas Gruenbacher
2018-03-23 19:17 ` [PATCH v3 1/8] gfs2: gfs2_stuffed_write_end cleanup Andreas Gruenbacher
2018-03-23 19:17 ` [PATCH v3 2/8] gfs2: Remove ordered write mode handling from gfs2_trans_add_data Andreas Gruenbacher
2018-03-23 19:17 ` [PATCH v3 3/8] gfs2: Iomap cleanups and improvements Andreas Gruenbacher
2018-04-06 14:06 ` [Cluster-devel] " Bob Peterson
2018-04-06 15:19 ` Andreas Gruenbacher
2018-03-23 19:17 ` [PATCH v3 4/8] iomap: Add write_{begin,end} iomap operations Andreas Gruenbacher
2018-03-23 19:17 ` Andreas Gruenbacher [this message]
2018-04-06 14:44 ` [Cluster-devel] [PATCH v3 5/8] gfs2: Implement iomap buffered write support Bob Peterson
2018-04-06 15:15 ` Andreas Grünbacher
2018-03-23 19:17 ` [PATCH v3 6/8] gfs2: Implement iomap direct I/O support Andreas Gruenbacher
2018-04-06 15:08 ` [Cluster-devel] " Bob Peterson
2018-03-23 19:17 ` [PATCH v3 7/8] gfs2: Remove gfs2_write_{begin,end} Andreas Gruenbacher
2018-03-23 19:17 ` [PATCH v3 8/8] iomap: Complete partial direct I/O writes synchronously Andreas Gruenbacher
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180323191728.23819-6-agruenba@redhat.com \
--to=agruenba@redhat.com \
--cc=cluster-devel@redhat.com \
--cc=hch@lst.de \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).