From: Qu Wenruo <quwenruo@cn.fujitsu.com>
To: linux-btrfs@vger.kernel.org
Subject: [PATCH v3 18/21] btrfs: fallocate: Add support to accurate qgroup reserve
Date: Tue, 13 Oct 2015 10:20:24 +0800 [thread overview]
Message-ID: <1444702827-18169-19-git-send-email-quwenruo@cn.fujitsu.com> (raw)
In-Reply-To: <1444702827-18169-1-git-send-email-quwenruo@cn.fujitsu.com>
Now fallocate will do accurate qgroup reserve space check, unlike old
method, which will always reserve the whole length of the range.
With this patch, fallocate will:
1) Iterate the desired range and mark in data rsv map
Only range which is going to be allocated will be recorded in data
rsv map and reserve the space.
For already allocated range (normal/prealloc extent) they will be
skipped.
Also, record the marked range into a new list for later use.
2) If 1) succeeded, do real file extent allocate.
And at file extent allocation time, corresponding range will be
removed from the range in data rsv map.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
v2:
Fix comment typo
Add missing cleanup for falloc list
v3:
Fix a false error return, due to chunk_allocation may return >0, but
incorrectly considered as an error.
---
fs/btrfs/file.c | 161 ++++++++++++++++++++++++++++++++++++++++----------------
1 file changed, 117 insertions(+), 44 deletions(-)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c97b24f..35bfabf 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2545,17 +2545,61 @@ out_only_mutex:
return err;
}
+/* Helper structure to record which range is already reserved */
+struct falloc_range {
+ struct list_head list;
+ u64 start;
+ u64 len;
+};
+
+/*
+ * Helper function to add falloc range
+ *
+ * Caller should have locked the larger range of extent containing
+ * [start, len)
+ */
+static int add_falloc_range(struct list_head *head, u64 start, u64 len)
+{
+ struct falloc_range *prev = NULL;
+ struct falloc_range *range = NULL;
+
+ if (list_empty(head))
+ goto insert;
+
+ /*
+ * As fallocate iterate by bytenr order, we only need to check
+ * the last range.
+ */
+ prev = list_entry(head->prev, struct falloc_range, list);
+ if (prev->start + prev->len == start) {
+ prev->len += len;
+ return 0;
+ }
+insert:
+ range = kmalloc(sizeof(*range), GFP_NOFS);
+ if (!range)
+ return -ENOMEM;
+ range->start = start;
+ range->len = len;
+ list_add_tail(&range->list, head);
+ return 0;
+}
+
static long btrfs_fallocate(struct file *file, int mode,
loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
struct extent_state *cached_state = NULL;
+ struct falloc_range *range;
+ struct falloc_range *tmp;
+ struct list_head reserve_list;
u64 cur_offset;
u64 last_byte;
u64 alloc_start;
u64 alloc_end;
u64 alloc_hint = 0;
u64 locked_end;
+ u64 actual_end = 0;
struct extent_map *em;
int blocksize = BTRFS_I(inode)->root->sectorsize;
int ret;
@@ -2571,14 +2615,12 @@ static long btrfs_fallocate(struct file *file, int mode,
return btrfs_punch_hole(inode, offset, len);
/*
- * Make sure we have enough space before we do the
- * allocation.
- * XXX: The behavior must be changed to do accurate check first
- * and then check data reserved space.
+ * Only trigger disk allocation, don't trigger qgroup reserve
+ *
+ * For qgroup space, it will be checked later.
*/
- ret = btrfs_check_data_free_space(inode, alloc_start,
- alloc_end - alloc_start);
- if (ret)
+ ret = btrfs_alloc_data_chunk_ondemand(inode, alloc_end - alloc_start);
+ if (ret < 0)
return ret;
mutex_lock(&inode->i_mutex);
@@ -2586,6 +2628,13 @@ static long btrfs_fallocate(struct file *file, int mode,
if (ret)
goto out;
+ /*
+ * TODO: Move these two operations after we have checked
+ * accurate reserved space, or fallocate can still fail but
+ * with page truncated or size expanded.
+ *
+ * But that's a minor problem and won't do much harm BTW.
+ */
if (alloc_start > inode->i_size) {
ret = btrfs_cont_expand(inode, i_size_read(inode),
alloc_start);
@@ -2644,10 +2693,10 @@ static long btrfs_fallocate(struct file *file, int mode,
}
}
+ /* First, check if we exceed the qgroup limit */
+ INIT_LIST_HEAD(&reserve_list);
cur_offset = alloc_start;
while (1) {
- u64 actual_end;
-
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
if (IS_ERR_OR_NULL(em)) {
@@ -2660,54 +2709,78 @@ static long btrfs_fallocate(struct file *file, int mode,
last_byte = min(extent_map_end(em), alloc_end);
actual_end = min_t(u64, extent_map_end(em), offset + len);
last_byte = ALIGN(last_byte, blocksize);
-
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
- ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
- last_byte - cur_offset,
- 1 << inode->i_blkbits,
- offset + len,
- &alloc_hint);
- } else if (actual_end > inode->i_size &&
- !(mode & FALLOC_FL_KEEP_SIZE)) {
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root = BTRFS_I(inode)->root;
-
- /*
- * We didn't need to allocate any more space, but we
- * still extended the size of the file so we need to
- * update i_size and the inode item.
- */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- } else {
- inode->i_ctime = CURRENT_TIME;
- i_size_write(inode, actual_end);
- btrfs_ordered_update_i_size(inode, actual_end,
- NULL);
- ret = btrfs_update_inode(trans, root, inode);
- if (ret)
- btrfs_end_transaction(trans, root);
- else
- ret = btrfs_end_transaction(trans,
- root);
+ ret = add_falloc_range(&reserve_list, cur_offset,
+ last_byte - cur_offset);
+ if (ret < 0) {
+ free_extent_map(em);
+ break;
}
+ ret = btrfs_qgroup_reserve_data(inode, cur_offset,
+ last_byte - cur_offset);
+ if (ret < 0)
+ break;
}
free_extent_map(em);
- if (ret < 0)
- break;
-
cur_offset = last_byte;
- if (cur_offset >= alloc_end) {
- ret = 0;
+ if (cur_offset >= alloc_end)
break;
+ }
+
+ /*
+ * If ret is still 0, means we're OK to fallocate.
+ * Or just cleanup the list and exit.
+ */
+ list_for_each_entry_safe(range, tmp, &reserve_list, list) {
+ if (!ret)
+ ret = btrfs_prealloc_file_range(inode, mode,
+ range->start,
+ range->len, 1 << inode->i_blkbits,
+ offset + len, &alloc_hint);
+ list_del(&range->list);
+ kfree(range);
+ }
+ if (ret < 0)
+ goto out_unlock;
+
+ if (actual_end > inode->i_size &&
+ !(mode & FALLOC_FL_KEEP_SIZE)) {
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+
+ /*
+ * We didn't need to allocate any more space, but we
+ * still extended the size of the file so we need to
+ * update i_size and the inode item.
+ */
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ } else {
+ inode->i_ctime = CURRENT_TIME;
+ i_size_write(inode, actual_end);
+ btrfs_ordered_update_i_size(inode, actual_end, NULL);
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret)
+ btrfs_end_transaction(trans, root);
+ else
+ ret = btrfs_end_transaction(trans, root);
}
}
+out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_NOFS);
out:
+ /*
+ * As we waited the extent range, the data_rsv_map must be empty
+ * in the range, as written data range will be released from it.
+ * And for prealloacted extent, it will also be released when
+ * its metadata is written.
+ * So this is completely used as cleanup.
+ */
+ btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start);
mutex_unlock(&inode->i_mutex);
/* Let go of our reservation. */
btrfs_free_reserved_data_space(inode, alloc_start,
--
2.6.1
next prev parent reply other threads:[~2015-10-13 2:20 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-13 2:20 [PATCH v3 00/21] Rework btrfs qgroup reserved space framework Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 01/21] btrfs: extent_io: Introduce needed structure for recoding set/clear bits Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 02/21] btrfs: extent_io: Introduce new function set_record_extent_bits Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 03/21] btrfs: extent_io: Introduce new function clear_record_extent_bits() Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 04/21] btrfs: qgroup: Introduce btrfs_qgroup_reserve_data function Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 05/21] btrfs: qgroup: Introduce functions to release/free qgroup reserve data space Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 06/21] btrfs: delayed_ref: Add new function to record reserved space into delayed ref Qu Wenruo
2015-10-25 14:39 ` Filipe Manana
2015-10-26 1:27 ` Qu Wenruo
2015-10-27 4:13 ` Qu Wenruo
2015-10-27 5:14 ` Chris Mason
2015-10-27 5:48 ` Qu Wenruo
2015-10-27 6:12 ` Chris Mason
2015-10-27 7:26 ` Qu Wenruo
2015-10-27 9:05 ` Qu Wenruo
2015-10-27 11:34 ` Chris Mason
2015-10-28 0:25 ` Qu Wenruo
2015-10-28 13:36 ` Holger Hoffstätte
2015-10-29 6:29 ` Chris Mason
2015-10-27 9:22 ` Filipe Manana
2015-10-13 2:20 ` [PATCH v3 07/21] btrfs: delayed_ref: release and free qgroup reserved at proper timing Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 08/21] btrfs: qgroup: Introduce new functions to reserve/free metadata Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 09/21] btrfs: qgroup: Use new metadata reservation Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 10/21] btrfs: extent-tree: Add new version of btrfs_check_data_free_space and btrfs_free_reserved_data_space Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 11/21] btrfs: extent-tree: Switch to new check_data_free_space and free_reserved_data_space Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 12/21] btrfs: extent-tree: Add new version of btrfs_delalloc_reserve/release_space Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 13/21] btrfs: extent-tree: Switch to new delalloc space reserve and release Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 14/21] btrfs: qgroup: Cleanup old inaccurate facilities Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 15/21] btrfs: qgroup: Add handler for NOCOW and inline Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 16/21] btrfs: Add handler for invalidate page Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 17/21] btrfs: qgroup: Add new trace point for qgroup data reserve Qu Wenruo
2015-10-13 2:20 ` Qu Wenruo [this message]
2015-10-13 2:20 ` [PATCH v3 19/21] btrfs: Avoid truncate tailing page if fallocate range doesn't exceed inode size Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 20/21] btrfs: qgroup: Avoid calling btrfs_free_reserved_data_space in clear_bit_hook Qu Wenruo
2015-10-13 2:20 ` [PATCH v3 21/21] btrfs: qgroup: Check if qgroup reserved space leaked Qu Wenruo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1444702827-18169-19-git-send-email-quwenruo@cn.fujitsu.com \
--to=quwenruo@cn.fujitsu.com \
--cc=linux-btrfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).