* [PATCHv7 1/6] reiser4: fix reiser4_post_{commit,write_back}_hook() and their invocations.
2014-07-31 10:19 [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
@ 2014-07-31 10:19 ` Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 2/6] reiser4: make space_allocator's check_blocks() reusable Ivan Shapovalov
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Ivan Shapovalov @ 2014-07-31 10:19 UTC (permalink / raw)
To: reiserfs-devel; +Cc: edward.shishkin, Ivan Shapovalov
- let all hooks be called from one place (reiser4_write_logs())
- don't call reiser4_post_write_back_hook() twice
- fix reiser4_post_write_back_hook(): call the correct method of space allocator
Signed-off-by: Ivan Shapovalov <intelfx100@gmail.com>
---
fs/reiser4/block_alloc.c | 2 +-
fs/reiser4/wander.c | 3 +--
2 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/fs/reiser4/block_alloc.c b/fs/reiser4/block_alloc.c
index 81ed96f..3a4b667 100644
--- a/fs/reiser4/block_alloc.c
+++ b/fs/reiser4/block_alloc.c
@@ -1150,7 +1150,7 @@ void reiser4_post_write_back_hook(void)
{
assert("zam-504", get_current_super_private() != NULL);
- sa_post_commit_hook();
+ sa_post_write_back_hook();
}
/*
diff --git a/fs/reiser4/wander.c b/fs/reiser4/wander.c
index 0b518c3..4e29de8 100644
--- a/fs/reiser4/wander.c
+++ b/fs/reiser4/wander.c
@@ -1140,7 +1140,6 @@ static int write_tx_back(struct commit_handle * ch)
int ret;
int barrier;
- reiser4_post_commit_hook();
fq = get_fq_for_current_atom();
if (IS_ERR(fq))
return PTR_ERR(fq);
@@ -1165,7 +1164,6 @@ static int write_tx_back(struct commit_handle * ch)
if (ret)
return ret;
}
- reiser4_post_write_back_hook();
return 0;
}
@@ -1251,6 +1249,7 @@ int reiser4_write_logs(long *nr_submitted)
spin_lock_atom(atom);
reiser4_atom_set_stage(atom, ASTAGE_POST_COMMIT);
spin_unlock_atom(atom);
+ reiser4_post_commit_hook();
ret = write_tx_back(&ch);
reiser4_post_write_back_hook();
--
2.0.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCHv7 2/6] reiser4: make space_allocator's check_blocks() reusable.
2014-07-31 10:19 [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 1/6] reiser4: fix reiser4_post_{commit,write_back}_hook() and their invocations Ivan Shapovalov
@ 2014-07-31 10:19 ` Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 3/6] reiser4: add an implementation of "block lists", splitted off the discard code Ivan Shapovalov
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Ivan Shapovalov @ 2014-07-31 10:19 UTC (permalink / raw)
To: reiserfs-devel; +Cc: edward.shishkin, Ivan Shapovalov
Make check_blocks() return a boolean value (whether the extent's
state matched our expectations) instead of asserting success and crashing
the system otherwise.
Also make it possible to check extents spanning multiple bitmap blocks.
The only user of reiser4_check_block() in its previous form has been updated
to assert on true return value.
Thus check_blocks() can now be reused by various parts of reiser4, e.g.
by the discard subsystem, which will be added in subsequent commits.
Signed-off-by: Ivan Shapovalov <intelfx100@gmail.com>
---
fs/reiser4/block_alloc.c | 16 +------
fs/reiser4/block_alloc.h | 14 +++---
fs/reiser4/plugin/space/bitmap.c | 79 +++++++++++++++++++++----------
fs/reiser4/plugin/space/bitmap.h | 2 +-
fs/reiser4/plugin/space/space_allocator.h | 4 +-
fs/reiser4/znode.c | 9 ++--
6 files changed, 70 insertions(+), 54 deletions(-)
diff --git a/fs/reiser4/block_alloc.c b/fs/reiser4/block_alloc.c
index 3a4b667..59515c3 100644
--- a/fs/reiser4/block_alloc.c
+++ b/fs/reiser4/block_alloc.c
@@ -962,26 +962,14 @@ static void used2free(reiser4_super_info_data * sbinfo, __u64 count)
spin_unlock_reiser4_super(sbinfo);
}
-#if REISER4_DEBUG
-
/* check "allocated" state of given block range */
-static void
+int
reiser4_check_blocks(const reiser4_block_nr * start,
const reiser4_block_nr * len, int desired)
{
- sa_check_blocks(start, len, desired);
+ return sa_check_blocks(start, len, desired);
}
-/* check "allocated" state of given block */
-void reiser4_check_block(const reiser4_block_nr * block, int desired)
-{
- const reiser4_block_nr one = 1;
-
- reiser4_check_blocks(block, &one, desired);
-}
-
-#endif
-
/* Blocks deallocation function may do an actual deallocation through space
plugin allocation or store deleted block numbers in atom's delete_set data
structure depend on @defer parameter. */
diff --git a/fs/reiser4/block_alloc.h b/fs/reiser4/block_alloc.h
index 689efc1..a4e98af 100644
--- a/fs/reiser4/block_alloc.h
+++ b/fs/reiser4/block_alloc.h
@@ -150,15 +150,15 @@ extern void cluster_reserved2free(int count);
extern int reiser4_check_block_counters(const struct super_block *);
-#if REISER4_DEBUG
-extern void reiser4_check_block(const reiser4_block_nr *, int);
+extern int reiser4_check_blocks(const reiser4_block_nr *start,
+ const reiser4_block_nr *len, int desired);
-#else
-
-# define reiser4_check_block(beg, val) noop
-
-#endif
+static inline int reiser4_check_block(const reiser4_block_nr *start,
+ int desired)
+{
+ return reiser4_check_blocks(start, NULL, desired);
+}
extern int reiser4_pre_commit_hook(void);
extern void reiser4_post_commit_hook(void);
diff --git a/fs/reiser4/plugin/space/bitmap.c b/fs/reiser4/plugin/space/bitmap.c
index 1d0fabf..bd41fb9 100644
--- a/fs/reiser4/plugin/space/bitmap.c
+++ b/fs/reiser4/plugin/space/bitmap.c
@@ -1222,29 +1222,13 @@ void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator * allocator,
release_and_unlock_bnode(bnode);
}
-/* plugin->u.space_allocator.check_blocks(). */
-void reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
- const reiser4_block_nr * len, int desired)
+static int check_blocks_one_bitmap(bmap_nr_t bmap, bmap_off_t start_offset,
+ bmap_off_t end_offset, int desired)
{
-#if REISER4_DEBUG
struct super_block *super = reiser4_get_current_sb();
-
- bmap_nr_t bmap;
- bmap_off_t start_offset;
- bmap_off_t end_offset;
-
- struct bitmap_node *bnode;
+ struct bitmap_node *bnode = get_bnode(super, bmap);
int ret;
- assert("zam-622", len != NULL);
- check_block_range(start, len);
- parse_blocknr(start, &bmap, &start_offset);
-
- end_offset = start_offset + *len;
- assert("nikita-2214", end_offset <= bmap_bit_count(super->s_blocksize));
-
- bnode = get_bnode(super, bmap);
-
assert("nikita-2215", bnode != NULL);
ret = load_and_lock_bnode(bnode);
@@ -1253,19 +1237,62 @@ void reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
assert("nikita-2216", jnode_is_loaded(bnode->wjnode));
if (desired) {
- assert("zam-623",
- reiser4_find_next_zero_bit(bnode_working_data(bnode),
+ ret = reiser4_find_next_zero_bit(bnode_working_data(bnode),
end_offset, start_offset)
- >= end_offset);
+ >= end_offset;
} else {
- assert("zam-624",
- reiser4_find_next_set_bit(bnode_working_data(bnode),
+ ret = reiser4_find_next_set_bit(bnode_working_data(bnode),
end_offset, start_offset)
- >= end_offset);
+ >= end_offset;
}
release_and_unlock_bnode(bnode);
-#endif
+
+ return ret;
+}
+
+/* plugin->u.space_allocator.check_blocks(). */
+int reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
+ const reiser4_block_nr * len, int desired)
+{
+ struct super_block *super = reiser4_get_current_sb();
+
+ reiser4_block_nr end;
+ bmap_nr_t bmap, end_bmap;
+ bmap_off_t offset, end_offset;
+ const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
+
+ assert("intelfx-9", start != NULL);
+ assert("intelfx-10", ergo(len != NULL, *len > 0));
+
+ if (len != NULL) {
+ check_block_range(start, len);
+ end = *start + *len - 1;
+ } else {
+ /* on next line, end is used as temporary len for check_block_range() */
+ end = 1; check_block_range(start, &end);
+ end = *start;
+ }
+
+ parse_blocknr(start, &bmap, &offset);
+
+ if (end == *start) {
+ end_bmap = bmap;
+ end_offset = offset;
+ } else {
+ parse_blocknr(&end, &end_bmap, &end_offset);
+ }
+ ++end_offset;
+
+ assert("intelfx-4", end_bmap >= bmap);
+ assert("intelfx-5", ergo(end_bmap == bmap, end_offset >= offset));
+
+ for (; bmap < end_bmap; bmap++, offset = 0) {
+ if (!check_blocks_one_bitmap(bmap, offset, max_offset, desired)) {
+ return 0;
+ }
+ }
+ return check_blocks_one_bitmap(bmap, offset, end_offset, desired);
}
/* conditional insertion of @node into atom's overwrite set if it was not there */
diff --git a/fs/reiser4/plugin/space/bitmap.h b/fs/reiser4/plugin/space/bitmap.h
index be867f1..4590498 100644
--- a/fs/reiser4/plugin/space/bitmap.h
+++ b/fs/reiser4/plugin/space/bitmap.h
@@ -19,7 +19,7 @@ extern int reiser4_alloc_blocks_bitmap(reiser4_space_allocator *,
reiser4_blocknr_hint *, int needed,
reiser4_block_nr * start,
reiser4_block_nr * len);
-extern void reiser4_check_blocks_bitmap(const reiser4_block_nr *,
+extern int reiser4_check_blocks_bitmap(const reiser4_block_nr *,
const reiser4_block_nr *, int);
extern void reiser4_dealloc_blocks_bitmap(reiser4_space_allocator *,
reiser4_block_nr,
diff --git a/fs/reiser4/plugin/space/space_allocator.h b/fs/reiser4/plugin/space/space_allocator.h
index 5bfa9a3..71bfd11 100644
--- a/fs/reiser4/plugin/space/space_allocator.h
+++ b/fs/reiser4/plugin/space/space_allocator.h
@@ -29,9 +29,9 @@ static inline void sa_dealloc_blocks (reiser4_space_allocator * al, reiser4_bloc
reiser4_dealloc_blocks_##allocator (al, start, len); \
} \
\
-static inline void sa_check_blocks (const reiser4_block_nr * start, const reiser4_block_nr * end, int desired) \
+static inline int sa_check_blocks (const reiser4_block_nr * start, const reiser4_block_nr * end, int desired) \
{ \
- reiser4_check_blocks_##allocator (start, end, desired); \
+ return reiser4_check_blocks_##allocator (start, end, desired); \
} \
\
static inline void sa_pre_commit_hook (void) \
diff --git a/fs/reiser4/znode.c b/fs/reiser4/znode.c
index 4ff9714..08eab3d 100644
--- a/fs/reiser4/znode.c
+++ b/fs/reiser4/znode.c
@@ -534,10 +534,11 @@ znode *zget(reiser4_tree * tree,
write_unlock_tree(tree);
}
-#if REISER4_DEBUG
- if (!reiser4_blocknr_is_fake(blocknr) && *blocknr != 0)
- reiser4_check_block(blocknr, 1);
-#endif
+
+ assert("intelfx-6",
+ ergo(!reiser4_blocknr_is_fake(blocknr) && *blocknr != 0,
+ reiser4_check_block(blocknr, 1)));
+
/* Check for invalid tree level, return -EIO */
if (unlikely(znode_get_level(result) != level)) {
warning("jmacd-504",
--
2.0.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCHv7 3/6] reiser4: add an implementation of "block lists", splitted off the discard code.
2014-07-31 10:19 [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 1/6] reiser4: fix reiser4_post_{commit,write_back}_hook() and their invocations Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 2/6] reiser4: make space_allocator's check_blocks() reusable Ivan Shapovalov
@ 2014-07-31 10:19 ` Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 4/6] reiser4: blocknr_list: use kmem_cache instead of kmalloc for allocating entries Ivan Shapovalov
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Ivan Shapovalov @ 2014-07-31 10:19 UTC (permalink / raw)
To: reiserfs-devel; +Cc: edward.shishkin, Ivan Shapovalov
The block list is a less memory-efficient, but ordered (and thus sortable)
implementation of the same concept as the blocknr_set.
Signed-off-by: Ivan Shapovalov <intelfx100@gmail.com>
---
fs/reiser4/Makefile | 1 +
fs/reiser4/blocknrlist.c | 311 +++++++++++++++++++++++++++++++++++++++++++++++
fs/reiser4/forward.h | 1 +
fs/reiser4/txnmgr.h | 19 +++
4 files changed, 332 insertions(+)
create mode 100644 fs/reiser4/blocknrlist.c
diff --git a/fs/reiser4/Makefile b/fs/reiser4/Makefile
index ff73d43..9f07194 100644
--- a/fs/reiser4/Makefile
+++ b/fs/reiser4/Makefile
@@ -46,6 +46,7 @@ reiser4-y := \
status_flags.o \
init_super.o \
safe_link.o \
+ blocknrlist.o \
\
plugin/plugin.o \
plugin/plugin_set.o \
diff --git a/fs/reiser4/blocknrlist.c b/fs/reiser4/blocknrlist.c
new file mode 100644
index 0000000..2868771
--- /dev/null
+++ b/fs/reiser4/blocknrlist.c
@@ -0,0 +1,311 @@
+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
+ * reiser4/README */
+
+/* This is a block list implementation, used to create ordered block sets
+ (at the cost of being less memory efficient than blocknr_set).
+ It is used by discard code. */
+
+#include "debug.h"
+#include "dformat.h"
+#include "txnmgr.h"
+#include "context.h"
+
+#include <linux/slab.h>
+#include <linux/list_sort.h>
+
+/**
+ * Represents an extent range [@start; @end).
+ */
+struct blocknr_list_entry {
+ reiser4_block_nr start, len;
+ struct list_head link;
+};
+
+#define blocknr_list_entry(ptr) list_entry(ptr, blocknr_list_entry, link)
+
+static void blocknr_list_entry_init(blocknr_list_entry *entry)
+{
+ assert("intelfx-11", entry != NULL);
+
+ entry->start = 0;
+ entry->len = 0;
+ INIT_LIST_HEAD(&entry->link);
+}
+
+static blocknr_list_entry *blocknr_list_entry_alloc(void)
+{
+ blocknr_list_entry *entry;
+
+ entry = (blocknr_list_entry *)kmalloc(sizeof(blocknr_list_entry),
+ reiser4_ctx_gfp_mask_get());
+ if (entry == NULL) {
+ return NULL;
+ }
+
+ blocknr_list_entry_init(entry);
+
+ return entry;
+}
+
+static void blocknr_list_entry_free(blocknr_list_entry *entry)
+{
+ assert("intelfx-12", entry != NULL);
+
+ kfree(entry);
+}
+
+/**
+ * Given ranges @to and [@start; @end), if they overlap, their union
+ * is calculated and saved in @to.
+ */
+static int blocknr_list_entry_merge(blocknr_list_entry *to,
+ reiser4_block_nr start,
+ reiser4_block_nr len)
+{
+ reiser4_block_nr end, to_end;
+
+ assert("intelfx-13", to != NULL);
+
+ assert("intelfx-16", to->len > 0);
+ assert("intelfx-17", len > 0);
+
+ end = start + len;
+ to_end = to->start + to->len;
+
+ if ((to->start <= end) && (start <= to_end)) {
+ if (start < to->start) {
+ to->start = start;
+ }
+
+ if (end > to_end) {
+ to_end = end;
+ }
+
+ to->len = to_end - to->start;
+
+ return 0;
+ }
+
+ return -1;
+}
+
+static int blocknr_list_entry_merge_entry(blocknr_list_entry *to,
+ blocknr_list_entry *from)
+{
+ assert("intelfx-18", from != NULL);
+
+ return blocknr_list_entry_merge(to, from->start, from->len);
+}
+
+/**
+ * A comparison function for list_sort().
+ *
+ * "The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0."
+ */
+static int blocknr_list_entry_compare(void* priv UNUSED_ARG,
+ struct list_head *a, struct list_head *b)
+{
+ blocknr_list_entry *entry_a, *entry_b;
+ reiser4_block_nr entry_a_end, entry_b_end;
+
+ assert("intelfx-19", a != NULL);
+ assert("intelfx-20", b != NULL);
+
+ entry_a = blocknr_list_entry(a);
+ entry_b = blocknr_list_entry(b);
+
+ entry_a_end = entry_a->start + entry_a->len;
+ entry_b_end = entry_b->start + entry_b->len;
+
+ /* First sort by starting block numbers... */
+ if (entry_a->start < entry_b->start) {
+ return -1;
+ }
+
+ if (entry_a->start > entry_b->start) {
+ return 1;
+ }
+
+ /** Then by ending block numbers.
+ * If @a contains @b, it will be sorted before. */
+ if (entry_a_end > entry_b_end) {
+ return -1;
+ }
+
+ if (entry_a_end < entry_b_end) {
+ return 1;
+ }
+
+ return 0;
+}
+
+void blocknr_list_init(struct list_head* blist)
+{
+ assert("intelfx-24", blist != NULL);
+
+ INIT_LIST_HEAD(blist);
+}
+
+void blocknr_list_destroy(struct list_head* blist)
+{
+ struct list_head *pos, *tmp;
+ blocknr_list_entry *entry;
+
+ assert("intelfx-25", blist != NULL);
+
+ list_for_each_safe(pos, tmp, blist) {
+ entry = blocknr_list_entry(pos);
+ list_del_init(pos);
+ blocknr_list_entry_free(entry);
+ }
+
+ assert("intelfx-48", list_empty(blist));
+}
+
+void blocknr_list_merge(struct list_head *from, struct list_head *to)
+{
+ assert("intelfx-26", from != NULL);
+ assert("intelfx-27", to != NULL);
+
+ list_splice_tail_init(from, to);
+
+ assert("intelfx-49", list_empty(from));
+}
+
+void blocknr_list_sort_and_join(struct list_head *blist)
+{
+ struct list_head *pos, *next;
+ struct blocknr_list_entry *entry, *next_entry;
+
+ assert("intelfx-50", blist != NULL);
+
+ /* Step 1. Sort the extent list. */
+ list_sort(NULL, blist, blocknr_list_entry_compare);
+
+ /* Step 2. Join adjacent extents in the list. */
+ pos = blist->next;
+ next = pos->next;
+ entry = blocknr_list_entry(pos);
+
+ for (; next != blist; next = pos->next) {
+ /** @next is a valid node at this point */
+ next_entry = blocknr_list_entry(next);
+
+ /** try to merge @next into @pos */
+ if (!blocknr_list_entry_merge_entry(entry, next_entry)) {
+ /** successful; delete the @next node.
+ * next merge will be attempted into the same node. */
+ list_del_init(next);
+ blocknr_list_entry_free(next_entry);
+ } else {
+ /** otherwise advance @pos. */
+ pos = next;
+ entry = next_entry;
+ }
+ }
+}
+
+int blocknr_list_add_extent(txn_atom *atom,
+ struct list_head *blist,
+ blocknr_list_entry **new_entry,
+ const reiser4_block_nr *start,
+ const reiser4_block_nr *len)
+{
+ assert("intelfx-29", atom != NULL);
+ assert("intelfx-42", atom_is_protected(atom));
+ assert("intelfx-43", blist != NULL);
+ assert("intelfx-30", new_entry != NULL);
+ assert("intelfx-31", start != NULL);
+ assert("intelfx-32", len != NULL && *len > 0);
+
+ if (*new_entry == NULL) {
+ /*
+ * Optimization: try to merge new extent into the last one.
+ */
+ if (!list_empty(blist)) {
+ blocknr_list_entry *last_entry;
+ last_entry = blocknr_list_entry(blist->prev);
+ if (!blocknr_list_entry_merge(last_entry, *start, *len)) {
+ return 0;
+ }
+ }
+
+ /*
+ * Otherwise, allocate a new entry and tell -E_REPEAT.
+ * Next time we'll take the branch below.
+ */
+ spin_unlock_atom(atom);
+ *new_entry = blocknr_list_entry_alloc();
+ return (*new_entry != NULL) ? -E_REPEAT : RETERR(-ENOMEM);
+ }
+
+ /*
+ * The entry has been allocated beforehand, fill it and link to the list.
+ */
+ (*new_entry)->start = *start;
+ (*new_entry)->len = *len;
+ list_add_tail(&(*new_entry)->link, blist);
+
+ return 0;
+}
+
+int blocknr_list_iterator(txn_atom *atom,
+ struct list_head *blist,
+ blocknr_set_actor_f actor,
+ void *data,
+ int delete)
+{
+ struct list_head *pos;
+ blocknr_list_entry *entry;
+ int ret = 0;
+
+ assert("intelfx-46", blist != NULL);
+ assert("intelfx-47", actor != NULL);
+
+ if (delete) {
+ struct list_head *tmp;
+
+ list_for_each_safe(pos, tmp, blist) {
+ entry = blocknr_list_entry(pos);
+
+ /*
+ * Do not exit, delete flag is set. Instead, on the first error we
+ * downgrade from iterating to just deleting.
+ */
+ if (ret == 0) {
+ ret = actor(atom, &entry->start, &entry->len, data);
+ }
+
+ list_del_init(pos);
+ blocknr_list_entry_free(entry);
+ }
+
+ assert("intelfx-44", list_empty(blist));
+ } else {
+ list_for_each(pos, blist) {
+ entry = blocknr_list_entry(pos);
+
+ ret = actor(atom, &entry->start, &entry->len, data);
+
+ if (ret != 0) {
+ return ret;
+ }
+ }
+ }
+
+ return ret;
+}
+
+/* Make Linus happy.
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 120
+ scroll-step: 1
+ End:
+*/
diff --git a/fs/reiser4/forward.h b/fs/reiser4/forward.h
index 15dbfdc..9170c2b 100644
--- a/fs/reiser4/forward.h
+++ b/fs/reiser4/forward.h
@@ -38,6 +38,7 @@ typedef struct reiser4_dir_entry_desc reiser4_dir_entry_desc;
typedef struct reiser4_context reiser4_context;
typedef struct carry_level carry_level;
typedef struct blocknr_set_entry blocknr_set_entry;
+typedef struct blocknr_list_entry blocknr_list_entry;
/* super_block->s_fs_info points to this */
typedef struct reiser4_super_info_data reiser4_super_info_data;
/* next two objects are fields of reiser4_super_info_data */
diff --git a/fs/reiser4/txnmgr.h b/fs/reiser4/txnmgr.h
index 034a3fe..18ca23d 100644
--- a/fs/reiser4/txnmgr.h
+++ b/fs/reiser4/txnmgr.h
@@ -485,6 +485,25 @@ extern int blocknr_set_iterator(txn_atom * atom, struct list_head * bset,
blocknr_set_actor_f actor, void *data,
int delete);
+/* This is the block list interface (see blocknrlist.c) */
+extern void blocknr_list_init(struct list_head *blist);
+extern void blocknr_list_destroy(struct list_head *blist);
+extern void blocknr_list_merge(struct list_head *from, struct list_head *to);
+extern void blocknr_list_sort_and_join(struct list_head *blist);
+/**
+ * The @atom should be locked.
+ */
+extern int blocknr_list_add_extent(txn_atom *atom,
+ struct list_head *blist,
+ blocknr_list_entry **new_entry,
+ const reiser4_block_nr *start,
+ const reiser4_block_nr *len);
+extern int blocknr_list_iterator(txn_atom *atom,
+ struct list_head *blist,
+ blocknr_set_actor_f actor,
+ void *data,
+ int delete);
+
/* flush code takes care about how to fuse flush queues */
extern void flush_init_atom(txn_atom * atom);
extern void flush_fuse_queues(txn_atom * large, txn_atom * small);
--
2.0.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCHv7 4/6] reiser4: blocknr_list: use kmem_cache instead of kmalloc for allocating entries.
2014-07-31 10:19 [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
` (2 preceding siblings ...)
2014-07-31 10:19 ` [PATCHv7 3/6] reiser4: add an implementation of "block lists", splitted off the discard code Ivan Shapovalov
@ 2014-07-31 10:19 ` Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 5/6] reiser4: blocknr_set: " Ivan Shapovalov
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Ivan Shapovalov @ 2014-07-31 10:19 UTC (permalink / raw)
To: reiserfs-devel; +Cc: edward.shishkin, Ivan Shapovalov
Signed-off-by: Ivan Shapovalov <intelfx100@gmail.com>
---
fs/reiser4/blocknrlist.c | 31 ++++++++++++++++++++++++++++---
fs/reiser4/super_ops.c | 7 +++++++
fs/reiser4/txnmgr.h | 2 ++
3 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/fs/reiser4/blocknrlist.c b/fs/reiser4/blocknrlist.c
index 2868771..39a4a9b 100644
--- a/fs/reiser4/blocknrlist.c
+++ b/fs/reiser4/blocknrlist.c
@@ -9,10 +9,13 @@
#include "dformat.h"
#include "txnmgr.h"
#include "context.h"
+#include "super.h"
#include <linux/slab.h>
#include <linux/list_sort.h>
+static struct kmem_cache *blocknr_list_slab = NULL;
+
/**
* Represents an extent range [@start; @end).
*/
@@ -36,8 +39,8 @@ static blocknr_list_entry *blocknr_list_entry_alloc(void)
{
blocknr_list_entry *entry;
- entry = (blocknr_list_entry *)kmalloc(sizeof(blocknr_list_entry),
- reiser4_ctx_gfp_mask_get());
+ entry = (blocknr_list_entry *)kmem_cache_alloc(blocknr_list_slab,
+ reiser4_ctx_gfp_mask_get());
if (entry == NULL) {
return NULL;
}
@@ -51,7 +54,7 @@ static void blocknr_list_entry_free(blocknr_list_entry *entry)
{
assert("intelfx-12", entry != NULL);
- kfree(entry);
+ kmem_cache_free(blocknr_list_slab, entry);
}
/**
@@ -142,6 +145,28 @@ static int blocknr_list_entry_compare(void* priv UNUSED_ARG,
return 0;
}
+int blocknr_list_init_static(void)
+{
+ assert("intelfx-54", blocknr_list_slab == NULL);
+
+ blocknr_list_slab = kmem_cache_create("blocknr_list_entry",
+ sizeof(blocknr_list_entry),
+ 0,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT,
+ NULL);
+ if (blocknr_list_slab == NULL) {
+ return RETERR(-ENOMEM);
+ }
+
+ return 0;
+}
+
+void blocknr_list_done_static(void)
+{
+ destroy_reiser4_cache(&blocknr_list_slab);
+}
+
void blocknr_list_init(struct list_head* blist)
{
assert("intelfx-24", blist != NULL);
diff --git a/fs/reiser4/super_ops.c b/fs/reiser4/super_ops.c
index 81773b3..a63ceb5 100644
--- a/fs/reiser4/super_ops.c
+++ b/fs/reiser4/super_ops.c
@@ -678,11 +678,17 @@ static int __init init_reiser4(void)
if ((result = reiser4_init_d_cursor()) != 0)
goto failed_init_d_cursor;
+ /* initialize cache of blocknr list entries */
+ if ((result = blocknr_list_init_static()) != 0)
+ goto failed_init_blocknr_list;
+
if ((result = register_filesystem(&reiser4_fs_type)) == 0) {
reiser4_debugfs_root = debugfs_create_dir("reiser4", NULL);
return 0;
}
+ blocknr_list_done_static();
+ failed_init_blocknr_list:
reiser4_done_d_cursor();
failed_init_d_cursor:
reiser4_done_file_fsdata();
@@ -718,6 +724,7 @@ static void __exit done_reiser4(void)
debugfs_remove(reiser4_debugfs_root);
result = unregister_filesystem(&reiser4_fs_type);
BUG_ON(result != 0);
+ blocknr_list_done_static();
reiser4_done_d_cursor();
reiser4_done_file_fsdata();
reiser4_done_dentry_fsdata();
diff --git a/fs/reiser4/txnmgr.h b/fs/reiser4/txnmgr.h
index 18ca23d..3515de9 100644
--- a/fs/reiser4/txnmgr.h
+++ b/fs/reiser4/txnmgr.h
@@ -486,6 +486,8 @@ extern int blocknr_set_iterator(txn_atom * atom, struct list_head * bset,
int delete);
/* This is the block list interface (see blocknrlist.c) */
+extern int blocknr_list_init_static(void);
+extern void blocknr_list_done_static(void);
extern void blocknr_list_init(struct list_head *blist);
extern void blocknr_list_destroy(struct list_head *blist);
extern void blocknr_list_merge(struct list_head *from, struct list_head *to);
--
2.0.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCHv7 5/6] reiser4: blocknr_set: use kmem_cache instead of kmalloc for allocating entries.
2014-07-31 10:19 [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
` (3 preceding siblings ...)
2014-07-31 10:19 ` [PATCHv7 4/6] reiser4: blocknr_list: use kmem_cache instead of kmalloc for allocating entries Ivan Shapovalov
@ 2014-07-31 10:19 ` Ivan Shapovalov
2014-07-31 10:19 ` [PATCHv7 6/6] reiser4: discard support: initial implementation using blocknr_list, without extent padding Ivan Shapovalov
2014-07-31 10:34 ` [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
6 siblings, 0 replies; 8+ messages in thread
From: Ivan Shapovalov @ 2014-07-31 10:19 UTC (permalink / raw)
To: reiserfs-devel; +Cc: edward.shishkin, Ivan Shapovalov
Signed-off-by: Ivan Shapovalov <intelfx100@gmail.com>
---
fs/reiser4/blocknrset.c | 34 +++++++++++++++++++++++++++++++---
fs/reiser4/super_ops.c | 7 +++++++
fs/reiser4/txnmgr.h | 2 ++
3 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/fs/reiser4/blocknrset.c b/fs/reiser4/blocknrset.c
index bf57c17..2f18cbc 100644
--- a/fs/reiser4/blocknrset.c
+++ b/fs/reiser4/blocknrset.c
@@ -8,6 +8,7 @@ reiser4/README */
#include "dformat.h"
#include "txnmgr.h"
#include "context.h"
+#include "super.h"
#include <linux/slab.h>
@@ -42,6 +43,8 @@ reiser4/README */
sizeof(struct list_head)) / \
sizeof(reiser4_block_nr))
+static struct kmem_cache *blocknr_set_slab = NULL;
+
/* An entry of the blocknr_set */
struct blocknr_set_entry {
unsigned nr_singles;
@@ -82,8 +85,8 @@ static blocknr_set_entry *bse_alloc(void)
{
blocknr_set_entry *e;
- if ((e = (blocknr_set_entry *) kmalloc(sizeof(blocknr_set_entry),
- reiser4_ctx_gfp_mask_get())) == NULL)
+ if ((e = (blocknr_set_entry *) kmem_cache_alloc(blocknr_set_slab,
+ reiser4_ctx_gfp_mask_get())) == NULL)
return NULL;
bse_init(e);
@@ -95,7 +98,7 @@ static blocknr_set_entry *bse_alloc(void)
/* Audited by: green(2002.06.11) */
static void bse_free(blocknr_set_entry * bse)
{
- kfree(bse);
+ kmem_cache_free(blocknr_set_slab, bse);
}
/* Add a block number to a blocknr_set_entry */
@@ -225,6 +228,31 @@ blocknr_set_add_pair(txn_atom * atom,
return blocknr_set_add(atom, bset, new_bsep, a, b);
}
+/* Initialize slab cache of blocknr_set_entry objects. */
+int blocknr_set_init_static(void)
+{
+ assert("intelfx-55", blocknr_set_slab == NULL);
+
+ blocknr_set_slab = kmem_cache_create("blocknr_set_entry",
+ sizeof(blocknr_set_entry),
+ 0,
+ SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT,
+ NULL);
+
+ if (blocknr_set_slab == NULL) {
+ return RETERR(-ENOMEM);
+ }
+
+ return 0;
+}
+
+/* Destroy slab cache of blocknr_set_entry objects. */
+void blocknr_set_done_static(void)
+{
+ destroy_reiser4_cache(&blocknr_set_slab);
+}
+
/* Initialize a blocknr_set. */
void blocknr_set_init(struct list_head *bset)
{
diff --git a/fs/reiser4/super_ops.c b/fs/reiser4/super_ops.c
index a63ceb5..bcd7fd6 100644
--- a/fs/reiser4/super_ops.c
+++ b/fs/reiser4/super_ops.c
@@ -678,6 +678,10 @@ static int __init init_reiser4(void)
if ((result = reiser4_init_d_cursor()) != 0)
goto failed_init_d_cursor;
+ /* initialize cache of blocknr set entries */
+ if ((result = blocknr_set_init_static()) != 0)
+ goto failed_init_blocknr_set;
+
/* initialize cache of blocknr list entries */
if ((result = blocknr_list_init_static()) != 0)
goto failed_init_blocknr_list;
@@ -689,6 +693,8 @@ static int __init init_reiser4(void)
blocknr_list_done_static();
failed_init_blocknr_list:
+ blocknr_set_done_static();
+ failed_init_blocknr_set:
reiser4_done_d_cursor();
failed_init_d_cursor:
reiser4_done_file_fsdata();
@@ -725,6 +731,7 @@ static void __exit done_reiser4(void)
result = unregister_filesystem(&reiser4_fs_type);
BUG_ON(result != 0);
blocknr_list_done_static();
+ blocknr_set_done_static();
reiser4_done_d_cursor();
reiser4_done_file_fsdata();
reiser4_done_dentry_fsdata();
diff --git a/fs/reiser4/txnmgr.h b/fs/reiser4/txnmgr.h
index 3515de9..0dee787 100644
--- a/fs/reiser4/txnmgr.h
+++ b/fs/reiser4/txnmgr.h
@@ -465,6 +465,8 @@ int capture_bulk(jnode **, int count);
/* See the comment on the function blocknrset.c:blocknr_set_add for the
calling convention of these three routines. */
+extern int blocknr_set_init_static(void);
+extern void blocknr_set_done_static(void);
extern void blocknr_set_init(struct list_head * bset);
extern void blocknr_set_destroy(struct list_head * bset);
extern void blocknr_set_merge(struct list_head * from, struct list_head * into);
--
2.0.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCHv7 6/6] reiser4: discard support: initial implementation using blocknr_list, without extent padding.
2014-07-31 10:19 [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
` (4 preceding siblings ...)
2014-07-31 10:19 ` [PATCHv7 5/6] reiser4: blocknr_set: " Ivan Shapovalov
@ 2014-07-31 10:19 ` Ivan Shapovalov
2014-07-31 10:34 ` [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
6 siblings, 0 replies; 8+ messages in thread
From: Ivan Shapovalov @ 2014-07-31 10:19 UTC (permalink / raw)
To: reiserfs-devel; +Cc: edward.shishkin, Ivan Shapovalov
Now reiser4_post_write_back_hook() is used for discarding and completing
deferred deallocations. The invocation of this hook has been moved past
immediate deallocations in the wandered logs code. Also, when discard is
enabled, immediate deallocations are made deferred, and discard procedure
takes place before any deallocations are completed. This makes it possible
to avoid costly bitmap checks at discard time.
Implementation details:
- before discarding, the delete set is sorted and extents are merged
- each extent is submitted to discard as-is (blocks in all extents
are still marked as allocated at this point, so there can be no races)
- processing stops at first failure (this does not fail atom commit)
For now (shortcomings):
- extents are not padded to erase unit boundaries
Signed-off-by: Ivan Shapovalov <intelfx100@gmail.com>
---
fs/reiser4/Makefile | 1 +
fs/reiser4/block_alloc.c | 44 +++++++---
fs/reiser4/dformat.h | 2 +
fs/reiser4/discard.c | 179 +++++++++++++++++++++++++++++++++++++++
fs/reiser4/discard.h | 42 +++++++++
fs/reiser4/init_super.c | 2 +
fs/reiser4/plugin/space/bitmap.c | 3 +-
fs/reiser4/super.h | 4 +-
fs/reiser4/txnmgr.c | 90 ++++++++++++++++++--
fs/reiser4/txnmgr.h | 37 +++++++-
fs/reiser4/wander.c | 3 +-
11 files changed, 381 insertions(+), 26 deletions(-)
create mode 100644 fs/reiser4/discard.c
create mode 100644 fs/reiser4/discard.h
diff --git a/fs/reiser4/Makefile b/fs/reiser4/Makefile
index 9f07194..f50bb96 100644
--- a/fs/reiser4/Makefile
+++ b/fs/reiser4/Makefile
@@ -47,6 +47,7 @@ reiser4-y := \
init_super.o \
safe_link.o \
blocknrlist.o \
+ discard.o \
\
plugin/plugin.o \
plugin/plugin_set.o \
diff --git a/fs/reiser4/block_alloc.c b/fs/reiser4/block_alloc.c
index 59515c3..324b11c 100644
--- a/fs/reiser4/block_alloc.c
+++ b/fs/reiser4/block_alloc.c
@@ -9,6 +9,7 @@ reiser4/README */
#include "block_alloc.h"
#include "tree.h"
#include "super.h"
+#include "discard.h"
#include <linux/types.h> /* for __u?? */
#include <linux/fs.h> /* for struct super_block */
@@ -992,6 +993,7 @@ reiser4_dealloc_blocks(const reiser4_block_nr * start,
int ret;
reiser4_context *ctx;
reiser4_super_info_data *sbinfo;
+ void *new_entry = NULL;
ctx = get_current_context();
sbinfo = get_super_private(ctx->super);
@@ -1006,18 +1008,15 @@ reiser4_dealloc_blocks(const reiser4_block_nr * start,
spin_unlock_reiser4_super(sbinfo);
}
- if (flags & BA_DEFER) {
- blocknr_set_entry *bsep = NULL;
-
- /* storing deleted block numbers in a blocknr set
- datastructure for further actual deletion */
+ if ((flags & BA_DEFER) ||
+ reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ /* store deleted block numbers in the atom's deferred delete set
+ for further actual deletion */
do {
atom = get_current_atom_locked();
assert("zam-430", atom != NULL);
- ret =
- blocknr_set_add_extent(atom, &atom->delete_set,
- &bsep, start, len);
+ ret = atom_dset_deferred_add_extent(atom, &new_entry, start, len);
if (ret == -ENOMEM)
return ret;
@@ -1120,15 +1119,13 @@ apply_dset(txn_atom * atom UNUSED_ARG, const reiser4_block_nr * a,
void reiser4_post_commit_hook(void)
{
+#ifdef REISER4_DEBUG
txn_atom *atom;
atom = get_current_atom_locked();
assert("zam-452", atom->stage == ASTAGE_POST_COMMIT);
spin_unlock_atom(atom);
-
- /* do the block deallocation which was deferred
- until commit is done */
- blocknr_set_iterator(atom, &atom->delete_set, apply_dset, NULL, 1);
+#endif
assert("zam-504", get_current_super_private() != NULL);
sa_post_commit_hook();
@@ -1136,8 +1133,29 @@ void reiser4_post_commit_hook(void)
void reiser4_post_write_back_hook(void)
{
- assert("zam-504", get_current_super_private() != NULL);
+ struct list_head discarded_set;
+ txn_atom *atom;
+ int ret;
+ /* process and issue discard requests */
+ blocknr_list_init (&discarded_set);
+ do {
+ atom = get_current_atom_locked();
+ ret = discard_atom(atom, &discarded_set);
+ } while (ret == -E_REPEAT);
+
+ if (ret) {
+ warning("intelfx-8", "discard atom failed (%d)", ret);
+ }
+
+ atom = get_current_atom_locked();
+ discard_atom_post(atom, &discarded_set);
+
+ /* do the block deallocation which was deferred
+ until commit is done */
+ atom_dset_deferred_apply(atom, apply_dset, NULL, 1);
+
+ assert("zam-504", get_current_super_private() != NULL);
sa_post_write_back_hook();
}
diff --git a/fs/reiser4/dformat.h b/fs/reiser4/dformat.h
index 7943762..7316754 100644
--- a/fs/reiser4/dformat.h
+++ b/fs/reiser4/dformat.h
@@ -14,6 +14,8 @@
#if !defined(__FS_REISER4_DFORMAT_H__)
#define __FS_REISER4_DFORMAT_H__
+#include "debug.h"
+
#include <asm/byteorder.h>
#include <asm/unaligned.h>
#include <linux/types.h>
diff --git a/fs/reiser4/discard.c b/fs/reiser4/discard.c
new file mode 100644
index 0000000..7a07afc
--- /dev/null
+++ b/fs/reiser4/discard.c
@@ -0,0 +1,179 @@
+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
+ * reiser4/README */
+
+/* TRIM/discard interoperation subsystem for reiser4. */
+
+/*
+ * This subsystem is responsible for populating an atom's ->discard_set and
+ * (later) converting it into a series of discard calls to the kernel.
+ *
+ * The discard is an in-kernel interface for notifying the storage
+ * hardware about blocks that are being logically freed by the filesystem.
+ * This is done via calling the blkdev_issue_discard() function. There are
+ * restrictions on block ranges: they should constitute at least one erase unit
+ * in length and be correspondingly aligned. Otherwise a discard request will
+ * be ignored.
+ *
+ * The erase unit size is kept in struct queue_limits as discard_granularity.
+ * The offset from the partition start to the first erase unit is kept in
+ * struct queue_limits as discard_alignment.
+ *
+ * At atom level, we record numbers of all blocks that happen to be deallocated
+ * during the transaction. Then we read the generated set, filter out any blocks
+ * that have since been allocated again and issue discards for everything still
+ * valid. This is what discard.[ch] is here for.
+ *
+ * However, simply iterating through the recorded extents is not enough:
+ * - if a single extent is smaller than the erase unit, then this particular
+ * extent won't be discarded even if it is surrounded by enough free blocks
+ * to constitute a whole erase unit;
+ * - we won't be able to merge small adjacent extents forming an extent long
+ * enough to be discarded.
+ *
+ * MECHANISM:
+ *
+ * During the transaction deallocated extents are recorded in atom's delete
+ * set. In reiser4, there are two methods to deallocate a block:
+ * 1. deferred deallocation, enabled by BA_DEFER flag to reiser4_dealloc_block().
+ * In this mode, blocks are stored to delete set instead of being marked free
+ * immediately. After committing the transaction, the delete set is "applied"
+ * by the block allocator and all these blocks are marked free in memory
+ * (see reiser4_post_write_back_hook()).
+ * Space management plugins also read the delete set to update on-disk
+ * allocation records (see reiser4_pre_commit_hook()).
+ * 2. immediate deallocation (the opposite).
+ * In this mode, blocks are marked free immediately. This is used by the
+ * journal subsystem to manage space used by the journal records, so these
+ * allocations are not visible to the space management plugins and never hit
+ * the disk.
+ *
+ * When discard is enabled, all immediate deallocations become deferred. This
+ * is OK because journal's allocations happen after reiser4_pre_commit_hook()
+ * where the on-disk space allocation records are updated. So, in this mode
+ * the atom's delete set becomes "the discard set" -- list of blocks that have
+ * to be considered for discarding.
+ *
+ * Discarding is performed before completing deferred deallocations, hence all
+ * extents in the discard set are still marked as allocated and cannot contain
+ * any data. Thus we can avoid any checks for blocks directly present in the
+ * discard set.
+ *
+ * For now, we don't perform "padding" of extents to erase unit boundaries.
+ * This means if extents are not aligned with the device's erase unit lattice,
+ * the partial erase units at head and tail of extents are truncated by kernel
+ * (in blkdev_issue_discard()).
+ *
+ * So, at commit time the following actions take place:
+ * - delete sets are merged to form the discard set;
+ * - elements of the discard set are sorted;
+ * - the discard set is iterated, joining any adjacent extents;
+ * - for each extent, a single call to blkdev_issue_discard() is done.
+ */
+
+#include "discard.h"
+#include "context.h"
+#include "debug.h"
+#include "txnmgr.h"
+#include "super.h"
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/blkdev.h>
+
+static int __discard_extent(struct block_device *bdev, sector_t start,
+ sector_t len)
+{
+ assert("intelfx-21", bdev != NULL);
+
+ return blkdev_issue_discard(bdev, start, len, reiser4_ctx_gfp_mask_get(),
+ 0);
+}
+
+static int discard_extent(txn_atom *atom UNUSED_ARG,
+ const reiser4_block_nr* start,
+ const reiser4_block_nr* len,
+ void *data UNUSED_ARG)
+{
+ struct super_block *sb = reiser4_get_current_sb();
+ struct block_device *bdev = sb->s_bdev;
+
+ sector_t extent_start_sec, extent_len_sec;
+
+ const int sec_per_blk = sb->s_blocksize >> 9;
+
+ /* we assume block = N * sector */
+ assert("intelfx-7", sec_per_blk > 0);
+
+ /* convert extent to sectors */
+ extent_start_sec = *start * sec_per_blk;
+ extent_len_sec = *len * sec_per_blk;
+
+ /* discard the extent, don't pad it to erase unit boundaries for now */
+ return __discard_extent(bdev, extent_start_sec, extent_len_sec);
+}
+
+int discard_atom(txn_atom *atom, struct list_head *processed_set)
+{
+ int ret;
+ struct list_head discard_set;
+
+ if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ spin_unlock_atom(atom);
+ return 0;
+ }
+
+ assert("intelfx-28", atom != NULL);
+ assert("intelfx-59", processed_set != NULL);
+
+ if (list_empty(&atom->discard.delete_set)) {
+ /* Nothing left to discard. */
+ spin_unlock_atom(atom);
+ return 0;
+ }
+
+ /* Take the delete sets from the atom in order to release atom spinlock. */
+ blocknr_list_init(&discard_set);
+ blocknr_list_merge(&atom->discard.delete_set, &discard_set);
+ spin_unlock_atom(atom);
+
+ /* Sort the discard list, joining adjacent and overlapping extents. */
+ blocknr_list_sort_and_join(&discard_set);
+
+ /* Perform actual dirty work. */
+ ret = blocknr_list_iterator(NULL, &discard_set, &discard_extent, NULL, 0);
+
+ /* Add processed extents to the temporary list. */
+ blocknr_list_merge(&discard_set, processed_set);
+
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Let's do this again for any new extents in the atom's discard set. */
+ return -E_REPEAT;
+}
+
+void discard_atom_post(txn_atom *atom, struct list_head *processed_set)
+{
+ assert("intelfx-60", atom != NULL);
+ assert("intelfx-61", processed_set != NULL);
+
+ if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ spin_unlock_atom(atom);
+ return;
+ }
+
+ blocknr_list_merge(processed_set, &atom->discard.delete_set);
+ spin_unlock_atom(atom);
+}
+
+/* Make Linus happy.
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 120
+ scroll-step: 1
+ End:
+*/
diff --git a/fs/reiser4/discard.h b/fs/reiser4/discard.h
new file mode 100644
index 0000000..5f0d0d8
--- /dev/null
+++ b/fs/reiser4/discard.h
@@ -0,0 +1,42 @@
+/* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
+ * reiser4/README */
+
+/* TRIM/discard interoperation subsystem for reiser4. */
+
+#if !defined(__FS_REISER4_DISCARD_H__)
+#define __FS_REISER4_DISCARD_H__
+
+#include "forward.h"
+#include "dformat.h"
+
+/**
+ * Issue discard requests for all block extents recorded in @atom's delete sets,
+ * if discard is enabled. The extents processed are removed from the @atom's
+ * delete sets and stored in @processed_set.
+ *
+ * @atom must be locked on entry and is unlocked on exit.
+ * @processed_set must be initialized with blocknr_list_init().
+ */
+extern int discard_atom(txn_atom *atom, struct list_head *processed_set);
+
+/**
+ * Splices @processed_set back to @atom's delete set.
+ * Must be called after discard_atom() loop, using the same @processed_set.
+ *
+ * @atom must be locked on entry and is unlocked on exit.
+ * @processed_set must be the same as passed to discard_atom().
+ */
+extern void discard_atom_post(txn_atom *atom, struct list_head *processed_set);
+
+/* __FS_REISER4_DISCARD_H__ */
+#endif
+
+/* Make Linus happy.
+ Local variables:
+ c-indentation-style: "K&R"
+ mode-name: "LC"
+ c-basic-offset: 8
+ tab-width: 8
+ fill-column: 120
+ End:
+*/
diff --git a/fs/reiser4/init_super.c b/fs/reiser4/init_super.c
index 620a0f5..1ff8dad 100644
--- a/fs/reiser4/init_super.c
+++ b/fs/reiser4/init_super.c
@@ -494,6 +494,8 @@ int reiser4_init_super_data(struct super_block *super, char *opt_string)
PUSH_BIT_OPT("atomic_write", REISER4_ATOMIC_WRITE);
/* disable use of write barriers in the reiser4 log writer. */
PUSH_BIT_OPT("no_write_barrier", REISER4_NO_WRITE_BARRIER);
+ /* enable issuing of discard requests */
+ PUSH_BIT_OPT("discard", REISER4_DISCARD);
PUSH_OPT(p, opts,
{
diff --git a/fs/reiser4/plugin/space/bitmap.c b/fs/reiser4/plugin/space/bitmap.c
index bd41fb9..3da3f6b 100644
--- a/fs/reiser4/plugin/space/bitmap.c
+++ b/fs/reiser4/plugin/space/bitmap.c
@@ -1458,8 +1458,7 @@ int reiser4_pre_commit_hook_bitmap(void)
}
}
- blocknr_set_iterator(atom, &atom->delete_set, apply_dset_to_commit_bmap,
- &blocks_freed, 0);
+ atom_dset_deferred_apply(atom, apply_dset_to_commit_bmap, &blocks_freed, 0);
blocks_freed -= atom->nr_blocks_allocated;
diff --git a/fs/reiser4/super.h b/fs/reiser4/super.h
index 0c73845..895c3f3 100644
--- a/fs/reiser4/super.h
+++ b/fs/reiser4/super.h
@@ -51,7 +51,9 @@ typedef enum {
/* enforce atomicity during write(2) */
REISER4_ATOMIC_WRITE = 6,
/* don't use write barriers in the log writer code. */
- REISER4_NO_WRITE_BARRIER = 7
+ REISER4_NO_WRITE_BARRIER = 7,
+ /* enable issuing of discard requests */
+ REISER4_DISCARD = 8
} reiser4_fs_flag;
/*
diff --git a/fs/reiser4/txnmgr.c b/fs/reiser4/txnmgr.c
index 4950179..d73ecb9 100644
--- a/fs/reiser4/txnmgr.c
+++ b/fs/reiser4/txnmgr.c
@@ -233,6 +233,7 @@ year old --- define all technical terms used.
#include "vfs_ops.h"
#include "inode.h"
#include "flush.h"
+#include "discard.h"
#include <asm/atomic.h>
#include <linux/types.h>
@@ -404,9 +405,10 @@ static void atom_init(txn_atom * atom)
INIT_LIST_HEAD(&atom->atom_link);
INIT_LIST_HEAD(&atom->fwaitfor_list);
INIT_LIST_HEAD(&atom->fwaiting_list);
- blocknr_set_init(&atom->delete_set);
blocknr_set_init(&atom->wandered_map);
+ atom_dset_init(atom);
+
init_atom_fq_parts(atom);
}
@@ -798,9 +800,10 @@ static void atom_free(txn_atom * atom)
(atom->stage == ASTAGE_INVALID || atom->stage == ASTAGE_DONE));
atom->stage = ASTAGE_FREE;
- blocknr_set_destroy(&atom->delete_set);
blocknr_set_destroy(&atom->wandered_map);
+ atom_dset_destroy(atom);
+
assert("jmacd-16", atom_isclean(atom));
spin_unlock_atom(atom);
@@ -2938,9 +2941,11 @@ static void capture_fuse_into(txn_atom * small, txn_atom * large)
large->flags |= small->flags;
/* Merge blocknr sets. */
- blocknr_set_merge(&small->delete_set, &large->delete_set);
blocknr_set_merge(&small->wandered_map, &large->wandered_map);
+ /* Merge delete sets. */
+ atom_dset_merge(small, large);
+
/* Merge allocated/deleted file counts */
large->nr_objects_deleted += small->nr_objects_deleted;
large->nr_objects_created += small->nr_objects_created;
@@ -3064,9 +3069,7 @@ reiser4_block_nr txnmgr_count_deleted_blocks(void)
list_for_each_entry(atom, &tmgr->atoms_list, atom_link) {
spin_lock_atom(atom);
if (atom_isopen(atom))
- blocknr_set_iterator(
- atom, &atom->delete_set,
- count_deleted_blocks_actor, &result, 0);
+ atom_dset_deferred_apply(atom, count_deleted_blocks_actor, &result, 0);
spin_unlock_atom(atom);
}
spin_unlock_txnmgr(tmgr);
@@ -3074,6 +3077,81 @@ reiser4_block_nr txnmgr_count_deleted_blocks(void)
return result;
}
+void atom_dset_init(txn_atom *atom)
+{
+ if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ blocknr_list_init(&atom->discard.delete_set);
+ } else {
+ blocknr_set_init(&atom->nodiscard.delete_set);
+ }
+}
+
+void atom_dset_destroy(txn_atom *atom)
+{
+ if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ blocknr_list_destroy(&atom->discard.delete_set);
+ } else {
+ blocknr_set_destroy(&atom->nodiscard.delete_set);
+ }
+}
+
+void atom_dset_merge(txn_atom *from, txn_atom *to)
+{
+ if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ blocknr_list_merge(&from->discard.delete_set, &to->discard.delete_set);
+ } else {
+ blocknr_set_merge(&from->nodiscard.delete_set, &to->nodiscard.delete_set);
+ }
+}
+
+int atom_dset_deferred_apply(txn_atom* atom,
+ blocknr_set_actor_f actor,
+ void *data,
+ int delete)
+{
+ int ret;
+
+ if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ ret = blocknr_list_iterator(atom,
+ &atom->discard.delete_set,
+ actor,
+ data,
+ delete);
+ } else {
+ ret = blocknr_set_iterator(atom,
+ &atom->nodiscard.delete_set,
+ actor,
+ data,
+ delete);
+ }
+
+ return ret;
+}
+
+extern int atom_dset_deferred_add_extent(txn_atom *atom,
+ void **new_entry,
+ const reiser4_block_nr *start,
+ const reiser4_block_nr *len)
+{
+ int ret;
+
+ if (reiser4_is_set(reiser4_get_current_sb(), REISER4_DISCARD)) {
+ ret = blocknr_list_add_extent(atom,
+ &atom->discard.delete_set,
+ (blocknr_list_entry**)new_entry,
+ start,
+ len);
+ } else {
+ ret = blocknr_set_add_extent(atom,
+ &atom->nodiscard.delete_set,
+ (blocknr_set_entry**)new_entry,
+ start,
+ len);
+ }
+
+ return ret;
+}
+
/*
* Local variables:
* c-indentation-style: "K&R"
diff --git a/fs/reiser4/txnmgr.h b/fs/reiser4/txnmgr.h
index 0dee787..72b84a2 100644
--- a/fs/reiser4/txnmgr.h
+++ b/fs/reiser4/txnmgr.h
@@ -245,9 +245,24 @@ struct txn_atom {
/* Start time. */
unsigned long start_time;
- /* The atom's delete set. It collects block numbers of the nodes
- which were deleted during the transaction. */
- struct list_head delete_set;
+ /* The atom's delete sets.
+ "simple" are blocknr_set instances and are used when discard is disabled.
+ "discard" are blocknr_list instances and are used when discard is enabled. */
+ union {
+ struct {
+ /* The atom's delete set. It collects block numbers of the nodes
+ which were deleted during the transaction. */
+ struct list_head delete_set;
+ } nodiscard;
+
+ struct {
+ /* The atom's delete set. It collects all blocks that have been
+ deallocated (both immediate and deferred) during the transaction.
+ These blocks are considered for discarding at commit time.
+ For details see discard.c */
+ struct list_head delete_set;
+ } discard;
+ };
/* The atom's wandered_block mapping. */
struct list_head wandered_map;
@@ -508,6 +523,22 @@ extern int blocknr_list_iterator(txn_atom *atom,
void *data,
int delete);
+/* These are wrappers for accessing and modifying atom's delete lists,
+ depending on whether discard is enabled or not.
+ If it is enabled, (less memory efficient) blocknr_list is used for delete
+ list storage. Otherwise, blocknr_set is used for this purpose. */
+extern void atom_dset_init(txn_atom *atom);
+extern void atom_dset_destroy(txn_atom *atom);
+extern void atom_dset_merge(txn_atom *from, txn_atom *to);
+extern int atom_dset_deferred_apply(txn_atom* atom,
+ blocknr_set_actor_f actor,
+ void *data,
+ int delete);
+extern int atom_dset_deferred_add_extent(txn_atom *atom,
+ void **new_entry,
+ const reiser4_block_nr *start,
+ const reiser4_block_nr *len);
+
/* flush code takes care about how to fuse flush queues */
extern void flush_init_atom(txn_atom * atom);
extern void flush_fuse_queues(txn_atom * large, txn_atom * small);
diff --git a/fs/reiser4/wander.c b/fs/reiser4/wander.c
index 4e29de8..04ddec6 100644
--- a/fs/reiser4/wander.c
+++ b/fs/reiser4/wander.c
@@ -1252,7 +1252,6 @@ int reiser4_write_logs(long *nr_submitted)
reiser4_post_commit_hook();
ret = write_tx_back(&ch);
- reiser4_post_write_back_hook();
up_and_ret:
if (ret) {
@@ -1265,6 +1264,8 @@ int reiser4_write_logs(long *nr_submitted)
dealloc_tx_list(&ch);
dealloc_wmap(&ch);
+ reiser4_post_write_back_hook();
+
put_overwrite_set(&ch);
done_commit_handle(&ch);
--
2.0.3
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation.
2014-07-31 10:19 [PATCHv7 0/6] reiser4: discard support: simplified and race-free initial implementation Ivan Shapovalov
` (5 preceding siblings ...)
2014-07-31 10:19 ` [PATCHv7 6/6] reiser4: discard support: initial implementation using blocknr_list, without extent padding Ivan Shapovalov
@ 2014-07-31 10:34 ` Ivan Shapovalov
6 siblings, 0 replies; 8+ messages in thread
From: Ivan Shapovalov @ 2014-07-31 10:34 UTC (permalink / raw)
To: reiserfs-devel; +Cc: edward.shishkin
[-- Attachment #1: Type: text/plain, Size: 7803 bytes --]
On Thursday 31 July 2014 at 14:19:43, Ivan Shapovalov wrote:
> [...]
>
> Ivan Shapovalov (6):
> reiser4: fix reiser4_post_{commit,write_back}_hook() and their invocations.
> reiser4: make space_allocator's check_blocks() reusable.
> reiser4: add an implementation of "block lists", splitted off the discard code.
> reiser4: blocknr_list: use kmem_cache instead of kmalloc for allocating entries.
> reiser4: blocknr_set: use kmem_cache instead of kmalloc for allocating entries.
> reiser4: discard support: initial implementation using blocknr_list, without extent padding.
>
> [...]
This is a diff between previous two patchsets and this one, for ease of reviewing.
diff --git a/fs/reiser4/block_alloc.c b/fs/reiser4/block_alloc.c
index 98080a1..324b11c 100644
--- a/fs/reiser4/block_alloc.c
+++ b/fs/reiser4/block_alloc.c
@@ -9,6 +9,7 @@ reiser4/README */
#include "block_alloc.h"
#include "tree.h"
#include "super.h"
+#include "discard.h"
#include <linux/types.h> /* for __u?? */
#include <linux/fs.h> /* for struct super_block */
@@ -1144,7 +1145,7 @@ void reiser4_post_write_back_hook(void)
} while (ret == -E_REPEAT);
if (ret) {
- warning("intelfx-8", "discard atom failed (%ld)", ret);
+ warning("intelfx-8", "discard atom failed (%d)", ret);
}
atom = get_current_atom_locked();
diff --git a/fs/reiser4/discard.c b/fs/reiser4/discard.c
index 8442619..7a07afc 100644
--- a/fs/reiser4/discard.c
+++ b/fs/reiser4/discard.c
@@ -53,23 +53,21 @@
* the atom's delete set becomes "the discard set" -- list of blocks that have
* to be considered for discarding.
*
- * On atom commit we will generate a minimal superset of the discard set,
- * comprised of whole erase units.
- *
* Discarding is performed before completing deferred deallocations, hence all
* extents in the discard set are still marked as allocated and cannot contain
* any data. Thus we can avoid any checks for blocks directly present in the
* discard set.
*
- * However, we pad each extent from both sides to erase unit boundaries, and
- * these paddings still have to be checked if they fall outside of initial
- * extent (may not happen if block size > erase unit size).
+ * For now, we don't perform "padding" of extents to erase unit boundaries.
+ * This means if extents are not aligned with the device's erase unit lattice,
+ * the partial erase units at head and tail of extents are truncated by kernel
+ * (in blkdev_issue_discard()).
*
* So, at commit time the following actions take place:
* - delete sets are merged to form the discard set;
* - elements of the discard set are sorted;
* - the discard set is iterated, joining any adjacent extents;
- * - <TODO>
+ * - for each extent, a single call to blkdev_issue_discard() is done.
*/
#include "discard.h"
@@ -98,92 +96,20 @@ static int discard_extent(txn_atom *atom UNUSED_ARG,
{
struct super_block *sb = reiser4_get_current_sb();
struct block_device *bdev = sb->s_bdev;
- struct queue_limits *limits = &bdev_get_queue(bdev)->limits;
- sector_t extent_start_sec, extent_end_sec,
- unit_sec, request_start_sec = 0, request_len_sec = 0;
- reiser4_block_nr unit_start_blk, unit_len_blk;
- int ret, erase_unit_counter = 0;
+ sector_t extent_start_sec, extent_len_sec;
const int sec_per_blk = sb->s_blocksize >> 9;
- /* from blkdev_issue_discard():
- * Zero-sector (unknown) and one-sector granularities are the same. */
- const int granularity = max(limits->discard_granularity >> 9, 1U);
- const int alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
-
/* we assume block = N * sector */
assert("intelfx-7", sec_per_blk > 0);
/* convert extent to sectors */
extent_start_sec = *start * sec_per_blk;
- extent_end_sec = (*start + *len) * sec_per_blk;
+ extent_len_sec = *len * sec_per_blk;
- /* round down extent start sector to an erase unit boundary */
- unit_sec = extent_start_sec;
- if (granularity > 1) {
- sector_t tmp = extent_start_sec - alignment;
- unit_sec -= sector_div(tmp, granularity);
- }
-
- /* iterate over erase units in the extent */
- do {
- /* considering erase unit:
- * [unit_sec; unit_sec + granularity) */
-
- /* calculate block range for erase unit:
- * [unit_start_blk; unit_start_blk+unit_len_blk) */
- unit_start_blk = unit_sec;
- do_div(unit_start_blk, sec_per_blk);
-
- if (granularity > 1) {
- unit_len_blk = unit_sec + granularity - 1;
- do_div(unit_len_blk, sec_per_blk);
- ++unit_len_blk;
-
- assert("intelfx-22", unit_len_blk > unit_start_blk);
-
- unit_len_blk -= unit_start_blk;
- } else {
- unit_len_blk = 1;
- }
-
- if (reiser4_check_blocks(&unit_start_blk, &unit_len_blk, 0)) {
- /* OK. Add this unit to the accumulator.
- * We accumulate discard units to call blkdev_issue_discard()
- * not too frequently. */
-
- if (request_len_sec > 0) {
- request_len_sec += granularity;
- } else {
- request_start_sec = unit_sec;
- request_len_sec = granularity;
- }
- } else {
- /* This unit can't be discarded. Discard what's been accumulated
- * so far. */
- if (request_len_sec > 0) {
- ret = __discard_extent(bdev, request_start_sec, request_len_sec);
- if (ret != 0) {
- return ret;
- }
- request_len_sec = 0;
- }
- }
-
- unit_sec += granularity;
- ++erase_unit_counter;
- } while (unit_sec < extent_end_sec);
-
- /* Discard the last accumulated request. */
- if (request_len_sec > 0) {
- ret = __discard_extent(bdev, request_start_sec, request_len_sec);
- if (ret != 0) {
- return ret;
- }
- }
-
- return 0;
+ /* discard the extent, don't pad it to erase unit boundaries for now */
+ return __discard_extent(bdev, extent_start_sec, extent_len_sec);
}
int discard_atom(txn_atom *atom, struct list_head *processed_set)
@@ -201,6 +127,7 @@ int discard_atom(txn_atom *atom, struct list_head *processed_set)
if (list_empty(&atom->discard.delete_set)) {
/* Nothing left to discard. */
+ spin_unlock_atom(atom);
return 0;
}
diff --git a/fs/reiser4/plugin/space/bitmap.c b/fs/reiser4/plugin/space/bitmap.c
index 03bc5e7..3da3f6b 100644
--- a/fs/reiser4/plugin/space/bitmap.c
+++ b/fs/reiser4/plugin/space/bitmap.c
@@ -1259,8 +1259,7 @@ int reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
reiser4_block_nr end;
bmap_nr_t bmap, end_bmap;
- bmap_off_t offset;
- bmap_off_t end_offset;
+ bmap_off_t offset, end_offset;
const bmap_off_t max_offset = bmap_bit_count(super->s_blocksize);
assert("intelfx-9", start != NULL);
@@ -1270,9 +1269,8 @@ int reiser4_check_blocks_bitmap(const reiser4_block_nr * start,
check_block_range(start, len);
end = *start + *len - 1;
} else {
- /* end is used as temporary len here */
- end = 1;
- check_block_range(start, &end);
+ /* on next line, end is used as temporary len for check_block_range() */
+ end = 1; check_block_range(start, &end);
end = *start;
}
diff --git a/fs/reiser4/txnmgr.h b/fs/reiser4/txnmgr.h
index 05990d8..72b84a2 100644
--- a/fs/reiser4/txnmgr.h
+++ b/fs/reiser4/txnmgr.h
@@ -256,8 +256,10 @@ struct txn_atom {
} nodiscard;
struct {
- /* The atom's delete set. It collects block numbers which were
- deallocated with BA_DEFER, i. e. of ordinary nodes. */
+ /* The atom's delete set. It collects all blocks that have been
+ deallocated (both immediate and deferred) during the transaction.
+ These blocks are considered for discarding at commit time.
+ For details see discard.c */
struct list_head delete_set;
} discard;
};
Thanks,
--
Ivan Shapovalov / intelfx /
[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 213 bytes --]
^ permalink raw reply related [flat|nested] 8+ messages in thread