* [PATCH 1/2] reiser4: Auto-punching holes: basic stuff
@ 2015-07-19 15:29 Edward Shishkin
0 siblings, 0 replies; only message in thread
From: Edward Shishkin @ 2015-07-19 15:29 UTC (permalink / raw)
To: ReiserFS development mailing list
[-- Attachment #1: Type: text/plain, Size: 251 bytes --]
. Auto-punching holes: basic stuff;
. Handle empty nodes that appear after node conversion at flush time:
cache the locked right neighbor in the conversion info of struct
flush_pos, so that it can still be accessed when the current node
becomes empty and hence is removed from the tree.
[-- Attachment #2: reiser4-punch-holes.patch --]
[-- Type: text/x-patch, Size: 12807 bytes --]
Signed-off-by: Edward Shishkin <edward.shishkin@gmail.com>
---
fs/reiser4/flush.c | 107 ++++++++++++++++++++++++---------
fs/reiser4/flush.h | 4 -
fs/reiser4/init_super.c | 2
fs/reiser4/plugin/file/cryptcompress.c | 32 +++++++++
fs/reiser4/plugin/file/cryptcompress.h | 1
fs/reiser4/plugin/item/ctail.c | 75 +++++++++++++++++------
fs/reiser4/super.h | 4 -
7 files changed, 175 insertions(+), 50 deletions(-)
--- a/fs/reiser4/plugin/file/cryptcompress.c
+++ b/fs/reiser4/plugin/file/cryptcompress.c
@@ -921,12 +921,34 @@ static unsigned deflate_overrun(struct i
return coa_overrun(inode_compression_plugin(inode), ilen);
}
+static bool is_all_zero(char const* mem, size_t size)
+{
+ while (size-- > 0)
+ if (*mem++)
+ return false;
+ return true;
+}
+
+static inline bool should_punch_hole(struct tfm_cluster *tc)
+{
+ if (!reiser4_is_set(reiser4_get_current_sb(), REISER4_DONT_PUNCH_HOLES)
+ && is_all_zero(tfm_stream_data(tc, INPUT_STREAM), tc->lsize)) {
+ /*
+ * the logical cluster is filled with zeros,
+ * so we'll punch a hole
+ */
+ tc->all_zero = 1;
+ return true;
+ }
+ return false;
+}
+
/* Estimating compressibility of a logical cluster by various
policies represented by compression mode plugin.
If this returns false, then compressor won't be called for
the cluster of index @index.
*/
-static int should_compress(struct tfm_cluster * tc, cloff_t index,
+static int should_compress(struct tfm_cluster *tc, cloff_t index,
struct inode *inode)
{
compression_plugin *cplug = inode_compression_plugin(inode);
@@ -936,6 +958,12 @@ static int should_compress(struct tfm_cl
assert("edward-1322", cplug != NULL);
assert("edward-1323", mplug != NULL);
+ if (should_punch_hole(tc))
+ /*
+ * we are about to punch a hole,
+ * so don't compress data
+ */
+ return 0;
return /* estimate by size */
(cplug->min_size_deflate ?
tc->len >= cplug->min_size_deflate() :
@@ -3368,7 +3396,7 @@ static int prune_cryptcompress(struct in
clust.dstat == UNPR_DISK_CLUSTER);
assert("edward-1191", inode->i_size == new_size);
- assert("edward-1206", body_truncate_ok(inode, ridx));
+
truncate_fake:
/* drop all the pages that don't have jnodes (i.e. pages
which can not be truncated by cut_file_items() because
--- a/fs/reiser4/plugin/file/cryptcompress.h
+++ b/fs/reiser4/plugin/file/cryptcompress.h
@@ -159,6 +159,7 @@ struct tfm_cluster {
int uptodate;
int lsize; /* number of bytes in logical cluster */
int len; /* length of the transform stream */
+ int all_zero; /* logical cluster is filled with zeros */
};
static inline coa_t get_coa(struct tfm_cluster * tc, reiser4_compression_id id,
--- a/fs/reiser4/plugin/item/ctail.c
+++ b/fs/reiser4/plugin/item/ctail.c
@@ -1177,6 +1177,8 @@ static int alloc_item_convert_data(struc
sq->itm = kmalloc(sizeof(*sq->itm), reiser4_ctx_gfp_mask_get());
if (sq->itm == NULL)
return RETERR(-ENOMEM);
+ init_lh(&sq->right_lock);
+ sq->right_locked = 0;
return 0;
}
@@ -1186,22 +1188,28 @@ static void free_item_convert_data(struc
assert("edward-819", sq->itm != NULL);
assert("edward-820", sq->iplug != NULL);
+ done_lh(&sq->right_lock);
+ sq->right_locked = 0;
kfree(sq->itm);
sq->itm = NULL;
return;
}
-static int alloc_convert_data(flush_pos_t * pos)
+static struct convert_info *alloc_convert_data(void)
{
- assert("edward-821", pos != NULL);
- assert("edward-822", pos->sq == NULL);
+ struct convert_info *info;
- pos->sq = kmalloc(sizeof(*pos->sq), reiser4_ctx_gfp_mask_get());
- if (!pos->sq)
- return RETERR(-ENOMEM);
- memset(pos->sq, 0, sizeof(*pos->sq));
- cluster_init_write(&pos->sq->clust, NULL);
- return 0;
+ info = kmalloc(sizeof(*info), reiser4_ctx_gfp_mask_get());
+ if (info != NULL) {
+ memset(info, 0, sizeof(*info));
+ cluster_init_write(&info->clust, NULL);
+ }
+ return info;
+}
+
+static void reset_convert_data(struct convert_info *info)
+{
+ info->clust.tc.all_zero = 0;
}
void free_convert_data(flush_pos_t * pos)
@@ -1230,7 +1238,6 @@ static int init_item_convert_data(flush_
assert("edward-828", inode != NULL);
sq = pos->sq;
-
memset(sq->itm, 0, sizeof(*sq->itm));
/* iplug->init_convert_data() */
@@ -1258,10 +1265,13 @@ static int attach_convert_idata(flush_po
item_plugin_by_id(CTAIL_ID));
if (!pos->sq) {
- ret = alloc_convert_data(pos);
- if (ret)
- return ret;
+ pos->sq = alloc_convert_data();
+ if (!pos->sq)
+ return RETERR(-ENOMEM);
}
+ else
+ reset_convert_data(pos->sq);
+
clust = &pos->sq->clust;
ret = grab_coa(&clust->tc, cplug);
if (ret)
@@ -1300,6 +1310,9 @@ static int attach_convert_idata(flush_po
clust->tc.len,
clust_to_off(clust->index, inode),
WRITE_OP, &info->flow);
+ if (clust->tc.all_zero)
+ info->flow.length = 0;
+
jput(pos->child);
return 0;
err:
@@ -1420,6 +1433,7 @@ static int pre_convert_ctail(flush_pos_t
coord_init_before_first_item(&coord, slider);
if (node_is_empty(slider)) {
+ warning("edward-1641", "Found empty right neighbor");
znode_make_dirty(slider);
znode_set_convertible(slider);
/*
@@ -1450,14 +1464,25 @@ static int pre_convert_ctail(flush_pos_t
znode_set_convertible(slider);
}
stop = 1;
+ convert_data(pos)->right_locked = 1;
} else {
item_convert_data(pos)->d_next = DC_AFTER_CLUSTER;
stop = 1;
+ convert_data(pos)->right_locked = 1;
}
zrelse(slider);
done_lh(&slider_lh);
move_lh(&slider_lh, &right_lh);
}
+ if (convert_data(pos)->right_locked)
+ /*
+ * Store locked right neighbor in
+ * the conversion info. Otherwise,
+ * we won't be able to access it,
+ * if the current node gets deleted
+ * during conversion
+ */
+ move_lh(&convert_data(pos)->right_lock, &slider_lh);
done_lh(&slider_lh);
done_lh(&right_lh);
@@ -1566,11 +1591,25 @@ static int assign_conversion_mode(flush_
}
if (ret)
goto dont_convert;
- /*
- * this is the first ctail in the cluster,
- * so it should be overwritten
- */
- *mode = CTAIL_OVERWRITE_ITEM;
+
+ if (pos->sq->clust.tc.all_zero) {
+ assert("edward-1634",
+ item_convert_data(pos)->flow.length == 0);
+ /*
+ * new content is filled with zeros -
+ * we punch a hole using cut (not kill)
+ * primitive, so attached pages won't
+ * be truncated
+ */
+ *mode = CTAIL_CUT_ITEM;
+ }
+ else
+ /*
+ * this is the first ctail in the cluster,
+ * so it (may be only its head) should be
+ * overwritten
+ */
+ *mode = CTAIL_OVERWRITE_ITEM;
} else
/*
* non-convertible item
--- a/fs/reiser4/flush.h
+++ b/fs/reiser4/flush.h
@@ -74,6 +74,8 @@ struct convert_info {
item_plugin *iplug; /* current item plugin */
struct convert_item_info *itm; /* current item info */
struct cluster_handle clust; /* transform cluster */
+ lock_handle right_lock; /* lock handle of the right neighbor */
+ int right_locked;
};
typedef enum flush_position_state {
@@ -231,7 +233,7 @@ static inline int should_terminate_squal
item_convert_count(pos) >= SQUALLOC_THRESHOLD;
}
-#if 1
+#if REISER4_DEBUG
#define check_convert_info(pos) \
do { \
if (unlikely(should_convert_right_neighbor(pos))) { \
--- a/fs/reiser4/flush.c
+++ b/fs/reiser4/flush.c
@@ -1915,8 +1915,12 @@ out:
}
/*
- * Process nodes on leaf level until unformatted node or
- * rightmost node in the slum reached
+ * Process nodes on the leaf level until unformatted node or
+ * rightmost node in the slum reached.
+ *
+ * This function is a complicated beast, because it calls a
+ * static machine ->convert_node() for every node, which, in
+ * turn, scans node's items and does something for each of them.
*/
static int handle_pos_on_formatted(flush_pos_t *pos)
{
@@ -1933,19 +1937,39 @@ static int handle_pos_on_formatted(flush
return ret;
}
while (1) {
- int expected;
- expected = should_convert_right_neighbor(pos);
- ret = neighbor_in_slum(pos->lock.node, &right_lock, RIGHT_SIDE,
- ZNODE_WRITE_LOCK, !expected, expected);
- if (ret) {
- if (expected)
- warning("edward-1495",
- "Right neighbor is expected but not found (%d). Fsck?",
- ret);
- break;
+ assert("edward-1635",
+ ergo(node_is_empty(pos->lock.node),
+ ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE)));
+ /*
+ * First of all, grab a right neighbor
+ */
+ if (convert_data(pos) && convert_data(pos)->right_locked) {
+ /*
+ * the right neighbor was locked by convert_node();
+ * transfer the lock from the "cache".
+ */
+ move_lh(&right_lock, &convert_data(pos)->right_lock);
+ done_lh(&convert_data(pos)->right_lock);
+ convert_data(pos)->right_locked = 0;
+ }
+ else {
+ ret = neighbor_in_slum(pos->lock.node, &right_lock,
+ RIGHT_SIDE, ZNODE_WRITE_LOCK,
+ 1, 0);
+ if (ret) {
+ /*
+ * There is no right neighbor for some reason,
+ * so finish with this level.
+ */
+ assert("edward-1636",
+ !should_convert_right_neighbor(pos));
+ break;
+ }
}
/*
- * we don't prep(allocate) nodes for flushing twice. This can be
+ * Check "flushprepped" status of the right neighbor.
+ *
+ * We don't prep(allocate) nodes for flushing twice. This can be
* suboptimal, or it can be optimal. For now we choose to live
* with the risk that it will be suboptimal because it would be
* quite complex to code it to be smarter.
@@ -1957,38 +1981,65 @@ static int handle_pos_on_formatted(flush
pos_stop(pos);
break;
}
-
ret = incr_load_count_znode(&right_load, right_lock.node);
if (ret)
break;
if (znode_convertible(right_lock.node)) {
+ assert("edward-xxxx",
+ ergo(convert_data(pos),
+ convert_data(pos)->right_locked == 0));
+
ret = convert_node(pos, right_lock.node);
if (ret)
break;
- if (unlikely(node_is_empty(right_lock.node))) {
- /*
- * node became empty after convertion,
- * skip this
- */
- done_load_count(&right_load);
- done_lh(&right_lock);
- continue;
- }
+ }
+ else
+ assert("edward-1637",
+ !should_convert_right_neighbor(pos));
+
+ if (node_is_empty(pos->lock.node)) {
+ /*
+ * Current node became empty after conversion
+ * and, hence, was removed from the tree;
+ * Advance the current position to the right neighbor.
+ */
+ assert("edward-1638",
+ ZF_ISSET(pos->lock.node, JNODE_HEARD_BANSHEE));
+ move_flush_pos(pos, &right_lock, &right_load, NULL);
+ continue;
+ }
+ if (node_is_empty(right_lock.node)) {
+ assert("edward-1639",
+ ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
+ /*
+ * The right neighbor became empty after
+ * conversion, and hence it was deleted
+ * from the tree - skip this.
+ * Since current node is not empty,
+ * we'll obtain a correct pointer to
+ * the next right neighbor
+ */
+ done_load_count(&right_load);
+ done_lh(&right_lock);
+ continue;
}
/*
- * Current node and its right neighbor are converted.
+ * At this point both the current node and its right
+ * neighbor are converted and not empty.
* Squeeze them _before_ going upward.
*/
ret = squeeze_right_neighbor(pos, pos->lock.node,
right_lock.node);
if (ret < 0)
break;
-
if (node_is_empty(right_lock.node)) {
+ assert("edward-1640",
+ ZF_ISSET(right_lock.node, JNODE_HEARD_BANSHEE));
/*
- * right node was squeezed completely,
- * skip this
- */
+ * right neighbor was squeezed completely,
+ * and hence has been deleted from the tree.
+ * Skip this.
+ */
done_load_count(&right_load);
done_lh(&right_lock);
continue;
--- a/fs/reiser4/init_super.c
+++ b/fs/reiser4/init_super.c
@@ -496,6 +496,8 @@ int reiser4_init_super_data(struct super
PUSH_BIT_OPT("no_write_barrier", REISER4_NO_WRITE_BARRIER);
/* enable issuing of discard requests */
PUSH_BIT_OPT("discard", REISER4_DISCARD);
+ /* disable hole punching at flush time */
+ PUSH_BIT_OPT("dont_punch_holes", REISER4_DONT_PUNCH_HOLES);
PUSH_OPT(p, opts,
{
--- a/fs/reiser4/super.h
+++ b/fs/reiser4/super.h
@@ -53,7 +53,9 @@ typedef enum {
/* don't use write barriers in the log writer code. */
REISER4_NO_WRITE_BARRIER = 7,
/* enable issuing of discard requests */
- REISER4_DISCARD = 8
+ REISER4_DISCARD = 8,
+ /* disable hole punching at flush time */
+ REISER4_DONT_PUNCH_HOLES = 9
} reiser4_fs_flag;
/*
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2015-07-19 15:29 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2015-07-19 15:29 [PATCH 1/2] reiser4: Auto-punching holes: basic stuff Edward Shishkin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).