From: Joel Becker <joel.becker@oracle.com>
To: ocfs2-devel@oss.oracle.com
Subject: [Ocfs2-devel] [PATCH 2/3] ocfs2: Properly lock extent map size changes.
Date: Wed Mar 28 18:51:01 2007 [thread overview]
Message-ID: <20070329005717.663893000@oracle.com> (raw)
In-Reply-To: 20070329005041.518030000@oracle.com
The extent map code failed to properly lock changes to ->em_clusters, the
extent map's idea of its own size.
This leads to a subtle race. One process is updating the size to match an
inode that changed, while another process has already passed that point and
is in the lookup code, checking the size against its arguments. For a moment,
the size is wrong (due to how the size is checked and calculated).
Properly locking the update and the query makes this safe.
The check for size change is abstracted into a common function.
Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
fs/ocfs2/extent_map.c | 90 +++++++++++++++++++++++++++----------------------
1 files changed, 50 insertions(+), 40 deletions(-)
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 80ac69f..9ff4351 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -84,6 +84,8 @@ static int ocfs2_extent_map_try_insert(s
struct ocfs2_extent_rec *rec,
int tree_depth,
struct ocfs2_em_insert_context *ctxt);
+static void ocfs2_extent_map_check_size_change(struct inode *inode,
+ u32 expected_clusters);
/* returns 1 only if the rec contains all the given clusters -- that is that
* rec's cpos is <= the cluster cpos and that the rec endpoint (cpos +
@@ -558,8 +560,10 @@ static int ocfs2_extent_map_insert(struc
int ret;
struct ocfs2_em_insert_context ctxt = {0, };
+ spin_lock(&OCFS2_I(inode)->ip_lock);
if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) >
OCFS2_I(inode)->ip_map.em_clusters) {
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
ret = -EBADR;
mlog_errno(ret);
return ret;
@@ -569,6 +573,7 @@ static int ocfs2_extent_map_insert(struc
if (!rec->e_clusters) {
if ((le32_to_cpu(rec->e_cpos) + le32_to_cpu(rec->e_clusters)) !=
OCFS2_I(inode)->ip_map.em_clusters) {
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
ret = -EBADR;
mlog_errno(ret);
ocfs2_error(inode->i_sb,
@@ -578,9 +583,12 @@ static int ocfs2_extent_map_insert(struc
return ret;
}
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
+
/* Ignore the truncated tail */
return 0;
}
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
ret = -ENOMEM;
ctxt.new_ent = kmem_cache_alloc(ocfs2_em_ent_cachep,
@@ -662,15 +670,8 @@ int ocfs2_extent_map_append(struct inode
BUG_ON(!new_clusters);
BUG_ON(le32_to_cpu(rec->e_clusters) < new_clusters);
- if (em->em_clusters < OCFS2_I(inode)->ip_clusters) {
- /*
- * Size changed underneath us on disk. Drop any
- * straddling records and update our idea of
- * i_clusters
- */
- ocfs2_extent_map_drop(inode, em->em_clusters - 1);
- em->em_clusters = OCFS2_I(inode)->ip_clusters;
- }
+ ocfs2_extent_map_check_size_change(inode,
+ OCFS2_I(inode)->ip_clusters);
mlog_bug_on_msg((le32_to_cpu(rec->e_cpos) +
le32_to_cpu(rec->e_clusters)) !=
@@ -745,7 +746,6 @@ int ocfs2_extent_map_get_rec(struct inod
int *tree_depth)
{
int ret = -ENOENT;
- struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
struct ocfs2_extent_map_entry *ent;
*rec = NULL;
@@ -753,15 +753,7 @@ int ocfs2_extent_map_get_rec(struct inod
if (cpos >= OCFS2_I(inode)->ip_clusters)
return -EINVAL;
- if (cpos >= em->em_clusters) {
- /*
- * Size changed underneath us on disk. Drop any
- * straddling records and update our idea of
- * i_clusters
- */
- ocfs2_extent_map_drop(inode, em->em_clusters - 1);
- em->em_clusters = OCFS2_I(inode)->ip_clusters ;
- }
+ ocfs2_extent_map_check_size_change(inode, cpos);
ent = ocfs2_extent_map_lookup(&OCFS2_I(inode)->ip_map, cpos, 1,
NULL, NULL);
@@ -782,7 +774,6 @@ int ocfs2_extent_map_get_clusters(struct
{
int ret;
u32 coff, ccount;
- struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
struct ocfs2_extent_map_entry *ent = NULL;
*p_cpos = ccount = 0;
@@ -790,16 +781,7 @@ int ocfs2_extent_map_get_clusters(struct
if ((v_cpos + count) > OCFS2_I(inode)->ip_clusters)
return -EINVAL;
- if ((v_cpos + count) > em->em_clusters) {
- /*
- * Size changed underneath us on disk. Drop any
- * straddling records and update our idea of
- * i_clusters
- */
- ocfs2_extent_map_drop(inode, em->em_clusters - 1);
- em->em_clusters = OCFS2_I(inode)->ip_clusters;
- }
-
+ ocfs2_extent_map_check_size_change(inode, v_cpos + count);
ret = ocfs2_extent_map_lookup_read(inode, v_cpos, count, &ent);
if (ret)
@@ -838,7 +820,6 @@ int ocfs2_extent_map_get_blocks(struct i
u32 cpos, clusters;
int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
struct ocfs2_extent_map_entry *ent = NULL;
- struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
struct ocfs2_extent_rec *rec;
*p_blkno = 0;
@@ -852,15 +833,7 @@ int ocfs2_extent_map_get_blocks(struct i
return ret;
}
- if ((cpos + clusters) > em->em_clusters) {
- /*
- * Size changed underneath us on disk. Drop any
- * straddling records and update our idea of
- * i_clusters
- */
- ocfs2_extent_map_drop(inode, em->em_clusters - 1);
- em->em_clusters = OCFS2_I(inode)->ip_clusters;
- }
+ ocfs2_extent_map_check_size_change(inode, cpos + clusters);
ret = ocfs2_extent_map_lookup_read(inode, cpos, clusters, &ent);
if (ret) {
@@ -996,6 +969,43 @@ int ocfs2_extent_map_drop(struct inode *
}
/*
+ * This is almost a wrapper of ocfs2_extent_map_drop(), but must
+ * handle its locking carefully.
+ */
+static void ocfs2_extent_map_check_size_change(struct inode *inode,
+ u32 expected_clusters)
+{
+ struct rb_node *free_head = NULL;
+ struct ocfs2_extent_map *em = &OCFS2_I(inode)->ip_map;
+ struct ocfs2_extent_map_entry *ent; /* NOTE(review): no initializer; relies on __ocfs2_extent_map_drop() writing it - confirm */
+
+ spin_lock(&OCFS2_I(inode)->ip_lock); /* compare and update em_clusters under ip_lock */
+
+ if (em->em_clusters < expected_clusters) {
+ /*
+ * Size changed underneath us on disk. Drop any straddling
+ * records while ip_lock is held, then resync em_clusters
+ * from ip_clusters (not from expected_clusters).
+ */
+ __ocfs2_extent_map_drop(inode, em->em_clusters -1,
+ &free_head, &ent);
+
+ if (ent) { /* unlink the returned entry and push it onto the free list */
+ rb_erase(&ent->e_node, &em->em_extents);
+ ent->e_node.rb_right = free_head;
+ free_head = &ent->e_node;
+ }
+
+ em->em_clusters = OCFS2_I(inode)->ip_clusters;
+ }
+
+ spin_unlock(&OCFS2_I(inode)->ip_lock);
+
+ if (free_head) /* cleanup of the collected entries runs after ip_lock is released */
+ __ocfs2_extent_map_drop_cleanup(free_head);
+}
+
+/*
* Remove all entries past new_clusters and also clip any extent
* straddling new_clusters, if there is one. This does not check
* or modify ip_clusters
--
1.4.2.3
next prev parent reply other threads:[~2007-03-28 18:51 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-03-28 18:50 [Ocfs2-devel] [PATCH 0/3] Properly protect the extent map Joel Becker
2007-03-28 18:50 ` [Ocfs2-devel] [PATCH 1/3] ocfs2: Wrap access of directory allocations with ip_alloc_sem Joel Becker
2007-03-28 18:51 ` [Ocfs2-devel] [PATCH 3/3] ocfs2: Local mounts should not truncate the extent map Joel Becker
2007-03-28 18:51 ` Joel Becker [this message]
2007-03-29 2:04 ` [Ocfs2-devel] [PATCH 0/3] Properly protect " Fabio Massimo Di Nitto
2007-03-29 9:41 ` Sunil Mushran
2007-04-02 11:45 ` Mark Fasheh
2007-04-02 11:48 ` Fabio Massimo Di Nitto
2007-04-02 11:58 ` Mark Fasheh
2007-04-02 12:00 ` Fabio Massimo Di Nitto
2007-04-02 11:47 ` Mark Fasheh
2007-04-02 16:55 ` Joel Becker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070329005717.663893000@oracle.com \
--to=joel.becker@oracle.com \
--cc=ocfs2-devel@oss.oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.