linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Btrfs: make xattr replace operations atomic
@ 2014-11-07 20:39 Filipe Manana
  2014-11-07 20:54 ` Chris Mason
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Filipe Manana @ 2014-11-07 20:39 UTC (permalink / raw)
  To: linux-btrfs; +Cc: oliva, Filipe Manana

Replacing a xattr consists of doing a lookup for its existing value, delete
the current value from the respective leaf, release the search path and then
finally insert the new value. This leaves a time window where readers (getxattr,
listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs,
so this has security implications.

This change also fixes 2 other existing issues which were:

*) Deleting the old xattr value without verifying first if the new xattr will
   fit in the existing leaf item (in case multiple xattrs are packed in the
   same item due to name hash collision);

*) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't
   exist but we have have an existing item that packs muliple xattrs with
   the same name hash as the input xattr. In this case we should return ENOSPC.

A test case for xfstests follows soon.

Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace
implementation.

Reported-by: Alexandre Oliva <oliva@gnu.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/ctree.h    |   4 ++
 fs/btrfs/dir-item.c |  10 ++--
 fs/btrfs/xattr.c    | 142 ++++++++++++++++++++++++++++++----------------------
 3 files changed, 88 insertions(+), 68 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a5e471a..9a47dfe 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3687,6 +3687,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 int verify_dir_item(struct btrfs_root *root,
 		    struct extent_buffer *leaf,
 		    struct btrfs_dir_item *dir_item);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name,
+						 int name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 65bf60e..c14e682 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
 #include "hash.h"
 #include "transaction.h"
 
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len);
-
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -385,9 +381,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name, int name_len)
 {
 	struct btrfs_dir_item *dir_item;
 	unsigned long name_ptr;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index dcf2013..3c8fba1 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -91,7 +91,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 		       struct inode *inode, const char *name,
 		       const void *value, size_t size, int flags)
 {
-	struct btrfs_dir_item *di;
+	struct btrfs_dir_item *di = NULL;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path;
 	size_t name_len = strlen(name);
@@ -104,83 +104,103 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	if (flags & XATTR_REPLACE) {
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-					name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
+	if (!value && ((flags & XATTR_REPLACE) || !flags)) {
+		/*
+		 * We're deleting only a xattr (no replace).
+		 * Don't follow the path below because it could leave a leaf
+		 * empty.
+		 */
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (!di && (flags & XATTR_REPLACE))
 			ret = -ENODATA;
-			goto out;
-		}
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
-		btrfs_release_path(path);
+		else if (di)
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		goto out;
+	}
 
+	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+				      name, name_len, value, size);
+	if (ret == -EOVERFLOW) {
 		/*
-		 * remove the attribute
+		 * We have an existing item in a leaf, split_leaf couldn't
+		 * expand it. That item might have or not a dir_item that
+		 * matches our target xattr, so lets check.
 		 */
-		if (!value)
-			goto out;
-	} else {
-		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
-					name, name_len, 0);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
+		ret = 0;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if (!di && (flags & XATTR_CREATE)) {
+			ret = -ENOSPC;
 			goto out;
 		}
-		if (!di && !value)
-			goto out;
-		btrfs_release_path(path);
+	} else if (ret == -EEXIST) {
+		ret = 0;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		ASSERT(di); /* logic error */
+	} else if (ret) {
+		goto out;
 	}
 
-again:
-	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-				      name, name_len, value, size);
-	/*
-	 * If we're setting an xattr to a new value but the new value is say
-	 * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
-	 * back from split_leaf.  This is because it thinks we'll be extending
-	 * the existing item size, but we're asking for enough space to add the
-	 * item itself.  So if we get EOVERFLOW just set ret to EEXIST and let
-	 * the rest of the function figure it out.
-	 */
-	if (ret == -EOVERFLOW)
+	if (di && (flags & XATTR_CREATE))
 		ret = -EEXIST;
+	if (!di && (flags & XATTR_REPLACE))
+		ret = -ENODATA;
+	if (ret)
+		goto out;
 
-	if (ret == -EEXIST) {
-		if (flags & XATTR_CREATE)
-			goto out;
+	if (di) {
 		/*
-		 * We can't use the path we already have since we won't have the
-		 * proper locking for a delete, so release the path and
-		 * re-lookup to delete the thing.
+		 * We're doing a replace, and it must be atomic, that is, at
+		 * any point in time we have either the old or the new xattr
+		 * value in the tree. We don't want readers (getxattr and
+		 * listxattrs) to miss a value, this is specially important
+		 * for ACLs.
 		 */
-		btrfs_release_path(path);
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
-					name, name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
-			/* Shouldn't happen but just in case... */
-			btrfs_release_path(path);
-			goto again;
+		const int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		const u16 old_data_len = btrfs_dir_data_len(leaf, di);
+		const u32 item_size = btrfs_item_size_nr(leaf, slot);
+		const u32 data_size = sizeof(*di) + name_len + size;
+		struct btrfs_item *item;
+		unsigned long data_ptr;
+		char *ptr;
+
+		if (size > old_data_len) {
+			if (btrfs_leaf_free_space(root, leaf) <
+			    (size - old_data_len)) {
+				ret = -ENOSPC;
+				goto out;
+			}
 		}
 
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
+		if (old_data_len + name_len + sizeof(*di) == item_size) {
+			/* No other xattrs packed in the same leaf item. */
+			if (size > old_data_len)
+				btrfs_extend_item(root, path,
+						  size - old_data_len);
+			else if (size < old_data_len)
+				btrfs_truncate_item(root, path, data_size, 1);
+		} else {
+			/* There are other xattrs packed in the same item. */
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+			if (ret)
+				goto out;
+			btrfs_extend_item(root, path, data_size);
+		}
 
+		item = btrfs_item_nr(slot);
+		ptr = btrfs_item_ptr(leaf, slot, char);
+		ptr += btrfs_item_size(leaf, item) - data_size;
+		di = (struct btrfs_dir_item *)ptr;
+		btrfs_set_dir_data_len(leaf, di, size);
+		data_ptr = ((unsigned long)(di + 1)) + name_len;
+		write_extent_buffer(leaf, value, data_ptr, size);
+	} else {
 		/*
-		 * We have a value to set, so go back and try to insert it now.
+		 * Insert, and we had space for the xattr, so path->slots[0] is
+		 * where our xattr dir_item is and btrfs_insert_xattr_item()
+		 * filled it.
 		 */
-		if (value) {
-			btrfs_release_path(path);
-			goto again;
-		}
 	}
 out:
 	btrfs_free_path(path);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] Btrfs: make xattr replace operations atomic
  2014-11-07 20:39 [PATCH] Btrfs: make xattr replace operations atomic Filipe Manana
@ 2014-11-07 20:54 ` Chris Mason
  2014-11-07 21:34   ` Filipe David Manana
  2014-11-08  1:01 ` [PATCH v2] " Filipe Manana
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 8+ messages in thread
From: Chris Mason @ 2014-11-07 20:54 UTC (permalink / raw)
  To: Filipe Manana; +Cc: linux-btrfs, oliva, Filipe Manana



On Fri, Nov 7, 2014 at 3:39 PM, Filipe Manana <fdmanana@suse.com> wrote:
> Replacing a xattr consists of doing a lookup for its existing value, 
> delete
> the current value from the respective leaf, release the search path 
> and then
> finally insert the new value. This leaves a time window where readers 
> (getxattr,
> listxattrs) won't see any value for the xattr. Xattrs are used to 
> store ACLs,
> so this has security implications.
> 
> This change also fixes 2 other existing issues which were:
> 
> *) Deleting the old xattr value without verifying first if the new 
> xattr will
>    fit in the existing leaf item (in case multiple xattrs are packed 
> in the
>    same item due to name hash collision);
> 
> *) Returning -EEXIST when the flag XATTR_CREATE is given and the 
> xattr doesn't
>    exist but we have have an existing item that packs muliple xattrs 
> with
>    the same name hash as the input xattr. In this case we should 
> return ENOSPC.
> 
> A test case for xfstests follows soon.
> 
> Thanks to Alexandre Oliva for reporting the non-atomicity of the 
> xattr replace
> implementation.

Thanks Filipe, one problem:

> +
> +		if (size > old_data_len) {
> +			if (btrfs_leaf_free_space(root, leaf) <
> +			    (size - old_data_len)) {
> +				ret = -ENOSPC;
> +				goto out;
> +			}
>  		}

This means replacing an xattr item can fail if the leaf doesn't happen 
to have free space.  btrfs_search_slot already has code meant to extend 
the item, even if the item you're searching for is already found.  (see 
the call to split_leaf with extend == 1).

So what you want to do is find out the current size of the item, and 
call btrfs search_slot again with that size + the extra needed for the 
new name.  From there you should be able to call btrfs_extend_item like 
you currently are.

-chris




^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] Btrfs: make xattr replace operations atomic
  2014-11-07 20:54 ` Chris Mason
@ 2014-11-07 21:34   ` Filipe David Manana
  0 siblings, 0 replies; 8+ messages in thread
From: Filipe David Manana @ 2014-11-07 21:34 UTC (permalink / raw)
  To: Chris Mason; +Cc: Filipe Manana, linux-btrfs@vger.kernel.org, Alexandre Oliva

On Fri, Nov 7, 2014 at 8:54 PM, Chris Mason <clm@fb.com> wrote:
>
>
> On Fri, Nov 7, 2014 at 3:39 PM, Filipe Manana <fdmanana@suse.com> wrote:
>>
>> Replacing a xattr consists of doing a lookup for its existing value,
>> delete
>> the current value from the respective leaf, release the search path and
>> then
>> finally insert the new value. This leaves a time window where readers
>> (getxattr,
>> listxattrs) won't see any value for the xattr. Xattrs are used to store
>> ACLs,
>> so this has security implications.
>>
>> This change also fixes 2 other existing issues which were:
>>
>> *) Deleting the old xattr value without verifying first if the new xattr
>> will
>>    fit in the existing leaf item (in case multiple xattrs are packed in
>> the
>>    same item due to name hash collision);
>>
>> *) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr
>> doesn't
>>    exist but we have have an existing item that packs muliple xattrs with
>>    the same name hash as the input xattr. In this case we should return
>> ENOSPC.
>>
>> A test case for xfstests follows soon.
>>
>> Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr
>> replace
>> implementation.
>
>
> Thanks Filipe, one problem:
>
>> +
>> +               if (size > old_data_len) {
>> +                       if (btrfs_leaf_free_space(root, leaf) <
>> +                           (size - old_data_len)) {
>> +                               ret = -ENOSPC;
>> +                               goto out;
>> +                       }
>>                 }
>
>
> This means replacing an xattr item can fail if the leaf doesn't happen to
> have free space.  btrfs_search_slot already has code meant to extend the
> item, even if the item you're searching for is already found.  (see the call
> to split_leaf with extend == 1).
>
> So what you want to do is find out the current size of the item, and call
> btrfs search_slot again with that size + the extra needed for the new name.
> From there you should be able to call btrfs_extend_item like you currently
> are.

So what I do is call btrfs_seach_slot with ins_size == new_xattr_size.
If it returns -eoverflow (it couldn't extent by +new_size, which
includes the name, dir_item and btrfs_item structs), I don't return
immediately because after deleting the existing xattr we may have
enough space in the leaf (that is, we no longer to extend by new_size,
but by a smaller amount == new_size - old_size, and excluding the
name, dir_item and btrfs_item structs), effectively not requiring the
original size passed to btrfs_search_slot. So if I correctly
understood you, it's almost equivalent to your suggestion but without
releasing paths and keep retrying.

Did I miss something? :)

>
> -chris
>
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Filipe David Manana,

"Reasonable men adapt themselves to the world.
 Unreasonable men adapt the world to themselves.
 That's why all progress depends on unreasonable men."

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] Btrfs: make xattr replace operations atomic
  2014-11-07 20:39 [PATCH] Btrfs: make xattr replace operations atomic Filipe Manana
  2014-11-07 20:54 ` Chris Mason
@ 2014-11-08  1:01 ` Filipe Manana
  2014-11-08  1:26   ` Robert White
  2014-11-08 13:01 ` [PATCH v3] " Filipe Manana
  2014-11-09  8:38 ` [PATCH v4] " Filipe Manana
  3 siblings, 1 reply; 8+ messages in thread
From: Filipe Manana @ 2014-11-08  1:01 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Filipe Manana

Replacing a xattr consists of doing a lookup for its existing value, delete
the current value from the respective leaf, release the search path and then
finally insert the new value. This leaves a time window where readers (getxattr,
listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs,
so this has security implications.

This change also fixes 2 other existing issues which were:

*) Deleting the old xattr value without verifying first if the new xattr will
   fit in the existing leaf item (in case multiple xattrs are packed in the
   same item due to name hash collision);

*) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't
   exist but we have have an existing item that packs muliple xattrs with
   the same name hash as the input xattr. In this case we should return ENOSPC.

A test case for xfstests follows soon.

Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace
implementation.

Reported-by: Alexandre Oliva <oliva@gnu.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---

V2: Added missing btrfs_mark_buffer_dirty() call for the case where a replace
    happens for an item with no other xattrs and the new xattr value size is
    the same as the old xattr value size;
    Made btrfs_search_slot not release the path if it returns EOVERFLOW (from
    split_leaf), so that we can get the current size of the item when doing
    the replace and extend the leaf to a size smaller then what we passed to
    btrfs_search_slot if the xattr exists in the item (and the new extend size
    is new_size - old_size, excluding xattr's name and sizeof struct btrfs_dir_item).

 fs/btrfs/ctree.c    |   2 +-
 fs/btrfs/ctree.h    |   4 ++
 fs/btrfs/dir-item.c |  10 ++--
 fs/btrfs/xattr.c    | 145 ++++++++++++++++++++++++++++++----------------------
 4 files changed, 92 insertions(+), 69 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc616..461fcb7 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2939,7 +2939,7 @@ done:
 	 */
 	if (!p->leave_spinning)
 		btrfs_set_path_blocking(p);
-	if (ret < 0)
+	if (ret < 0 && ret != -EOVERFLOW)
 		btrfs_release_path(p);
 	return ret;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a5e471a..9a47dfe 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3687,6 +3687,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 int verify_dir_item(struct btrfs_root *root,
 		    struct extent_buffer *leaf,
 		    struct btrfs_dir_item *dir_item);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name,
+						 int name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index fc8df86..1752625 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
 #include "hash.h"
 #include "transaction.h"
 
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len);
-
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name, int name_len)
 {
 	struct btrfs_dir_item *dir_item;
 	unsigned long name_ptr;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index dcf2013..821e49c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -29,6 +29,7 @@
 #include "xattr.h"
 #include "disk-io.h"
 #include "props.h"
+#include "locking.h"
 
 
 ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -91,7 +92,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 		       struct inode *inode, const char *name,
 		       const void *value, size_t size, int flags)
 {
-	struct btrfs_dir_item *di;
+	struct btrfs_dir_item *di = NULL;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path;
 	size_t name_len = strlen(name);
@@ -104,83 +105,105 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	if (flags & XATTR_REPLACE) {
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-					name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
+	if (!value && ((flags & XATTR_REPLACE) || !flags)) {
+		/*
+		 * We're deleting only a xattr (no replace).
+		 * Don't follow the path below because it could leave a leaf
+		 * empty.
+		 */
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (!di && (flags & XATTR_REPLACE))
 			ret = -ENODATA;
-			goto out;
-		}
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
-		btrfs_release_path(path);
+		else if (di)
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		goto out;
+	}
 
+	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+				      name, name_len, value, size);
+	if (ret == -EOVERFLOW) {
 		/*
-		 * remove the attribute
+		 * We have an existing item in a leaf, split_leaf couldn't
+		 * expand it. That item might have or not a dir_item that
+		 * matches our target xattr, so lets check.
 		 */
-		if (!value)
-			goto out;
-	} else {
-		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
-					name, name_len, 0);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
+		ret = 0;
+		btrfs_assert_tree_locked(path->nodes[0]);
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if (!di && (flags & XATTR_CREATE)) {
+			ret = -ENOSPC;
 			goto out;
 		}
-		if (!di && !value)
-			goto out;
-		btrfs_release_path(path);
+	} else if (ret == -EEXIST) {
+		ret = 0;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		ASSERT(di); /* logic error */
+	} else if (ret) {
+		goto out;
 	}
 
-again:
-	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-				      name, name_len, value, size);
-	/*
-	 * If we're setting an xattr to a new value but the new value is say
-	 * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
-	 * back from split_leaf.  This is because it thinks we'll be extending
-	 * the existing item size, but we're asking for enough space to add the
-	 * item itself.  So if we get EOVERFLOW just set ret to EEXIST and let
-	 * the rest of the function figure it out.
-	 */
-	if (ret == -EOVERFLOW)
+	if (di && (flags & XATTR_CREATE))
 		ret = -EEXIST;
+	if (!di && (flags & XATTR_REPLACE))
+		ret = -ENODATA;
+	if (ret)
+		goto out;
 
-	if (ret == -EEXIST) {
-		if (flags & XATTR_CREATE)
-			goto out;
+	if (di) {
 		/*
-		 * We can't use the path we already have since we won't have the
-		 * proper locking for a delete, so release the path and
-		 * re-lookup to delete the thing.
+		 * We're doing a replace, and it must be atomic, that is, at
+		 * any point in time we have either the old or the new xattr
+		 * value in the tree. We don't want readers (getxattr and
+		 * listxattrs) to miss a value, this is specially important
+		 * for ACLs.
 		 */
-		btrfs_release_path(path);
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
-					name, name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
-			/* Shouldn't happen but just in case... */
-			btrfs_release_path(path);
-			goto again;
+		const int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		const u16 old_data_len = btrfs_dir_data_len(leaf, di);
+		const u32 item_size = btrfs_item_size_nr(leaf, slot);
+		const u32 data_size = sizeof(*di) + name_len + size;
+		struct btrfs_item *item;
+		unsigned long data_ptr;
+		char *ptr;
+
+		if (size > old_data_len) {
+			if (btrfs_leaf_free_space(root, leaf) <
+			    (size - old_data_len)) {
+				ret = -ENOSPC;
+				goto out;
+			}
 		}
 
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
+		if (old_data_len + name_len + sizeof(*di) == item_size) {
+			/* No other xattrs packed in the same leaf item. */
+			if (size > old_data_len)
+				btrfs_extend_item(root, path,
+						  size - old_data_len);
+			else if (size < old_data_len)
+				btrfs_truncate_item(root, path, data_size, 1);
+		} else {
+			/* There are other xattrs packed in the same item. */
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+			if (ret)
+				goto out;
+			btrfs_extend_item(root, path, data_size);
+		}
 
+		item = btrfs_item_nr(slot);
+		ptr = btrfs_item_ptr(leaf, slot, char);
+		ptr += btrfs_item_size(leaf, item) - data_size;
+		di = (struct btrfs_dir_item *)ptr;
+		btrfs_set_dir_data_len(leaf, di, size);
+		data_ptr = ((unsigned long)(di + 1)) + name_len;
+		write_extent_buffer(leaf, value, data_ptr, size);
+		btrfs_mark_buffer_dirty(leaf);
+	} else {
 		/*
-		 * We have a value to set, so go back and try to insert it now.
+		 * Insert, and we had space for the xattr, so path->slots[0] is
+		 * where our xattr dir_item is and btrfs_insert_xattr_item()
+		 * filled it.
 		 */
-		if (value) {
-			btrfs_release_path(path);
-			goto again;
-		}
 	}
 out:
 	btrfs_free_path(path);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Btrfs: make xattr replace operations atomic
  2014-11-08  1:01 ` [PATCH v2] " Filipe Manana
@ 2014-11-08  1:26   ` Robert White
  2014-11-08  7:17     ` Filipe David Manana
  0 siblings, 1 reply; 8+ messages in thread
From: Robert White @ 2014-11-08  1:26 UTC (permalink / raw)
  To: Filipe Manana, linux-btrfs

On 11/07/2014 05:01 PM, Filipe Manana wrote:
> V2: Added missing btrfs_mark_buffer_dirty() call for the case where a replace
>      happens for an item with no other xattrs and the new xattr value size is
>      the same as the old xattr value size;
>      Made btrfs_search_slot not release the path if it returns EOVERFLOW (from
>      split_leaf), so that we can get the current size of the item when doing
>      the replace and extend the leaf to a size smaller then what we passed to
>      btrfs_search_slot if the xattr exists in the item (and the new extend size
>      is new_size - old_size, excluding xattr's name and sizeof struct btrfs_dir_item).

Stupid Question Here: How does this "replace and extend" affect the 
whole copy-on-write paradigm. Aren't you now overwriting the contents of 
the old data instead of doing a COW?

Not a passive-aggressive commentary question, I genuinely don't 
understand this piece of code so I am seeking a learning opportunity.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Btrfs: make xattr replace operations atomic
  2014-11-08  1:26   ` Robert White
@ 2014-11-08  7:17     ` Filipe David Manana
  0 siblings, 0 replies; 8+ messages in thread
From: Filipe David Manana @ 2014-11-08  7:17 UTC (permalink / raw)
  To: Robert White; +Cc: Filipe Manana, linux-btrfs@vger.kernel.org

On Sat, Nov 8, 2014 at 1:26 AM, Robert White <rwhite@pobox.com> wrote:
> On 11/07/2014 05:01 PM, Filipe Manana wrote:
>>
>> V2: Added missing btrfs_mark_buffer_dirty() call for the case where a
>> replace
>>      happens for an item with no other xattrs and the new xattr value size
>> is
>>      the same as the old xattr value size;
>>      Made btrfs_search_slot not release the path if it returns EOVERFLOW
>> (from
>>      split_leaf), so that we can get the current size of the item when
>> doing
>>      the replace and extend the leaf to a size smaller then what we passed
>> to
>>      btrfs_search_slot if the xattr exists in the item (and the new extend
>> size
>>      is new_size - old_size, excluding xattr's name and sizeof struct
>> btrfs_dir_item).
>
>
> Stupid Question Here: How does this "replace and extend" affect the whole
> copy-on-write paradigm. Aren't you now overwriting the contents of the old
> data instead of doing a COW?

Btree nodes are always COWed.

>
> Not a passive-aggressive commentary question, I genuinely don't understand
> this piece of code so I am seeking a learning opportunity.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Filipe David Manana,

"Reasonable men adapt themselves to the world.
 Unreasonable men adapt the world to themselves.
 That's why all progress depends on unreasonable men."

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3] Btrfs: make xattr replace operations atomic
  2014-11-07 20:39 [PATCH] Btrfs: make xattr replace operations atomic Filipe Manana
  2014-11-07 20:54 ` Chris Mason
  2014-11-08  1:01 ` [PATCH v2] " Filipe Manana
@ 2014-11-08 13:01 ` Filipe Manana
  2014-11-09  8:38 ` [PATCH v4] " Filipe Manana
  3 siblings, 0 replies; 8+ messages in thread
From: Filipe Manana @ 2014-11-08 13:01 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Filipe Manana

Replacing a xattr consists of doing a lookup for its existing value, delete
the current value from the respective leaf, release the search path and then
finally insert the new value. This leaves a time window where readers (getxattr,
listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs,
so this has security implications.

This change also fixes 2 other existing issues which were:

*) Deleting the old xattr value without verifying first if the new xattr will
   fit in the existing leaf item (in case multiple xattrs are packed in the
   same item due to name hash collision);

*) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't
   exist but we have have an existing item that packs muliple xattrs with
   the same name hash as the input xattr. In this case we should return ENOSPC.

A test case for xfstests follows soon.

Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace
implementation.

Reported-by: Alexandre Oliva <oliva@gnu.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---

V2: Added missing btrfs_mark_buffer_dirty() call for the case where a replace
    happens for an item with no other xattrs and the new xattr value size is
    the same as the old xattr value size;
    Made btrfs_search_slot not release the path if it returns EOVERFLOW (from
    split_leaf), so that we can get the current size of the item when doing
    the replace and extend the leaf to a size smaller then what we passed to
    btrfs_search_slot if the xattr exists in the item (and the new extend size
    is new_size - old_size, excluding xattr's name and sizeof struct btrfs_dir_item).

V3: Made btrfs_search_slot not release the path on error logic more explicit
    and generic.

 fs/btrfs/ctree.c    |   2 +-
 fs/btrfs/ctree.h    |   5 ++
 fs/btrfs/dir-item.c |  10 ++--
 fs/btrfs/xattr.c    | 146 ++++++++++++++++++++++++++++++----------------------
 4 files changed, 94 insertions(+), 69 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc616..8172341 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2939,7 +2939,7 @@ done:
 	 */
 	if (!p->leave_spinning)
 		btrfs_set_path_blocking(p);
-	if (ret < 0)
+	if (ret < 0 && !p->skip_release_on_error)
 		btrfs_release_path(p);
 	return ret;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a5e471a..e3ab75e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -607,6 +607,7 @@ struct btrfs_path {
 	unsigned int leave_spinning:1;
 	unsigned int search_commit_root:1;
 	unsigned int need_commit_sem:1;
+	unsigned int skip_release_on_error:1;
 };
 
 /*
@@ -3687,6 +3688,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 int verify_dir_item(struct btrfs_root *root,
 		    struct extent_buffer *leaf,
 		    struct btrfs_dir_item *dir_item);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name,
+						 int name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index fc8df86..1752625 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
 #include "hash.h"
 #include "transaction.h"
 
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len);
-
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name, int name_len)
 {
 	struct btrfs_dir_item *dir_item;
 	unsigned long name_ptr;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index dcf2013..b3b5632 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -29,6 +29,7 @@
 #include "xattr.h"
 #include "disk-io.h"
 #include "props.h"
+#include "locking.h"
 
 
 ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -91,7 +92,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 		       struct inode *inode, const char *name,
 		       const void *value, size_t size, int flags)
 {
-	struct btrfs_dir_item *di;
+	struct btrfs_dir_item *di = NULL;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path;
 	size_t name_len = strlen(name);
@@ -103,84 +104,107 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->skip_release_on_error = 1;
 
-	if (flags & XATTR_REPLACE) {
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-					name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
+	if (!value && ((flags & XATTR_REPLACE) || !flags)) {
+		/*
+		 * We're deleting only a xattr (no replace).
+		 * Don't follow the path below because it could leave a leaf
+		 * empty.
+		 */
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (!di && (flags & XATTR_REPLACE))
 			ret = -ENODATA;
-			goto out;
-		}
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
-		btrfs_release_path(path);
+		else if (di)
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		goto out;
+	}
 
+	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+				      name, name_len, value, size);
+	if (ret == -EOVERFLOW) {
 		/*
-		 * remove the attribute
+		 * We have an existing item in a leaf, split_leaf couldn't
+		 * expand it. That item might have or not a dir_item that
+		 * matches our target xattr, so lets check.
 		 */
-		if (!value)
-			goto out;
-	} else {
-		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
-					name, name_len, 0);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
+		ret = 0;
+		btrfs_assert_tree_locked(path->nodes[0]);
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if (!di && (flags & XATTR_CREATE)) {
+			ret = -ENOSPC;
 			goto out;
 		}
-		if (!di && !value)
-			goto out;
-		btrfs_release_path(path);
+	} else if (ret == -EEXIST) {
+		ret = 0;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		ASSERT(di); /* logic error */
+	} else if (ret) {
+		goto out;
 	}
 
-again:
-	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-				      name, name_len, value, size);
-	/*
-	 * If we're setting an xattr to a new value but the new value is say
-	 * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
-	 * back from split_leaf.  This is because it thinks we'll be extending
-	 * the existing item size, but we're asking for enough space to add the
-	 * item itself.  So if we get EOVERFLOW just set ret to EEXIST and let
-	 * the rest of the function figure it out.
-	 */
-	if (ret == -EOVERFLOW)
+	if (di && (flags & XATTR_CREATE))
 		ret = -EEXIST;
+	if (!di && (flags & XATTR_REPLACE))
+		ret = -ENODATA;
+	if (ret)
+		goto out;
 
-	if (ret == -EEXIST) {
-		if (flags & XATTR_CREATE)
-			goto out;
+	if (di) {
 		/*
-		 * We can't use the path we already have since we won't have the
-		 * proper locking for a delete, so release the path and
-		 * re-lookup to delete the thing.
+		 * We're doing a replace, and it must be atomic, that is, at
+		 * any point in time we have either the old or the new xattr
+		 * value in the tree. We don't want readers (getxattr and
+		 * listxattrs) to miss a value, this is specially important
+		 * for ACLs.
 		 */
-		btrfs_release_path(path);
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
-					name, name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
-			/* Shouldn't happen but just in case... */
-			btrfs_release_path(path);
-			goto again;
+		const int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		const u16 old_data_len = btrfs_dir_data_len(leaf, di);
+		const u32 item_size = btrfs_item_size_nr(leaf, slot);
+		const u32 data_size = sizeof(*di) + name_len + size;
+		struct btrfs_item *item;
+		unsigned long data_ptr;
+		char *ptr;
+
+		if (size > old_data_len) {
+			if (btrfs_leaf_free_space(root, leaf) <
+			    (size - old_data_len)) {
+				ret = -ENOSPC;
+				goto out;
+			}
 		}
 
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
+		if (old_data_len + name_len + sizeof(*di) == item_size) {
+			/* No other xattrs packed in the same leaf item. */
+			if (size > old_data_len)
+				btrfs_extend_item(root, path,
+						  size - old_data_len);
+			else if (size < old_data_len)
+				btrfs_truncate_item(root, path, data_size, 1);
+		} else {
+			/* There are other xattrs packed in the same item. */
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+			if (ret)
+				goto out;
+			btrfs_extend_item(root, path, data_size);
+		}
 
+		item = btrfs_item_nr(slot);
+		ptr = btrfs_item_ptr(leaf, slot, char);
+		ptr += btrfs_item_size(leaf, item) - data_size;
+		di = (struct btrfs_dir_item *)ptr;
+		btrfs_set_dir_data_len(leaf, di, size);
+		data_ptr = ((unsigned long)(di + 1)) + name_len;
+		write_extent_buffer(leaf, value, data_ptr, size);
+		btrfs_mark_buffer_dirty(leaf);
+	} else {
 		/*
-		 * We have a value to set, so go back and try to insert it now.
+		 * Insert, and we had space for the xattr, so path->slots[0] is
+		 * where our xattr dir_item is and btrfs_insert_xattr_item()
+		 * filled it.
 		 */
-		if (value) {
-			btrfs_release_path(path);
-			goto again;
-		}
 	}
 out:
 	btrfs_free_path(path);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v4] Btrfs: make xattr replace operations atomic
  2014-11-07 20:39 [PATCH] Btrfs: make xattr replace operations atomic Filipe Manana
                   ` (2 preceding siblings ...)
  2014-11-08 13:01 ` [PATCH v3] " Filipe Manana
@ 2014-11-09  8:38 ` Filipe Manana
  3 siblings, 0 replies; 8+ messages in thread
From: Filipe Manana @ 2014-11-09  8:38 UTC (permalink / raw)
  To: linux-btrfs; +Cc: Filipe Manana

Replacing a xattr consists of doing a lookup for its existing value, delete
the current value from the respective leaf, release the search path and then
finally insert the new value. This leaves a time window where readers (getxattr,
listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs,
so this has security implications.

This change also fixes 2 other existing issues which were:

*) Deleting the old xattr value without verifying first if the new xattr will
   fit in the existing leaf item (in case multiple xattrs are packed in the
   same item due to name hash collision);

*) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't
   exist but we have have an existing item that packs muliple xattrs with
   the same name hash as the input xattr. In this case we should return ENOSPC.

A test case for xfstests follows soon.

Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace
implementation.

Reported-by: Alexandre Oliva <oliva@gnu.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---

V2: Added missing btrfs_mark_buffer_dirty() call for the case where a replace
    happens for an item with no other xattrs and the new xattr value size is
    the same as the old xattr value size;
    Made btrfs_search_slot not release the path if it returns EOVERFLOW (from
    split_leaf), so that we can get the current size of the item when doing
    the replace and extend the leaf to a size smaller then what we passed to
    btrfs_search_slot if the xattr exists in the item (and the new extend size
    is new_size - old_size, excluding xattr's name and sizeof struct btrfs_dir_item).

V3: Made btrfs_search_slot not release the path on error logic more explicit
    and generic.

V4: Added missing ENOSPC condition - if overflow detected, there's no matching
    dir_item for our xattr in the leaf item and there are no flags. Ensure
    a replace can't insert a new value.

 fs/btrfs/ctree.c    |   2 +-
 fs/btrfs/ctree.h    |   5 ++
 fs/btrfs/dir-item.c |  10 ++--
 fs/btrfs/xattr.c    | 150 ++++++++++++++++++++++++++++++++--------------------
 4 files changed, 102 insertions(+), 65 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 19bc616..8172341 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2939,7 +2939,7 @@ done:
 	 */
 	if (!p->leave_spinning)
 		btrfs_set_path_blocking(p);
-	if (ret < 0)
+	if (ret < 0 && !p->skip_release_on_error)
 		btrfs_release_path(p);
 	return ret;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a5e471a..e3ab75e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -607,6 +607,7 @@ struct btrfs_path {
 	unsigned int leave_spinning:1;
 	unsigned int search_commit_root:1;
 	unsigned int need_commit_sem:1;
+	unsigned int skip_release_on_error:1;
 };
 
 /*
@@ -3687,6 +3688,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 int verify_dir_item(struct btrfs_root *root,
 		    struct extent_buffer *leaf,
 		    struct btrfs_dir_item *dir_item);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name,
+						 int name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index fc8df86..1752625 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
 #include "hash.h"
 #include "transaction.h"
 
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len);
-
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name, int name_len)
 {
 	struct btrfs_dir_item *dir_item;
 	unsigned long name_ptr;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index dcf2013..47b1946 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -29,6 +29,7 @@
 #include "xattr.h"
 #include "disk-io.h"
 #include "props.h"
+#include "locking.h"
 
 
 ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -91,7 +92,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 		       struct inode *inode, const char *name,
 		       const void *value, size_t size, int flags)
 {
-	struct btrfs_dir_item *di;
+	struct btrfs_dir_item *di = NULL;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path;
 	size_t name_len = strlen(name);
@@ -103,84 +104,119 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->skip_release_on_error = 1;
+
+	if (!value) {
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (!di && (flags & XATTR_REPLACE))
+			ret = -ENODATA;
+		else if (di)
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		goto out;
+	}
 
+	/*
+	 * For a replace we can't just do the insert blindly.
+	 * Do a lookup first (read-only btrfs_search_slot), and return if xattr
+	 * doesn't exist. If it exists, fall down below to the insert/replace
+	 * path - we can't race with a concurrent xattr delete, because the VFS
+	 * locks the inode's i_mutex before calling setxattr or removexattr.
+	 */
 	if (flags & XATTR_REPLACE) {
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-					name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
+		ASSERT(mutex_is_locked(&inode->i_mutex));
+		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
+					name, name_len, 0);
+		if (!di) {
 			ret = -ENODATA;
 			goto out;
 		}
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
 		btrfs_release_path(path);
+		di = NULL;
+	}
 
+	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+				      name, name_len, value, size);
+	if (ret == -EOVERFLOW) {
 		/*
-		 * remove the attribute
+		 * We have an existing item in a leaf, split_leaf couldn't
+		 * expand it. That item might have or not a dir_item that
+		 * matches our target xattr, so lets check.
 		 */
-		if (!value)
-			goto out;
-	} else {
-		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
-					name, name_len, 0);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
+		ret = 0;
+		btrfs_assert_tree_locked(path->nodes[0]);
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if (!di && !(flags & XATTR_REPLACE)) {
+			ret = -ENOSPC;
 			goto out;
 		}
-		if (!di && !value)
-			goto out;
-		btrfs_release_path(path);
+	} else if (ret == -EEXIST) {
+		ret = 0;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		ASSERT(di); /* logic error */
+	} else if (ret) {
+		goto out;
 	}
 
-again:
-	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-				      name, name_len, value, size);
-	/*
-	 * If we're setting an xattr to a new value but the new value is say
-	 * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
-	 * back from split_leaf.  This is because it thinks we'll be extending
-	 * the existing item size, but we're asking for enough space to add the
-	 * item itself.  So if we get EOVERFLOW just set ret to EEXIST and let
-	 * the rest of the function figure it out.
-	 */
-	if (ret == -EOVERFLOW)
+	if (di && (flags & XATTR_CREATE)) {
 		ret = -EEXIST;
+		goto out;
+	}
 
-	if (ret == -EEXIST) {
-		if (flags & XATTR_CREATE)
-			goto out;
+	if (di) {
 		/*
-		 * We can't use the path we already have since we won't have the
-		 * proper locking for a delete, so release the path and
-		 * re-lookup to delete the thing.
+		 * We're doing a replace, and it must be atomic, that is, at
+		 * any point in time we have either the old or the new xattr
+		 * value in the tree. We don't want readers (getxattr and
+		 * listxattrs) to miss a value, this is specially important
+		 * for ACLs.
 		 */
-		btrfs_release_path(path);
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
-					name, name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
-			/* Shouldn't happen but just in case... */
-			btrfs_release_path(path);
-			goto again;
+		const int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		const u16 old_data_len = btrfs_dir_data_len(leaf, di);
+		const u32 item_size = btrfs_item_size_nr(leaf, slot);
+		const u32 data_size = sizeof(*di) + name_len + size;
+		struct btrfs_item *item;
+		unsigned long data_ptr;
+		char *ptr;
+
+		if (size > old_data_len) {
+			if (btrfs_leaf_free_space(root, leaf) <
+			    (size - old_data_len)) {
+				ret = -ENOSPC;
+				goto out;
+			}
 		}
 
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
+		if (old_data_len + name_len + sizeof(*di) == item_size) {
+			/* No other xattrs packed in the same leaf item. */
+			if (size > old_data_len)
+				btrfs_extend_item(root, path,
+						  size - old_data_len);
+			else if (size < old_data_len)
+				btrfs_truncate_item(root, path, data_size, 1);
+		} else {
+			/* There are other xattrs packed in the same item. */
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+			if (ret)
+				goto out;
+			btrfs_extend_item(root, path, data_size);
+		}
 
+		item = btrfs_item_nr(slot);
+		ptr = btrfs_item_ptr(leaf, slot, char);
+		ptr += btrfs_item_size(leaf, item) - data_size;
+		di = (struct btrfs_dir_item *)ptr;
+		btrfs_set_dir_data_len(leaf, di, size);
+		data_ptr = ((unsigned long)(di + 1)) + name_len;
+		write_extent_buffer(leaf, value, data_ptr, size);
+		btrfs_mark_buffer_dirty(leaf);
+	} else {
 		/*
-		 * We have a value to set, so go back and try to insert it now.
+		 * Insert, and we had space for the xattr, so path->slots[0] is
+		 * where our xattr dir_item is and btrfs_insert_xattr_item()
+		 * filled it.
 		 */
-		if (value) {
-			btrfs_release_path(path);
-			goto again;
-		}
 	}
 out:
 	btrfs_free_path(path);
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2014-11-09  8:42 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-11-07 20:39 [PATCH] Btrfs: make xattr replace operations atomic Filipe Manana
2014-11-07 20:54 ` Chris Mason
2014-11-07 21:34   ` Filipe David Manana
2014-11-08  1:01 ` [PATCH v2] " Filipe Manana
2014-11-08  1:26   ` Robert White
2014-11-08  7:17     ` Filipe David Manana
2014-11-08 13:01 ` [PATCH v3] " Filipe Manana
2014-11-09  8:38 ` [PATCH v4] " Filipe Manana

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).