linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Btrfs: incremental send, don't rename a directory too soon
@ 2015-02-28 21:00 Filipe Manana
  2015-02-28 22:29 ` [PATCH v2] " Filipe Manana
  0 siblings, 1 reply; 6+ messages in thread
From: Filipe Manana @ 2015-02-28 21:00 UTC (permalink / raw)
  To: linux-btrfs; +Cc: ames, Filipe Manana

There's one more case where we can't issue a rename operation for a
directory as soon as we process it. We used to delay directory renames
only if they have some ancestor directory with a higher inode number
that got renamed too, but there's another case where we need to delay
the rename too - when a directory A is renamed to the old name of a
directory B but that directory B has its rename delayed because it
has now (in the send root) an ancestor with a higher inode number that
was renamed. If we don't delay the directory rename in this case, the
receiving end of the send stream will attempt to rename A to the old
name of B before B got renamed to its new name, which results in a
"directory not empty" error. So fix this by delaying directory renames
for this case too.

Steps to reproduce:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ mkdir /mnt/a
  $ mkdir /mnt/b
  $ mkdir /mnt/c
  $ touch /mnt/a/file

  $ btrfs subvolume snapshot -r /mnt /mnt/snap1

  $ mv /mnt/c /mnt/x
  $ mv /mnt/a /mnt/x/y
  $ mv /mnt/b /mnt/a

  $ btrfs subvolume snapshot -r /mnt /mnt/snap2

  $ btrfs send /mnt/snap1 -f /tmp/1.send
  $ btrfs send -p /mnt/snap1 /mnt/snap2 -f /tmp/2.send

  $ mkfs.btrfs -f /dev/sdc
  $ mount /dev/sdc /mnt2
  $ btrfs receive /mnt2 -f /tmp/1.send
  $ btrfs receive /mnt2 -f /tmp/2.send
  ERROR: rename b -> a failed. Directory not empty

A test case for xfstests follows soon.

Reported-by: Ames Cornish <ames@cornishes.net>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
---
 fs/btrfs/send.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 149 insertions(+), 15 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fe58572..f28ab44 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
 	u64 parent_ino;
 	u64 ino;
 	u64 gen;
+	bool is_orphan;
 	struct list_head update_refs;
 };
 
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
 				u64 ino_gen,
 				u64 parent_ino,
 				struct list_head *new_refs,
-				struct list_head *deleted_refs)
+				struct list_head *deleted_refs,
+				const bool is_orphan)
 {
 	struct rb_node **p = &sctx->pending_dir_moves.rb_node;
 	struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
 	pm->parent_ino = parent_ino;
 	pm->ino = ino;
 	pm->gen = ino_gen;
+	pm->is_orphan = is_orphan;
 	INIT_LIST_HEAD(&pm->list);
 	INIT_LIST_HEAD(&pm->update_refs);
 	RB_CLEAR_NODE(&pm->node);
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 	rmdir_ino = dm->rmdir_ino;
 	free_waiting_dir_move(sctx, dm);
 
-	ret = get_first_ref(sctx->parent_root, pm->ino,
-			    &parent_ino, &parent_gen, name);
-	if (ret < 0)
-		goto out;
-
-	ret = get_cur_path(sctx, parent_ino, parent_gen,
-			   from_path);
-	if (ret < 0)
-		goto out;
-	ret = fs_path_add_path(from_path, name);
+	if (pm->is_orphan) {
+		ret = gen_unique_name(sctx, pm->ino,
+				      pm->gen, from_path);
+	} else {
+		ret = get_first_ref(sctx->parent_root, pm->ino,
+				    &parent_ino, &parent_gen, name);
+		if (ret < 0)
+			goto out;
+		ret = get_cur_path(sctx, parent_ino, parent_gen,
+				   from_path);
+		if (ret < 0)
+			goto out;
+		ret = fs_path_add_path(from_path, name);
+	}
 	if (ret < 0)
 		goto out;
 
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
 		LIST_HEAD(deleted_refs);
 		ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
 		ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
-					   &pm->update_refs, &deleted_refs);
+					   &pm->update_refs, &deleted_refs,
+					   pm->is_orphan);
 		if (ret < 0)
 			goto out;
 		if (rmdir_ino) {
@@ -3283,6 +3291,120 @@ out:
 	return ret;
 }
 
+/*
+ * We might need to delay a directory rename even when no ancestor directory
+ * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
+ * renamed. This happens when we rename a directory to the old name (the name
+ * in the parent root) of some other unrelated directory that got its rename
+ * delayed due to some ancestor with higher number that got renamed.
+ *
+ * Example:
+ *
+ * Parent snapshot:
+ * .                                       (ino 256)
+ * |---- a/                                (ino 257)
+ * |     |---- file                        (ino 260)
+ * |
+ * |---- b/                                (ino 258)
+ * |---- c/                                (ino 259)
+ *
+ * Send snapshot:
+ * .                                       (ino 256)
+ * |---- a/                                (ino 258)
+ * |---- x/                                (ino 259)
+ *       |---- y/                          (ino 257)
+ *             |----- file                 (ino 260)
+ *
+ * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
+ * from 'a' to 'x/y' happening first, which in turn depends on the rename of
+ * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
+ * must issue is:
+ *
+ * 1 - rename 259 from 'c' to 'x'
+ * 2 - rename 257 from 'a' to 'x/y'
+ * 3 - rename 258 from 'b' to 'a'
+ *
+ * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
+ * be done right away and < 0 on error.
+ */
+static int wait_for_dest_dir_move(struct send_ctx *sctx,
+				  struct recorded_ref *parent_ref,
+				  const bool is_orphan)
+{
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	struct btrfs_key di_key;
+	struct btrfs_dir_item *di;
+	u64 left_gen;
+	u64 right_gen;
+	int ret = 0;
+
+	path = alloc_path_for_send();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = parent_ref->dir;
+	key.type = BTRFS_DIR_ITEM_KEY;
+	key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
+
+	ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
+	if (ret < 0) {
+		goto out;
+	} else if (ret > 0) {
+		ret = 0;
+		goto out;
+	}
+
+	di = btrfs_match_dir_item_name(sctx->parent_root, path,
+				       parent_ref->name, parent_ref->name_len);
+	if (!di) {
+		ret = 0;
+		goto out;
+	}
+	/*
+	 * di_key.objectid has the number of the inode that has a dentry in the
+	 * parent directory with the same name that sctx->cur_ino is being
+	 * renamed to. We need to check if that inode is in the send root as
+	 * well and if it is currently marked as an inode with a pending rename,
+	 * if it is, we need to delay the rename of sctx->cur_ino as well, so
+	 * that it happens after that other inode is renamed.
+	 */
+	btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
+
+	ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
+			     &left_gen, NULL, NULL, NULL, NULL);
+	if (ret < 0)
+		goto out;
+	ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
+			     &right_gen, NULL, NULL, NULL, NULL);
+	if (ret < 0) {
+		if (ret == -ENOENT)
+			ret = 0;
+		goto out;
+	}
+
+	/* Different inode, no need to delay the rename of sctx->cur_ino */
+	if (right_gen != left_gen) {
+		ret = 0;
+		goto out;
+	}
+
+	if (is_waiting_for_move(sctx, di_key.objectid)) {
+		ret = add_pending_dir_move(sctx,
+					   sctx->cur_ino,
+					   sctx->cur_inode_gen,
+					   di_key.objectid,
+					   &sctx->new_refs,
+					   &sctx->deleted_refs,
+					   is_orphan);
+		if (!ret)
+			ret = 1;
+	}
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
 static int wait_for_parent_move(struct send_ctx *sctx,
 				struct recorded_ref *parent_ref)
 {
@@ -3349,7 +3471,8 @@ out:
 					   sctx->cur_inode_gen,
 					   ino,
 					   &sctx->new_refs,
-					   &sctx->deleted_refs);
+					   &sctx->deleted_refs,
+					   false);
 		if (!ret)
 			ret = 1;
 	}
@@ -3372,6 +3495,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
 	int did_overwrite = 0;
 	int is_orphan = 0;
 	u64 last_dir_ino_rm = 0;
+	bool can_rename = true;
 
 verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
 
@@ -3490,12 +3614,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
 			}
 		}
 
+		if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
+			ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
+			if (ret < 0)
+				goto out;
+			if (ret == 1) {
+				can_rename = false;
+				*pending_move = 1;
+			}
+		}
+
 		/*
 		 * link/move the ref to the new place. If we have an orphan
 		 * inode, move it and update valid_path. If not, link or move
 		 * it depending on the inode mode.
 		 */
-		if (is_orphan) {
+		if (is_orphan && can_rename) {
 			ret = send_rename(sctx, valid_path, cur->full_path);
 			if (ret < 0)
 				goto out;
@@ -3503,7 +3637,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
 			ret = fs_path_copy(valid_path, cur->full_path);
 			if (ret < 0)
 				goto out;
-		} else {
+		} else if (can_rename) {
 			if (S_ISDIR(sctx->cur_inode_mode)) {
 				/*
 				 * Dirs can't be linked, so move it. For moved
-- 
2.1.3


^ permalink raw reply related	[flat|nested] 6+ messages in thread
* Re: [PATCH v2] Btrfs: incremental send, don't rename a directory too soon
@ 2015-04-14  7:33 Robbie Ko
  2015-04-14  8:16 ` Filipe David Manana
  0 siblings, 1 reply; 6+ messages in thread
From: Robbie Ko @ 2015-04-14  7:33 UTC (permalink / raw)
  To: Filipe Manana, linux-btrfs@vger.kernel.org

Hi,

After applying the patch, I got WARN_ON.
btrfs progs finished without any error message,
but received subvolume is not the same as send subvolume.

Here's the related information.
thanks,
robbieko

uanme -a
Linux ubuntu 4.0.0-rc4-custom #2 SMP Tue Apr 14 11:43:00 CST 2015
x86_64 x86_64 x86_64 GNU/Linux
btrfs --version
Btrfs v3.14.1

Steps to reproduce:

 $ mkfs.btrfs -f /dev/sdb
 $ mount /dev/sdb /mnt
 $ mkfs.btrfs -f /dev/sdc
 $ mount /dev/sdc /mnt2

$ mkdir -p /mnt/data
$ mkdir -p /mnt/data/n1/n2
$ mkdir -p /mnt/data/n4
$ mkdir -p /mnt/data/n1/n2/p1
$ mkdir -p /mnt/data/t4
$ mkdir -p /mnt/data/p1
$ mkdir -p /mnt/data/p1/2

  $ btrfs subvolume snapshot -r /mnt /mnt/snap1

$ mv /mnt/data/n1/n2 /mnt/data/t4
$ mv /mnt/data/n4 /mnt/data/t4/n2
$ mv /mnt/data/t4/n2/p1 /mnt/data/t4/p1
$ mv /mnt/data/p1 /mnt/data/t4/n2

  $ btrfs subvolume snapshot -r /mnt /mnt/snap2

  $ btrfs send /mnt/snap1 | btrfs receive /mnt2
  $ btrfs send -p /mnt/snap1 /mnt/snap2 | btrfs receive /mnt2

Call trace message

[  135.498533] ------------[ cut here ]------------

[  135.498557] WARNING: CPU: 1 PID: 2346 at fs/btrfs/send.c:5934
btrfs_ioctl_send+0xc4c/0x11e0 [btrfs]()

[  135.498560] Modules linked in: nf_conntrack_ipv4(E)
nf_defrag_ipv4(E) xt_conntrack(E) nf_conntrack(E) ipt_REJECT(E)
nf_reject_ipv4(E) xt_tcpudp(E) iptable_filter(E) ip_tables(E)
x_tables(E) bridge(E) stp(E) llc(E) snd_intel8x0(E) snd_ac97_codec(E)
ac97_bus(E) snd_pcm(E) snd_timer(E) snd(E) iosf_mbi(E) soundcore(E)
ppdev(E) joydev(E) lp(E) serio_raw(E) parport_pc(E) i2c_piix4(E)
mac_hid(E) parport(E) btrfs(E) xor(E) raid6_pq(E) hid_generic(E)
usbhid(E) hid(E) ahci(E) psmouse(E) libahci(E) e1000(E) pata_acpi(E)

[  135.498578] CPU: 1 PID: 2346 Comm: btrfs Tainted: G         E
4.0.0-rc4-custom #3

[  135.498580] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS
VirtualBox 12/01/2006

[  135.498583]  ffffffffc0509016 ffff88007a233c08 ffffffff817b62f3
0000000000000007

[  135.498586]  0000000000000000 ffff88007a233c48 ffffffff8107452a
ffffc90000567000

[  135.498590]  ffff8800799f1400 ffff8800799f1418 ffff88007b3f82d0
ffff880079f60000

[  135.498593] Call Trace:

[  135.498602]  [<ffffffff817b62f3>] dump_stack+0x45/0x57

[  135.498609]  [<ffffffff8107452a>] warn_slowpath_common+0x8a/0xc0

[  135.498614]  [<ffffffff8107461a>] warn_slowpath_null+0x1a/0x20

[  135.498626]  [<ffffffffc04f5c6c>] btrfs_ioctl_send+0xc4c/0x11e0 [btrfs]

[  135.498632]  [<ffffffff8118020e>] ? __alloc_pages_nodemask+0x1ae/0xab0

[  135.498638]  [<ffffffff810a4775>] ? sched_clock_local+0x25/0x90

[  135.498643]  [<ffffffff8109182e>] ? alloc_pid+0x2e/0x530

[  135.498655]  [<ffffffffc04bc4f6>] btrfs_ioctl+0x286/0x27e0 [btrfs]

[  135.498660]  [<ffffffff810a6368>] ? __enqueue_entity+0x78/0x80

[  135.498665]  [<ffffffff810add70>] ? enqueue_entity+0x400/0xc20

[  135.498679]  [<ffffffff8101dc3a>] ? native_sched_clock+0x2a/0x90

[  135.498686]  [<ffffffff810ae708>] ? enqueue_task_fair+0x178/0x730

[  135.498698]  [<ffffffff81047c1d>] ? native_smp_send_reschedule+0x4d/0x70

[  135.498703]  [<ffffffff8109d5f0>] ? resched_curr+0x70/0xc0

[  135.498710]  [<ffffffff8109e12a>] ? check_preempt_curr+0x5a/0xa0

[  135.498715]  [<ffffffff810a152f>] ? wake_up_new_task+0x12f/0x1b0

[  135.498722]  [<ffffffff81204010>] do_vfs_ioctl+0x2e0/0x4e0

[  135.498728]  [<ffffffff8107368c>] ? do_fork+0x13c/0x370

[  135.498733]  [<ffffffff81204291>] SyS_ioctl+0x81/0xa0

[  135.498738]  [<ffffffff81073946>] ? SyS_clone+0x16/0x20

[  135.498839]  [<ffffffff817bd80d>] ? stub_clone+0x6d/0x90

[  135.498845]  [<ffffffff817bd50d>] system_call_fastpath+0x16/0x1b

[  135.498848] ---[ end trace e1dd916182de3a9d ]---

[  135.498851] ------------[ cut here ]------------

[  135.498871] WARNING: CPU: 1 PID: 2346 at fs/btrfs/send.c:5951
btrfs_ioctl_send+0x28f/0x11e0 [btrfs]()

[  135.498894] Modules linked in: nf_conntrack_ipv4(E)
nf_defrag_ipv4(E) xt_conntrack(E) nf_conntrack(E) ipt_REJECT(E)
nf_reject_ipv4(E) xt_tcpudp(E) iptable_filter(E) ip_tables(E)
x_tables(E) bridge(E) stp(E) llc(E) snd_intel8x0(E) snd_ac97_codec(E)
ac97_bus(E) snd_pcm(E) snd_timer(E) snd(E) iosf_mbi(E) soundcore(E)
ppdev(E) joydev(E) lp(E) serio_raw(E) parport_pc(E) i2c_piix4(E)
mac_hid(E) parport(E) btrfs(E) xor(E) raid6_pq(E) hid_generic(E)
usbhid(E) hid(E) ahci(E) psmouse(E) libahci(E) e1000(E) pata_acpi(E)

[  135.498900] CPU: 1 PID: 2346 Comm: btrfs Tainted: G     W   E
4.0.0-rc4-custom #3

[  135.498903] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS
VirtualBox 12/01/2006

[  135.498910]  ffffffffc0509016 ffff88007a233c08 ffffffff817b62f3
000000004c724c72

[  135.498918]  0000000000000000 ffff88007a233c48 ffffffff8107452a
ffff88007a233c38

[  135.498923]  ffff8800799f1400 ffff880078d52cc0 ffff880078d52cd8
ffff8800799f15d8

[  135.498927] Call Trace:

[  135.498937]  [<ffffffff817b62f3>] dump_stack+0x45/0x57

[  135.498945]  [<ffffffff8107452a>] warn_slowpath_common+0x8a/0xc0

[  135.498951]  [<ffffffff8107461a>] warn_slowpath_null+0x1a/0x20

[  135.498975]  [<ffffffffc04f52af>] btrfs_ioctl_send+0x28f/0x11e0 [btrfs]

[  135.498984]  [<ffffffff8118020e>] ? __alloc_pages_nodemask+0x1ae/0xab0

[  135.498990]  [<ffffffff810a4775>] ? sched_clock_local+0x25/0x90

[  135.498996]  [<ffffffff8109182e>] ? alloc_pid+0x2e/0x530

[  135.499023]  [<ffffffffc04bc4f6>] btrfs_ioctl+0x286/0x27e0 [btrfs]

[  135.499031]  [<ffffffff810a6368>] ? __enqueue_entity+0x78/0x80

[  135.499037]  [<ffffffff810add70>] ? enqueue_entity+0x400/0xc20

[  135.499046]  [<ffffffff8101dc3a>] ? native_sched_clock+0x2a/0x90

[  135.499055]  [<ffffffff810ae708>] ? enqueue_task_fair+0x178/0x730

[  135.499061]  [<ffffffff81047c1d>] ? native_smp_send_reschedule+0x4d/0x70

[  135.499069]  [<ffffffff8109d5f0>] ? resched_curr+0x70/0xc0

[  135.499078]  [<ffffffff8109e12a>] ? check_preempt_curr+0x5a/0xa0

[  135.499087]  [<ffffffff810a152f>] ? wake_up_new_task+0x12f/0x1b0

[  135.499096]  [<ffffffff81204010>] do_vfs_ioctl+0x2e0/0x4e0

[  135.499106]  [<ffffffff8107368c>] ? do_fork+0x13c/0x370

[  135.499115]  [<ffffffff81204291>] SyS_ioctl+0x81/0xa0

[  135.499121]  [<ffffffff81073946>] ? SyS_clone+0x16/0x20

[  135.499131]  [<ffffffff817bd80d>] ? stub_clone+0x6d/0x90

[  135.499140]  [<ffffffff817bd50d>] system_call_fastpath+0x16/0x1b

[  135.499144] ---[ end trace e1dd916182de3a9e ]---

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-04-14 12:32 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-02-28 21:00 [PATCH] Btrfs: incremental send, don't rename a directory too soon Filipe Manana
2015-02-28 22:29 ` [PATCH v2] " Filipe Manana
  -- strict thread matches above, loose matches on Subject: below --
2015-04-14  7:33 Robbie Ko
2015-04-14  8:16 ` Filipe David Manana
2015-04-14 11:09   ` Robbie Ko
2015-04-14 12:32     ` Filipe David Manana

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).