public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
From: cel@kernel.org
To: <linux-nfs@vger.kernel.org>
Cc: Christoph Hellwig <hch@lst.de>, Chuck Lever <chuck.lever@oracle.com>
Subject: [RFC PATCH 3/4] nfs/blocklayout: Fix premature PR key unregistration
Date: Wed, 19 Jun 2024 13:39:33 -0400	[thread overview]
Message-ID: <20240619173929.177818-9-cel@kernel.org> (raw)
In-Reply-To: <20240619173929.177818-6-cel@kernel.org>

From: Chuck Lever <chuck.lever@oracle.com>

During generic/069 runs with pNFS SCSI layouts, the NFS client emits
the following in the system journal:

kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2)
kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3)
kernel: pNFS: failed to open device /dev/disk/by-id/dm-uuid-mpath-0x6001405e3366f045b7949eb8e4540b51 (-2)
kernel: pNFS: using block device sdb (reservation key 0x666b60901e7b26b3)
kernel: sd 6:0:0:1: reservation conflict
kernel: sd 6:0:0:1: [sdb] tag#16 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
kernel: sd 6:0:0:1: [sdb] tag#16 CDB: Write(10) 2a 00 00 00 00 50 00 00 08 00
kernel: reservation conflict error, dev sdb, sector 80 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 2
kernel: sd 6:0:0:1: reservation conflict
kernel: sd 6:0:0:1: reservation conflict
kernel: sd 6:0:0:1: [sdb] tag#18 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
kernel: sd 6:0:0:1: [sdb] tag#17 FAILED Result: hostbyte=DID_OK driverbyte=DRIVER_OK cmd_age=0s
kernel: sd 6:0:0:1: [sdb] tag#18 CDB: Write(10) 2a 00 00 00 00 60 00 00 08 00
kernel: sd 6:0:0:1: [sdb] tag#17 CDB: Write(10) 2a 00 00 00 00 58 00 00 08 00
kernel: reservation conflict error, dev sdb, sector 96 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0
kernel: reservation conflict error, dev sdb, sector 88 op 0x1:(WRITE) flags 0x0 phys_seg 1 prio class 0
systemd[1]: fstests-generic-069.scope: Deactivated successfully.
systemd[1]: fstests-generic-069.scope: Consumed 5.092s CPU time.
systemd[1]: media-test.mount: Deactivated successfully.
systemd[1]: media-scratch.mount: Deactivated successfully.
kernel: sd 6:0:0:1: reservation conflict
kernel: failed to unregister PR key.

This appears to be due to a race. bl_alloc_lseg() calls this:

561 static struct nfs4_deviceid_node *
562 bl_find_get_deviceid(struct nfs_server *server,
563                 const struct nfs4_deviceid *id, const struct cred *cred,
564                 gfp_t gfp_mask)
565 {
566         struct nfs4_deviceid_node *node;
567         unsigned long start, end;
568
569 retry:
570         node = nfs4_find_get_deviceid(server, id, cred, gfp_mask);
571         if (!node)
572                 return ERR_PTR(-ENODEV);

nfs4_find_get_deviceid() does a lookup without the spin lock first.
If it can't find a matching deviceid, it creates a new device_info
(which calls bl_alloc_deviceid_node, and that registers the device's
PR key).

Then it takes the nfs4_deviceid_lock and looks up the deviceid again.
If it finds it this time, nfs4_find_get_deviceid() frees the spare
(new) device_info, which unregisters the PR key for the same device.

Any subsequent I/O from this client on that device gets EBADE.

The umount later unregisters the device's PR key again.

To prevent this problem, register the PR key after the deviceid_node
lookup.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfs/blocklayout/blocklayout.c |  9 ++++++++-
 fs/nfs/blocklayout/blocklayout.h |  1 +
 fs/nfs/blocklayout/dev.c         | 29 +++++++++++++++++++++--------
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 6be13e0ec170..75cc5e50bd37 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -571,8 +571,14 @@ bl_find_get_deviceid(struct nfs_server *server,
 	if (!node)
 		return ERR_PTR(-ENODEV);
 
-	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0)
+	if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags) == 0) {
+		struct pnfs_block_dev *d =
+			container_of(node, struct pnfs_block_dev, node);
+		if (d->pr_reg)
+			if (d->pr_reg(d) < 0)
+				goto out_put;
 		return node;
+	}
 
 	end = jiffies;
 	start = end - PNFS_DEVICE_RETRY_TIMEOUT;
@@ -581,6 +587,7 @@ bl_find_get_deviceid(struct nfs_server *server,
 		goto retry;
 	}
 
+out_put:
 	nfs4_put_deviceid_node(node);
 	return ERR_PTR(-ENODEV);
 }
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index f1eeb4914199..8aabaf5218b8 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -116,6 +116,7 @@ struct pnfs_block_dev {
 
 	bool (*map)(struct pnfs_block_dev *dev, u64 offset,
 			struct pnfs_block_dev_map *map);
+	int (*pr_reg)(struct pnfs_block_dev *dev);
 };
 
 /* sector_t fields are all in 512-byte sectors */
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 356bc967fb5d..3d2401820ef4 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -230,6 +230,26 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
 	return true;
 }
 
+static int bl_register_scsi(struct pnfs_block_dev *d)
+{
+	struct block_device *bdev = file_bdev(d->bdev_file);
+	const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
+	int error;
+
+	if (d->pr_registered)
+		return 0;
+
+	error = ops->pr_register(bdev, 0, d->pr_key, true);
+	if (error) {
+		trace_bl_pr_key_reg_err(bdev->bd_disk->disk_name, d->pr_key, error);
+		return -error;
+	}
+
+	trace_bl_pr_key_reg(bdev->bd_disk->disk_name, d->pr_key);
+	d->pr_registered = true;
+	return 0;
+}
+
 static int
 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
 		struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
@@ -373,14 +393,7 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
 		goto out_blkdev_put;
 	}
 
-	error = ops->pr_register(bdev, 0, d->pr_key, true);
-	if (error) {
-		trace_bl_pr_key_reg_err(bdev->bd_disk->disk_name, d->pr_key, error);
-		goto out_blkdev_put;
-	}
-	trace_bl_pr_key_reg(bdev->bd_disk->disk_name, d->pr_key);
-
-	d->pr_registered = true;
+	d->pr_reg = bl_register_scsi;
 	return 0;
 
 out_blkdev_put:
-- 
2.45.1


  parent reply	other threads:[~2024-06-19 17:40 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-19 17:39 [RFC PATCH 0/4] Snapshot of fixes for SCSI PR key registration cel
2024-06-19 17:39 ` [RFC PATCH 1/4] nfs/blocklayout: SCSI layout trace points for reservation key reg/unreg cel
2024-06-20  4:50   ` Christoph Hellwig
2024-06-20  4:52     ` Christoph Hellwig
2024-06-20 14:30     ` Chuck Lever
2024-06-19 17:39 ` [RFC PATCH 2/4] nfs/blocklayout: Report only when /no/ device is found cel
2024-06-20  4:36   ` Christoph Hellwig
2024-06-20 14:59     ` Chuck Lever
2024-06-20 12:17   ` Benjamin Coddington
2024-06-20 14:10     ` Christoph Hellwig
2024-06-19 17:39 ` cel [this message]
2024-06-20  5:06   ` [RFC PATCH 3/4] nfs/blocklayout: Fix premature PR key unregistration Christoph Hellwig
2024-06-20 13:52     ` Benjamin Coddington
2024-06-20 13:58       ` Chuck Lever
2024-06-20 14:15       ` Christoph Hellwig
2024-06-20 14:18         ` Chuck Lever III
2024-06-20 15:45         ` Benjamin Coddington
2024-06-20 15:48           ` Chuck Lever
2024-06-20 15:58             ` Benjamin Coddington
2024-06-20 15:39     ` Chuck Lever
2024-06-20 13:51   ` Benjamin Coddington
2024-06-20 14:34     ` Chuck Lever
2024-06-20 14:37       ` Christoph Hellwig
2024-06-20 15:30       ` Benjamin Coddington
2024-06-20 15:46         ` Chuck Lever
2024-06-20 15:56           ` Benjamin Coddington
2024-06-20 16:45             ` Benjamin Coddington
2024-06-20 17:08               ` Chuck Lever
2024-06-19 17:39 ` [RFC PATCH 4/4] nfs/blocklayout: Use bulk page allocation APIs cel
2024-06-20  4:44   ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240619173929.177818-9-cel@kernel.org \
    --to=cel@kernel.org \
    --cc=chuck.lever@oracle.com \
    --cc=hch@lst.de \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.