CEPH filesystem development
* [PATCH 0/3] rbd: reduce the potential for erroneous blocklisting
From: Ilya Dryomov @ 2023-07-25  4:35 UTC
  To: ceph-devel; +Cc: Dongsheng Yang

Hello,

This came out of snapshot-based mirroring work.  Patches 1 and 2 are
preparatory; patch 3 fixes the issue (insofar as is reasonable).

Thanks,

                Ilya


Ilya Dryomov (3):
  rbd: make get_lock_owner_info() return a single locker or NULL
  rbd: harden get_lock_owner_info() a bit
  rbd: retrieve and check lock owner twice before blocklisting

 drivers/block/rbd.c                  | 115 ++++++++++++++++++---------
 include/linux/ceph/cls_lock_client.h |  10 +++
 2 files changed, 87 insertions(+), 38 deletions(-)

-- 
2.41.0



* [PATCH 1/3] rbd: make get_lock_owner_info() return a single locker or NULL
From: Ilya Dryomov @ 2023-07-25  4:35 UTC
  To: ceph-devel; +Cc: Dongsheng Yang

Make the "num_lockers can be only 0 or 1" assumption explicit and
simplify the API by getting rid of output parameters in preparation
for calling get_lock_owner_info() twice before blocklisting.
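
For illustration, a minimal standalone userspace sketch of the
resulting calling convention: an ERR_PTR()-encoded errno on failure,
NULL when the lock is not held, otherwise exactly one locker that the
caller must free.  The ERR_PTR()/PTR_ERR()/IS_ERR() helpers mimic the
kernel's, and struct locker / get_locker() are made-up stand-ins, not
rbd code:

  #include <errno.h>
  #include <stdio.h>
  #include <stdlib.h>

  #define MAX_ERRNO 4095

  static inline void *ERR_PTR(long error) { return (void *)error; }
  static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
  static inline int IS_ERR(const void *ptr)
  {
          return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
  }

  struct locker { long id; };

  /* 0 -> not locked, 1 -> locked, anything else -> lookup error */
  static struct locker *get_locker(int scenario)
  {
          struct locker *locker;

          switch (scenario) {
          case 0:
                  return NULL;
          case 1:
                  locker = malloc(sizeof(*locker));
                  if (!locker)
                          return ERR_PTR(-ENOMEM);
                  locker->id = 42;
                  return locker;
          default:
                  return ERR_PTR(-EBUSY);
          }
  }

  int main(void)
  {
          for (int s = 0; s < 3; s++) {
                  struct locker *locker = get_locker(s);

                  if (IS_ERR(locker))
                          printf("error %ld\n", PTR_ERR(locker));
                  else if (!locker)
                          printf("no locker\n");
                  else {
                          printf("locker %ld\n", locker->id);
                          free(locker);
                  }
          }
          return 0;
  }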

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 84 +++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 33 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bd0e075a5d89..dca6c1e5f6bc 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3849,10 +3849,17 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
 	list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
 }
 
-static int get_lock_owner_info(struct rbd_device *rbd_dev,
-			       struct ceph_locker **lockers, u32 *num_lockers)
+static void free_locker(struct ceph_locker *locker)
+{
+	if (locker)
+		ceph_free_lockers(locker, 1);
+}
+
+static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct ceph_locker *lockers;
+	u32 num_lockers;
 	u8 lock_type;
 	char *lock_tag;
 	int ret;
@@ -3861,39 +3868,45 @@ static int get_lock_owner_info(struct rbd_device *rbd_dev,
 
 	ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
 				 &rbd_dev->header_oloc, RBD_LOCK_NAME,
-				 &lock_type, &lock_tag, lockers, num_lockers);
-	if (ret)
-		return ret;
+				 &lock_type, &lock_tag, &lockers, &num_lockers);
+	if (ret) {
+		rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret);
+		return ERR_PTR(ret);
+	}
 
-	if (*num_lockers == 0) {
+	if (num_lockers == 0) {
 		dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
+		lockers = NULL;
 		goto out;
 	}
 
 	if (strcmp(lock_tag, RBD_LOCK_TAG)) {
 		rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
 			 lock_tag);
-		ret = -EBUSY;
-		goto out;
+		goto err_busy;
 	}
 
 	if (lock_type == CEPH_CLS_LOCK_SHARED) {
 		rbd_warn(rbd_dev, "shared lock type detected");
-		ret = -EBUSY;
-		goto out;
+		goto err_busy;
 	}
 
-	if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
+	WARN_ON(num_lockers != 1);
+	if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
 		    strlen(RBD_LOCK_COOKIE_PREFIX))) {
 		rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
-			 (*lockers)[0].id.cookie);
-		ret = -EBUSY;
-		goto out;
+			 lockers[0].id.cookie);
+		goto err_busy;
 	}
 
 out:
 	kfree(lock_tag);
-	return ret;
+	return lockers;
+
+err_busy:
+	kfree(lock_tag);
+	ceph_free_lockers(lockers, num_lockers);
+	return ERR_PTR(-EBUSY);
 }
 
 static int find_watcher(struct rbd_device *rbd_dev,
@@ -3947,51 +3960,56 @@ static int find_watcher(struct rbd_device *rbd_dev,
 static int rbd_try_lock(struct rbd_device *rbd_dev)
 {
 	struct ceph_client *client = rbd_dev->rbd_client->client;
-	struct ceph_locker *lockers;
-	u32 num_lockers;
+	struct ceph_locker *locker;
 	int ret;
 
 	for (;;) {
+		locker = NULL;
+
 		ret = rbd_lock(rbd_dev);
 		if (ret != -EBUSY)
-			return ret;
+			goto out;
 
 		/* determine if the current lock holder is still alive */
-		ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
-		if (ret)
-			return ret;
-
-		if (num_lockers == 0)
+		locker = get_lock_owner_info(rbd_dev);
+		if (IS_ERR(locker)) {
+			ret = PTR_ERR(locker);
+			locker = NULL;
+			goto out;
+		}
+		if (!locker)
 			goto again;
 
-		ret = find_watcher(rbd_dev, lockers);
+		ret = find_watcher(rbd_dev, locker);
 		if (ret)
 			goto out; /* request lock or error */
 
 		rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
-			 ENTITY_NAME(lockers[0].id.name));
+			 ENTITY_NAME(locker->id.name));
 
 		ret = ceph_monc_blocklist_add(&client->monc,
-					      &lockers[0].info.addr);
+					      &locker->info.addr);
 		if (ret) {
-			rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
-				 ENTITY_NAME(lockers[0].id.name), ret);
+			rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
+				 ENTITY_NAME(locker->id.name), ret);
 			goto out;
 		}
 
 		ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
 					  &rbd_dev->header_oloc, RBD_LOCK_NAME,
-					  lockers[0].id.cookie,
-					  &lockers[0].id.name);
-		if (ret && ret != -ENOENT)
+					  locker->id.cookie, &locker->id.name);
+		if (ret && ret != -ENOENT) {
+			rbd_warn(rbd_dev, "failed to break header lock: %d",
+				 ret);
 			goto out;
+		}
 
 again:
-		ceph_free_lockers(lockers, num_lockers);
+		free_locker(locker);
 	}
 
 out:
-	ceph_free_lockers(lockers, num_lockers);
+	free_locker(locker);
 	return ret;
 }
 
-- 
2.41.0



* [PATCH 2/3] rbd: harden get_lock_owner_info() a bit
From: Ilya Dryomov @ 2023-07-25  4:35 UTC
  To: ceph-devel; +Cc: Dongsheng Yang

- we want the exclusive lock type, so test for it directly
- use sscanf() to actually parse the lock cookie and avoid admitting
  invalid handles (see the sketch below)
- bail if the locker has a blank address
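
For illustration, a standalone userspace sketch of why parsing beats
a bare prefix comparison: strncmp() admits malformed cookies such as
"auto junk" or "automatic", while sscanf() accepts only
"<prefix> <u64>".  COOKIE_PREFIX, prefix_ok() and parse_ok() are
made-up stand-ins and the test strings are invented:

  #include <stdio.h>
  #include <string.h>

  /* stand-in for RBD_LOCK_COOKIE_PREFIX */
  #define COOKIE_PREFIX "auto"

  static int prefix_ok(const char *cookie)
  {
          return !strncmp(cookie, COOKIE_PREFIX, strlen(COOKIE_PREFIX));
  }

  static int parse_ok(const char *cookie, unsigned long long *handle)
  {
          return sscanf(cookie, COOKIE_PREFIX " %llu", handle) == 1;
  }

  int main(void)
  {
          const char *cookies[] =
                  { "auto 123456789", "auto junk", "automatic" };
          unsigned long long handle;

          for (int i = 0; i < 3; i++) {
                  int p = prefix_ok(cookies[i]);
                  int s = parse_ok(cookies[i], &handle);

                  printf("%-16s prefix_ok=%d parse_ok=%d\n",
                         cookies[i], p, s);
          }
          return 0;
  }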

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index dca6c1e5f6bc..94629e826369 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3862,10 +3862,9 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
 	u32 num_lockers;
 	u8 lock_type;
 	char *lock_tag;
+	u64 handle;
 	int ret;
 
-	dout("%s rbd_dev %p\n", __func__, rbd_dev);
-
 	ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
 				 &rbd_dev->header_oloc, RBD_LOCK_NAME,
 				 &lock_type, &lock_tag, &lockers, &num_lockers);
@@ -3886,18 +3885,28 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
 		goto err_busy;
 	}
 
-	if (lock_type == CEPH_CLS_LOCK_SHARED) {
-		rbd_warn(rbd_dev, "shared lock type detected");
+	if (lock_type != CEPH_CLS_LOCK_EXCLUSIVE) {
+		rbd_warn(rbd_dev, "incompatible lock type detected");
 		goto err_busy;
 	}
 
 	WARN_ON(num_lockers != 1);
-	if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
-		    strlen(RBD_LOCK_COOKIE_PREFIX))) {
+	ret = sscanf(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu",
+		     &handle);
+	if (ret != 1) {
 		rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
 			 lockers[0].id.cookie);
 		goto err_busy;
 	}
+	if (ceph_addr_is_blank(&lockers[0].info.addr)) {
+		rbd_warn(rbd_dev, "locker has a blank address");
+		goto err_busy;
+	}
+
+	dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n",
+	     __func__, rbd_dev, ENTITY_NAME(lockers[0].id.name),
+	     &lockers[0].info.addr.in_addr,
+	     le32_to_cpu(lockers[0].info.addr.nonce), handle);
 
 out:
 	kfree(lock_tag);
-- 
2.41.0



* [PATCH 3/3] rbd: retrieve and check lock owner twice before blocklisting
From: Ilya Dryomov @ 2023-07-25  4:35 UTC
  To: ceph-devel; +Cc: Dongsheng Yang

An attempt to acquire the exclusive lock can race with the current
lock owner closing the image:

1. lock is held by client123, rbd_lock() returns -EBUSY
2. get_lock_owner_info() returns client123 instance details
3. client123 closes the image, lock is released
4. find_watcher() returns 0 as there is no matching watcher anymore
5. client123 instance gets erroneously blocklisted

Particularly impacted is the mirror snapshot scheduler in
snapshot-based mirroring, since it happens to open and close images
a lot: each image is opened only for as long as it takes to take the
next mirror snapshot, and the same client instance is used for all
images.

To reduce the potential for erroneous blocklisting, retrieve the lock
owner again after find_watcher() returns 0.  If it's still there, make
sure it matches the previously detected lock owner.
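
For illustration, a standalone userspace sketch of the
double-fetch-and-compare pattern applied here; struct owner,
fetch_owner() and owner_equal() are made-up stand-ins for the rbd
helpers, not actual kernel code:

  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  struct owner { char cookie[32]; };

  /* stand-in for get_lock_owner_info(); false means "lock not held" */
  static bool fetch_owner(int call, struct owner *owner)
  {
          if (call == 2)          /* simulate step 3: the image was closed */
                  return false;
          snprintf(owner->cookie, sizeof(owner->cookie), "auto 123");
          return true;
  }

  static bool owner_equal(const struct owner *a, const struct owner *b)
  {
          return !strcmp(a->cookie, b->cookie);
  }

  int main(void)
  {
          struct owner first, second;

          if (!fetch_owner(1, &first))
                  return 0;       /* no owner: just retry the lock */

          /* the find_watcher() liveness check would happen here */

          if (!fetch_owner(2, &second) || !owner_equal(&first, &second)) {
                  puts("owner gone or changed: retry, do not blocklist");
                  return 0;
          }
          puts("same owner observed twice: proceed to blocklist");
          return 0;
  }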

Cc: stable@vger.kernel.org # 6d1736a0e432: rbd: make get_lock_owner_info() return a single locker or NULL
Cc: stable@vger.kernel.org # 5dc06bec6a5b: rbd: harden get_lock_owner_info() a bit
Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c                  | 16 ++++++++++++++--
 include/linux/ceph/cls_lock_client.h | 10 ++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 94629e826369..e4b5829a03b4 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3969,11 +3969,11 @@ static int find_watcher(struct rbd_device *rbd_dev,
 static int rbd_try_lock(struct rbd_device *rbd_dev)
 {
 	struct ceph_client *client = rbd_dev->rbd_client->client;
-	struct ceph_locker *locker;
+	struct ceph_locker *locker, *refreshed_locker;
 	int ret;
 
 	for (;;) {
-		locker = NULL;
+		locker = refreshed_locker = NULL;
 
 		ret = rbd_lock(rbd_dev);
 		if (ret != -EBUSY)
@@ -3993,6 +3993,16 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
 		if (ret)
 			goto out; /* request lock or error */
 
+		refreshed_locker = get_lock_owner_info(rbd_dev);
+		if (IS_ERR(refreshed_locker)) {
+			ret = PTR_ERR(refreshed_locker);
+			refreshed_locker = NULL;
+			goto out;
+		}
+		if (!refreshed_locker ||
+		    !ceph_locker_equal(locker, refreshed_locker))
+			goto again;
+
 		rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
 			 ENTITY_NAME(locker->id.name));
 
@@ -4014,10 +4024,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
 		}
 
 again:
+		free_locker(refreshed_locker);
 		free_locker(locker);
 	}
 
 out:
+	free_locker(refreshed_locker);
 	free_locker(locker);
 	return ret;
 }
diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h
index 17bc7584d1fe..b26f44ea38ca 100644
--- a/include/linux/ceph/cls_lock_client.h
+++ b/include/linux/ceph/cls_lock_client.h
@@ -24,6 +24,16 @@ struct ceph_locker {
 	struct ceph_locker_info info;
 };
 
+static inline bool ceph_locker_equal(const struct ceph_locker *lhs,
+				     const struct ceph_locker *rhs)
+{
+	return lhs->id.name.type == rhs->id.name.type &&
+	       lhs->id.name.num == rhs->id.name.num &&
+	       !strcmp(lhs->id.cookie, rhs->id.cookie) &&
+	       !memcmp(&lhs->info.addr, &rhs->info.addr,
+		       sizeof(rhs->info.addr));
+}
+
 int ceph_cls_lock(struct ceph_osd_client *osdc,
 		  struct ceph_object_id *oid,
 		  struct ceph_object_locator *oloc,
-- 
2.41.0

