CEPH filesystem development
 help / color / mirror / Atom feed
* [PATCH v2 0/3] rbd: reduce the potential for erroneous blocklisting
@ 2023-07-25 21:28 Ilya Dryomov
  2023-07-25 21:28 ` [PATCH v2 1/3] rbd: make get_lock_owner_info() return a single locker or NULL Ilya Dryomov
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Ilya Dryomov @ 2023-07-25 21:28 UTC (permalink / raw)
  To: ceph-devel; +Cc: Dongsheng Yang

Hello,

This came out of snapshot-based mirroring work.  Patches 1 and 2 are
preparatory, patch 3 fixes the issue (in as much as reasonable).

v1 -> v2:
- ceph_addr_is_blank() was missing an export
- amended locker equality semantics to ignore addr->type and moved the
  function to rbd.c

Thanks,

                Ilya


Ilya Dryomov (3):
  rbd: make get_lock_owner_info() return a single locker or NULL
  rbd: harden get_lock_owner_info() a bit
  rbd: retrieve and check lock owner twice before blocklisting

 drivers/block/rbd.c  | 124 ++++++++++++++++++++++++++++++-------------
 net/ceph/messenger.c |   1 +
 2 files changed, 87 insertions(+), 38 deletions(-)

-- 
2.41.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH v2 1/3] rbd: make get_lock_owner_info() return a single locker or NULL
  2023-07-25 21:28 [PATCH v2 0/3] rbd: reduce the potential for erroneous blocklisting Ilya Dryomov
@ 2023-07-25 21:28 ` Ilya Dryomov
  2023-07-25 21:28 ` [PATCH v2 2/3] rbd: harden get_lock_owner_info() a bit Ilya Dryomov
  2023-07-25 21:28 ` [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting Ilya Dryomov
  2 siblings, 0 replies; 6+ messages in thread
From: Ilya Dryomov @ 2023-07-25 21:28 UTC (permalink / raw)
  To: ceph-devel; +Cc: Dongsheng Yang

Make the "num_lockers can be only 0 or 1" assumption explicit and
simplify the API by getting rid of output parameters in preparation
for calling get_lock_owner_info() twice before blocklisting.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 84 +++++++++++++++++++++++++++------------------
 1 file changed, 51 insertions(+), 33 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bd0e075a5d89..dca6c1e5f6bc 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3849,10 +3849,17 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
 	list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
 }
 
-static int get_lock_owner_info(struct rbd_device *rbd_dev,
-			       struct ceph_locker **lockers, u32 *num_lockers)
+static void free_locker(struct ceph_locker *locker)
+{
+	if (locker)
+		ceph_free_lockers(locker, 1);
+}
+
+static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
 {
 	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	struct ceph_locker *lockers;
+	u32 num_lockers;
 	u8 lock_type;
 	char *lock_tag;
 	int ret;
@@ -3861,39 +3868,45 @@ static int get_lock_owner_info(struct rbd_device *rbd_dev,
 
 	ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
 				 &rbd_dev->header_oloc, RBD_LOCK_NAME,
-				 &lock_type, &lock_tag, lockers, num_lockers);
-	if (ret)
-		return ret;
+				 &lock_type, &lock_tag, &lockers, &num_lockers);
+	if (ret) {
+		rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret);
+		return ERR_PTR(ret);
+	}
 
-	if (*num_lockers == 0) {
+	if (num_lockers == 0) {
 		dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
+		lockers = NULL;
 		goto out;
 	}
 
 	if (strcmp(lock_tag, RBD_LOCK_TAG)) {
 		rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
 			 lock_tag);
-		ret = -EBUSY;
-		goto out;
+		goto err_busy;
 	}
 
 	if (lock_type == CEPH_CLS_LOCK_SHARED) {
 		rbd_warn(rbd_dev, "shared lock type detected");
-		ret = -EBUSY;
-		goto out;
+		goto err_busy;
 	}
 
-	if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
+	WARN_ON(num_lockers != 1);
+	if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
 		    strlen(RBD_LOCK_COOKIE_PREFIX))) {
 		rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
-			 (*lockers)[0].id.cookie);
-		ret = -EBUSY;
-		goto out;
+			 lockers[0].id.cookie);
+		goto err_busy;
 	}
 
 out:
 	kfree(lock_tag);
-	return ret;
+	return lockers;
+
+err_busy:
+	kfree(lock_tag);
+	ceph_free_lockers(lockers, num_lockers);
+	return ERR_PTR(-EBUSY);
 }
 
 static int find_watcher(struct rbd_device *rbd_dev,
@@ -3947,51 +3960,56 @@ static int find_watcher(struct rbd_device *rbd_dev,
 static int rbd_try_lock(struct rbd_device *rbd_dev)
 {
 	struct ceph_client *client = rbd_dev->rbd_client->client;
-	struct ceph_locker *lockers;
-	u32 num_lockers;
+	struct ceph_locker *locker;
 	int ret;
 
 	for (;;) {
+		locker = NULL;
+
 		ret = rbd_lock(rbd_dev);
 		if (ret != -EBUSY)
-			return ret;
+			goto out;
 
 		/* determine if the current lock holder is still alive */
-		ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
-		if (ret)
-			return ret;
-
-		if (num_lockers == 0)
+		locker = get_lock_owner_info(rbd_dev);
+		if (IS_ERR(locker)) {
+			ret = PTR_ERR(locker);
+			locker = NULL;
+			goto out;
+		}
+		if (!locker)
 			goto again;
 
-		ret = find_watcher(rbd_dev, lockers);
+		ret = find_watcher(rbd_dev, locker);
 		if (ret)
 			goto out; /* request lock or error */
 
 		rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
-			 ENTITY_NAME(lockers[0].id.name));
+			 ENTITY_NAME(locker->id.name));
 
 		ret = ceph_monc_blocklist_add(&client->monc,
-					      &lockers[0].info.addr);
+					      &locker->info.addr);
 		if (ret) {
-			rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
-				 ENTITY_NAME(lockers[0].id.name), ret);
+			rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
+				 ENTITY_NAME(locker->id.name), ret);
 			goto out;
 		}
 
 		ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
 					  &rbd_dev->header_oloc, RBD_LOCK_NAME,
-					  lockers[0].id.cookie,
-					  &lockers[0].id.name);
-		if (ret && ret != -ENOENT)
+					  locker->id.cookie, &locker->id.name);
+		if (ret && ret != -ENOENT) {
+			rbd_warn(rbd_dev, "failed to break header lock: %d",
+				 ret);
 			goto out;
+		}
 
 again:
-		ceph_free_lockers(lockers, num_lockers);
+		free_locker(locker);
 	}
 
 out:
-	ceph_free_lockers(lockers, num_lockers);
+	free_locker(locker);
 	return ret;
 }
 
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 2/3] rbd: harden get_lock_owner_info() a bit
  2023-07-25 21:28 [PATCH v2 0/3] rbd: reduce the potential for erroneous blocklisting Ilya Dryomov
  2023-07-25 21:28 ` [PATCH v2 1/3] rbd: make get_lock_owner_info() return a single locker or NULL Ilya Dryomov
@ 2023-07-25 21:28 ` Ilya Dryomov
  2023-07-25 21:28 ` [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting Ilya Dryomov
  2 siblings, 0 replies; 6+ messages in thread
From: Ilya Dryomov @ 2023-07-25 21:28 UTC (permalink / raw)
  To: ceph-devel; +Cc: Dongsheng Yang

- we want the exclusive lock type, so test for it directly
- use sscanf() to actually parse the lock cookie and avoid admitting
  invalid handles
- bail if locker has a blank address

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c  | 21 +++++++++++++++------
 net/ceph/messenger.c |  1 +
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index dca6c1e5f6bc..94629e826369 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3862,10 +3862,9 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
 	u32 num_lockers;
 	u8 lock_type;
 	char *lock_tag;
+	u64 handle;
 	int ret;
 
-	dout("%s rbd_dev %p\n", __func__, rbd_dev);
-
 	ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
 				 &rbd_dev->header_oloc, RBD_LOCK_NAME,
 				 &lock_type, &lock_tag, &lockers, &num_lockers);
@@ -3886,18 +3885,28 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
 		goto err_busy;
 	}
 
-	if (lock_type == CEPH_CLS_LOCK_SHARED) {
-		rbd_warn(rbd_dev, "shared lock type detected");
+	if (lock_type != CEPH_CLS_LOCK_EXCLUSIVE) {
+		rbd_warn(rbd_dev, "incompatible lock type detected");
 		goto err_busy;
 	}
 
 	WARN_ON(num_lockers != 1);
-	if (strncmp(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
-		    strlen(RBD_LOCK_COOKIE_PREFIX))) {
+	ret = sscanf(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu",
+		     &handle);
+	if (ret != 1) {
 		rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
 			 lockers[0].id.cookie);
 		goto err_busy;
 	}
+	if (ceph_addr_is_blank(&lockers[0].info.addr)) {
+		rbd_warn(rbd_dev, "locker has a blank address");
+		goto err_busy;
+	}
+
+	dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n",
+	     __func__, rbd_dev, ENTITY_NAME(lockers[0].id.name),
+	     &lockers[0].info.addr.in_addr,
+	     le32_to_cpu(lockers[0].info.addr.nonce), handle);
 
 out:
 	kfree(lock_tag);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index cd7b0bf5369e..5eb4898cccd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1123,6 +1123,7 @@ bool ceph_addr_is_blank(const struct ceph_entity_addr *addr)
 		return true;
 	}
 }
+EXPORT_SYMBOL(ceph_addr_is_blank);
 
 int ceph_addr_port(const struct ceph_entity_addr *addr)
 {
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting
  2023-07-25 21:28 [PATCH v2 0/3] rbd: reduce the potential for erroneous blocklisting Ilya Dryomov
  2023-07-25 21:28 ` [PATCH v2 1/3] rbd: make get_lock_owner_info() return a single locker or NULL Ilya Dryomov
  2023-07-25 21:28 ` [PATCH v2 2/3] rbd: harden get_lock_owner_info() a bit Ilya Dryomov
@ 2023-07-25 21:28 ` Ilya Dryomov
       [not found]   ` <AJYA4wAxJMc9WiAuqHSSGKra.3.1690375435133.Hmail.dongsheng.yang@easystack.cn>
  2 siblings, 1 reply; 6+ messages in thread
From: Ilya Dryomov @ 2023-07-25 21:28 UTC (permalink / raw)
  To: ceph-devel; +Cc: Dongsheng Yang

An attempt to acquire exclusive lock can race with the current lock
owner closing the image:

1. lock is held by client123, rbd_lock() returns -EBUSY
2. get_lock_owner_info() returns client123 instance details
3. client123 closes the image, lock is released
4. find_watcher() returns 0 as there is no matching watcher anymore
5. client123 instance gets erroneously blocklisted

Particularly impacted is mirror snapshot scheduler in snapshot-based
mirroring since it happens to open and close images a lot (images are
opened only for as long as it takes to take the next mirror snapshot,
the same client instance is used for all images).

To reduce the potential for erroneous blocklisting, retrieve the lock
owner again after find_watcher() returns 0.  If it's still there, make
sure it matches the previously detected lock owner.

Cc: stable@vger.kernel.org # ba6b7b6db4df: rbd: make get_lock_owner_info() return a single locker or NULL
Cc: stable@vger.kernel.org # c476a060136a: rbd: harden get_lock_owner_info() a bit
Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 94629e826369..24afcc93ac01 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3849,6 +3849,15 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
 	list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
 }
 
+static bool locker_equal(const struct ceph_locker *lhs,
+			 const struct ceph_locker *rhs)
+{
+	return lhs->id.name.type == rhs->id.name.type &&
+	       lhs->id.name.num == rhs->id.name.num &&
+	       !strcmp(lhs->id.cookie, rhs->id.cookie) &&
+	       ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
+}
+
 static void free_locker(struct ceph_locker *locker)
 {
 	if (locker)
@@ -3969,11 +3978,11 @@ static int find_watcher(struct rbd_device *rbd_dev,
 static int rbd_try_lock(struct rbd_device *rbd_dev)
 {
 	struct ceph_client *client = rbd_dev->rbd_client->client;
-	struct ceph_locker *locker;
+	struct ceph_locker *locker, *refreshed_locker;
 	int ret;
 
 	for (;;) {
-		locker = NULL;
+		locker = refreshed_locker = NULL;
 
 		ret = rbd_lock(rbd_dev);
 		if (ret != -EBUSY)
@@ -3993,6 +4002,16 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
 		if (ret)
 			goto out; /* request lock or error */
 
+		refreshed_locker = get_lock_owner_info(rbd_dev);
+		if (IS_ERR(refreshed_locker)) {
+			ret = PTR_ERR(refreshed_locker);
+			refreshed_locker = NULL;
+			goto out;
+		}
+		if (!refreshed_locker ||
+		    !locker_equal(locker, refreshed_locker))
+			goto again;
+
 		rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
 			 ENTITY_NAME(locker->id.name));
 
@@ -4014,10 +4033,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
 		}
 
 again:
+		free_locker(refreshed_locker);
 		free_locker(locker);
 	}
 
 out:
+	free_locker(refreshed_locker);
 	free_locker(locker);
 	return ret;
 }
-- 
2.41.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting
       [not found]   ` <AJYA4wAxJMc9WiAuqHSSGKra.3.1690375435133.Hmail.dongsheng.yang@easystack.cn>
@ 2023-07-26 13:06     ` Ilya Dryomov
  2023-07-26 14:43       ` 杨东升
  0 siblings, 1 reply; 6+ messages in thread
From: Ilya Dryomov @ 2023-07-26 13:06 UTC (permalink / raw)
  To: 杨东升; +Cc: ceph-devel

On Wed, Jul 26, 2023 at 2:44 PM 杨东升 <dongsheng.yang@easystack.cn> wrote:
>
>
> From: Ilya Dryomov <idryomov@gmail.com>
>
> Date: 2023-07-26 05:28:46
> To:  ceph-devel@vger.kernel.org
> Cc:  Dongsheng Yang <dongsheng.yang@easystack.cn>
> Subject: [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting>An attempt to acquire exclusive lock can race with the current lock
> >owner closing the image:
> >
> >1. lock is held by client123, rbd_lock() returns -EBUSY
> >2. get_lock_owner_info() returns client123 instance details
> >3. client123 closes the image, lock is released
> >4. find_watcher() returns 0 as there is no matching watcher anymore
> >5. client123 instance gets erroneously blocklisted
> >
> >Particularly impacted is mirror snapshot scheduler in snapshot-based
> >mirroring since it happens to open and close images a lot (images are
> >opened only for as long as it takes to take the next mirror snapshot,
> >the same client instance is used for all images).
> >
> >To reduce the potential for erroneous blocklisting, retrieve the lock
> >owner again after find_watcher() returns 0.  If it's still there, make
> >sure it matches the previously detected lock owner.
> >
> >Cc: stable@vger.kernel.org # ba6b7b6db4df: rbd: make get_lock_owner_info() return a single locker or NULL
> >Cc: stable@vger.kernel.org # c476a060136a: rbd: harden get_lock_owner_info() a bit
> >Cc: stable@vger.kernel.org
> >Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
>
>
> Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>

Hi Dongsheng,

Is this a review just for this patch or for the entire series?  The
patch doesn't apply in isolation, so I'm assuming it's for the entire
series but need confirmation since it's not on the cover letter.

Thanks,

                Ilya

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re:Re: [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting
  2023-07-26 13:06     ` Ilya Dryomov
@ 2023-07-26 14:43       ` 杨东升
  0 siblings, 0 replies; 6+ messages in thread
From: 杨东升 @ 2023-07-26 14:43 UTC (permalink / raw)
  To: Ilya Dryomov; +Cc: ceph-devel

发件人:Ilya Dryomov <idryomov@gmail.com>
发送日期:2023-07-26 21:06:17
收件人:"杨东升" <dongsheng.yang@easystack.cn>
抄送人:ceph-devel@vger.kernel.org
主题:Re: [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting>On Wed, Jul 26, 2023 at 2:44 PM 杨东升 <dongsheng.yang@easystack.cn> wrote:
>>
>>
>> From: Ilya Dryomov <idryomov@gmail.com>
>>
>> Date: 2023-07-26 05:28:46
>> To:  ceph-devel@vger.kernel.org
>> Cc:  Dongsheng Yang <dongsheng.yang@easystack.cn>
>> Subject: [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting>An attempt to acquire exclusive lock can race with the current lock
>> >owner closing the image:
>> >
>> >1. lock is held by client123, rbd_lock() returns -EBUSY
>> >2. get_lock_owner_info() returns client123 instance details
>> >3. client123 closes the image, lock is released
>> >4. find_watcher() returns 0 as there is no matching watcher anymore
>> >5. client123 instance gets erroneously blocklisted
>> >
>> >Particularly impacted is mirror snapshot scheduler in snapshot-based
>> >mirroring since it happens to open and close images a lot (images are
>> >opened only for as long as it takes to take the next mirror snapshot,
>> >the same client instance is used for all images).
>> >
>> >To reduce the potential for erroneous blocklisting, retrieve the lock
>> >owner again after find_watcher() returns 0.  If it's still there, make
>> >sure it matches the previously detected lock owner.
>> >
>> >Cc: stable@vger.kernel.org # ba6b7b6db4df: rbd: make get_lock_owner_info() return a single locker or NULL
>> >Cc: stable@vger.kernel.org # c476a060136a: rbd: harden get_lock_owner_info() a bit
>> >Cc: stable@vger.kernel.org
>> >Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
>>
>>
>> Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
>
>Hi Dongsheng,
>
>Is this a review just for this patch or for the entire series?  The
>patch doesn't apply in isolation, so I'm assuming it's for the entire
>series but need confirmation since it's not on the cover letter.


Hi Ilya,
    It's for the entire series. Actually I reply Reviewed-by to each of them.
So you can add my reviewed-by for 1/3 2/3 and 3/3 :)

Thanx



>
>Thanks,
>
>                Ilya



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-07-26 14:43 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-07-25 21:28 [PATCH v2 0/3] rbd: reduce the potential for erroneous blocklisting Ilya Dryomov
2023-07-25 21:28 ` [PATCH v2 1/3] rbd: make get_lock_owner_info() return a single locker or NULL Ilya Dryomov
2023-07-25 21:28 ` [PATCH v2 2/3] rbd: harden get_lock_owner_info() a bit Ilya Dryomov
2023-07-25 21:28 ` [PATCH v2 3/3] rbd: retrieve and check lock owner twice before blocklisting Ilya Dryomov
     [not found]   ` <AJYA4wAxJMc9WiAuqHSSGKra.3.1690375435133.Hmail.dongsheng.yang@easystack.cn>
2023-07-26 13:06     ` Ilya Dryomov
2023-07-26 14:43       ` 杨东升

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox